Diffstat (limited to 'fs/cachefiles')
 -rw-r--r--  fs/cachefiles/Kconfig        |   35
 -rw-r--r--  fs/cachefiles/Makefile       |    9
 -rw-r--r--  fs/cachefiles/bind.c         |  281
 -rw-r--r--  fs/cachefiles/cache.c        |  428
 -rw-r--r--  fs/cachefiles/daemon.c       |  393
 -rw-r--r--  fs/cachefiles/error_inject.c |   36
 -rw-r--r--  fs/cachefiles/interface.c    |  732
 -rw-r--r--  fs/cachefiles/internal.h     |  448
 -rw-r--r--  fs/cachefiles/io.c           |  762
 -rw-r--r--  fs/cachefiles/key.c          |  208
 -rw-r--r--  fs/cachefiles/main.c         |   45
 -rw-r--r--  fs/cachefiles/namei.c        | 1264
 -rw-r--r--  fs/cachefiles/ondemand.c     |  762
 -rw-r--r--  fs/cachefiles/proc.c         |  134
 -rw-r--r--  fs/cachefiles/rdwr.c         |  990
 -rw-r--r--  fs/cachefiles/security.c     |   30
 -rw-r--r--  fs/cachefiles/volume.c       |  141
 -rw-r--r--  fs/cachefiles/xattr.c        |  436
18 files changed, 3949 insertions, 3185 deletions
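
The hunks below delete the old bind.c binding logic, add cache.c in its place, and rework the daemon interface in daemon.c. As a reading aid, here is a minimal, hypothetical userspace sketch (not part of the patch) of how a cachefilesd-style daemon would drive the rewritten interface. The "dir"/"tag"/"bind" commands follow the daemon command table in daemon.c below; "bind ondemand" is only accepted when CONFIG_CACHEFILES_ONDEMAND is enabled, and the /dev/cachefiles device path, cache directory, and tag name are assumptions for illustration:

/*
 * Hypothetical userspace sketch: bring a cache online through the
 * rewritten daemon interface. Error handling is minimal and the
 * cache directory path is made up.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int daemon_cmd(int fd, const char *cmd)
{
	/* One command per write(); the kernel side copies it with
	 * memdup_user_nul() and dispatches via cachefiles_daemon_cmds[]. */
	if (write(fd, cmd, strlen(cmd)) < 0) {
		perror(cmd);
		return -1;
	}
	return 0;
}

int main(void)
{
	int fd = open("/dev/cachefiles", O_RDWR);

	if (fd < 0) {
		perror("/dev/cachefiles");
		return 1;
	}
	if (daemon_cmd(fd, "dir /var/cache/fscache") ||
	    daemon_cmd(fd, "tag mycache") ||
	    daemon_cmd(fd, "bind ondemand"))	/* plain "bind" selects the classic mode */
		return 1;
	/* The cache stays bound only while this fd is open: releasing it
	 * runs cachefiles_daemon_release(), which drops the unbind
	 * pincount and tears the cache down. */
	pause();
	return 0;
}

Note also that the new cachefiles_daemon_bind() returns -ERANGE when the limits are misordered (stop < cull < run < 100 must hold for both the file and block percentages), where the deleted bind.c merely asserted that ordering.
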
diff --git a/fs/cachefiles/Kconfig b/fs/cachefiles/Kconfig index 80e9c6167f0b..c5a070550ee3 100644 --- a/fs/cachefiles/Kconfig +++ b/fs/cachefiles/Kconfig @@ -1,13 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0-only config CACHEFILES tristate "Filesystem caching on files" - depends on FSCACHE && BLOCK + depends on NETFS_SUPPORT && FSCACHE && BLOCK help This permits use of a mounted filesystem as a cache for other filesystems - primarily networking filesystems - thus allowing fast local disk to enhance the speed of slower devices. - See Documentation/filesystems/caching/cachefiles.txt for more + See Documentation/filesystems/caching/cachefiles.rst for more information. config CACHEFILES_DEBUG @@ -19,21 +20,21 @@ config CACHEFILES_DEBUG enabled by setting bits in /sys/modules/cachefiles/parameter/debug or by including a debugging specifier in /etc/cachefilesd.conf. -config CACHEFILES_HISTOGRAM - bool "Gather latency information on CacheFiles" - depends on CACHEFILES && PROC_FS +config CACHEFILES_ERROR_INJECTION + bool "Provide error injection for cachefiles" + depends on CACHEFILES && SYSCTL help + This permits error injection to be enabled in cachefiles whilst a + cache is in service. - This option causes latency information to be gathered on CacheFiles - operation and exported through file: - - /proc/fs/cachefiles/histogram - - The generation of this histogram adds a certain amount of overhead to - execution as there are a number of points at which data is gathered, - and on a multi-CPU system these may be on cachelines that keep - bouncing between CPUs. On the other hand, the histogram may be - useful for debugging purposes. Saying 'N' here is recommended. +config CACHEFILES_ONDEMAND + bool "Support for on-demand read" + depends on CACHEFILES + default n + help + This permits userspace to enable the cachefiles on-demand read mode. + In this mode, when a cache miss occurs, responsibility for fetching + the data lies with the cachefiles backend instead of with the netfs + and is delegated to userspace. - See Documentation/filesystems/caching/cachefiles.txt for more - information. + If unsure, say N. diff --git a/fs/cachefiles/Makefile b/fs/cachefiles/Makefile index 32cbab0ffce3..c37a7a9af10b 100644 --- a/fs/cachefiles/Makefile +++ b/fs/cachefiles/Makefile @@ -1,18 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for caching in a mounted filesystem # cachefiles-y := \ - bind.o \ + cache.o \ daemon.o \ interface.o \ + io.o \ key.o \ main.o \ namei.o \ - rdwr.o \ security.o \ + volume.o \ xattr.o -cachefiles-$(CONFIG_CACHEFILES_HISTOGRAM) += proc.o +cachefiles-$(CONFIG_CACHEFILES_ERROR_INJECTION) += error_inject.o +cachefiles-$(CONFIG_CACHEFILES_ONDEMAND) += ondemand.o obj-$(CONFIG_CACHEFILES) := cachefiles.o diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c deleted file mode 100644 index 622f4696e484..000000000000 --- a/fs/cachefiles/bind.c +++ /dev/null @@ -1,281 +0,0 @@ -/* Bind and unbind a cache from the filesystem backing it - * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. 
- */ - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/sched.h> -#include <linux/completion.h> -#include <linux/slab.h> -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/namei.h> -#include <linux/mount.h> -#include <linux/statfs.h> -#include <linux/ctype.h> -#include "internal.h" - -static int cachefiles_daemon_add_cache(struct cachefiles_cache *caches); - -/* - * bind a directory as a cache - */ -int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args) -{ - _enter("{%u,%u,%u,%u,%u,%u},%s", - cache->frun_percent, - cache->fcull_percent, - cache->fstop_percent, - cache->brun_percent, - cache->bcull_percent, - cache->bstop_percent, - args); - - /* start by checking things over */ - ASSERT(cache->fstop_percent >= 0 && - cache->fstop_percent < cache->fcull_percent && - cache->fcull_percent < cache->frun_percent && - cache->frun_percent < 100); - - ASSERT(cache->bstop_percent >= 0 && - cache->bstop_percent < cache->bcull_percent && - cache->bcull_percent < cache->brun_percent && - cache->brun_percent < 100); - - if (*args) { - kerror("'bind' command doesn't take an argument"); - return -EINVAL; - } - - if (!cache->rootdirname) { - kerror("No cache directory specified"); - return -EINVAL; - } - - /* don't permit already bound caches to be re-bound */ - if (test_bit(CACHEFILES_READY, &cache->flags)) { - kerror("Cache already bound"); - return -EBUSY; - } - - /* make sure we have copies of the tag and dirname strings */ - if (!cache->tag) { - /* the tag string is released by the fops->release() - * function, so we don't release it on error here */ - cache->tag = kstrdup("CacheFiles", GFP_KERNEL); - if (!cache->tag) - return -ENOMEM; - } - - /* add the cache */ - return cachefiles_daemon_add_cache(cache); -} - -/* - * add a cache - */ -static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) -{ - struct cachefiles_object *fsdef; - struct path path; - struct kstatfs stats; - struct dentry *graveyard, *cachedir, *root; - const struct cred *saved_cred; - int ret; - - _enter(""); - - /* we want to work under the module's security ID */ - ret = cachefiles_get_security_ID(cache); - if (ret < 0) - return ret; - - cachefiles_begin_secure(cache, &saved_cred); - - /* allocate the root index object */ - ret = -ENOMEM; - - fsdef = kmem_cache_alloc(cachefiles_object_jar, GFP_KERNEL); - if (!fsdef) - goto error_root_object; - - ASSERTCMP(fsdef->backer, ==, NULL); - - atomic_set(&fsdef->usage, 1); - fsdef->type = FSCACHE_COOKIE_TYPE_INDEX; - - _debug("- fsdef %p", fsdef); - - /* look up the directory at the root of the cache */ - ret = kern_path(cache->rootdirname, LOOKUP_DIRECTORY, &path); - if (ret < 0) - goto error_open_root; - - cache->mnt = path.mnt; - root = path.dentry; - - /* check parameters */ - ret = -EOPNOTSUPP; - if (!root->d_inode || - !root->d_inode->i_op || - !root->d_inode->i_op->lookup || - !root->d_inode->i_op->mkdir || - !root->d_inode->i_op->setxattr || - !root->d_inode->i_op->getxattr || - !root->d_sb->s_op->statfs || - !root->d_sb->s_op->sync_fs) - goto error_unsupported; - - ret = -EROFS; - if (root->d_sb->s_flags & MS_RDONLY) - goto error_unsupported; - - /* determine the security of the on-disk cache as this governs - * security ID of files we create */ - ret = cachefiles_determine_cache_security(cache, root, &saved_cred); - if (ret < 0) - goto error_unsupported; - - /* get the cache size and blocksize */ - ret = vfs_statfs(&path, &stats); - if (ret < 0) - goto error_unsupported; - - ret = -ERANGE; - if (stats.f_bsize <= 0) 
- goto error_unsupported; - - ret = -EOPNOTSUPP; - if (stats.f_bsize > PAGE_SIZE) - goto error_unsupported; - - cache->bsize = stats.f_bsize; - cache->bshift = 0; - if (stats.f_bsize < PAGE_SIZE) - cache->bshift = PAGE_SHIFT - ilog2(stats.f_bsize); - - _debug("blksize %u (shift %u)", - cache->bsize, cache->bshift); - - _debug("size %llu, avail %llu", - (unsigned long long) stats.f_blocks, - (unsigned long long) stats.f_bavail); - - /* set up caching limits */ - do_div(stats.f_files, 100); - cache->fstop = stats.f_files * cache->fstop_percent; - cache->fcull = stats.f_files * cache->fcull_percent; - cache->frun = stats.f_files * cache->frun_percent; - - _debug("limits {%llu,%llu,%llu} files", - (unsigned long long) cache->frun, - (unsigned long long) cache->fcull, - (unsigned long long) cache->fstop); - - stats.f_blocks >>= cache->bshift; - do_div(stats.f_blocks, 100); - cache->bstop = stats.f_blocks * cache->bstop_percent; - cache->bcull = stats.f_blocks * cache->bcull_percent; - cache->brun = stats.f_blocks * cache->brun_percent; - - _debug("limits {%llu,%llu,%llu} blocks", - (unsigned long long) cache->brun, - (unsigned long long) cache->bcull, - (unsigned long long) cache->bstop); - - /* get the cache directory and check its type */ - cachedir = cachefiles_get_directory(cache, root, "cache"); - if (IS_ERR(cachedir)) { - ret = PTR_ERR(cachedir); - goto error_unsupported; - } - - fsdef->dentry = cachedir; - fsdef->fscache.cookie = NULL; - - ret = cachefiles_check_object_type(fsdef); - if (ret < 0) - goto error_unsupported; - - /* get the graveyard directory */ - graveyard = cachefiles_get_directory(cache, root, "graveyard"); - if (IS_ERR(graveyard)) { - ret = PTR_ERR(graveyard); - goto error_unsupported; - } - - cache->graveyard = graveyard; - - /* publish the cache */ - fscache_init_cache(&cache->cache, - &cachefiles_cache_ops, - "%s", - fsdef->dentry->d_sb->s_id); - - fscache_object_init(&fsdef->fscache, NULL, &cache->cache); - - ret = fscache_add_cache(&cache->cache, &fsdef->fscache, cache->tag); - if (ret < 0) - goto error_add_cache; - - /* done */ - set_bit(CACHEFILES_READY, &cache->flags); - dput(root); - - printk(KERN_INFO "CacheFiles:" - " File cache on %s registered\n", - cache->cache.identifier); - - /* check how much space the cache has */ - cachefiles_has_space(cache, 0, 0); - cachefiles_end_secure(cache, saved_cred); - return 0; - -error_add_cache: - dput(cache->graveyard); - cache->graveyard = NULL; -error_unsupported: - mntput(cache->mnt); - cache->mnt = NULL; - dput(fsdef->dentry); - fsdef->dentry = NULL; - dput(root); -error_open_root: - kmem_cache_free(cachefiles_object_jar, fsdef); -error_root_object: - cachefiles_end_secure(cache, saved_cred); - kerror("Failed to register: %d", ret); - return ret; -} - -/* - * unbind a cache on fd release - */ -void cachefiles_daemon_unbind(struct cachefiles_cache *cache) -{ - _enter(""); - - if (test_bit(CACHEFILES_READY, &cache->flags)) { - printk(KERN_INFO "CacheFiles:" - " File cache on %s unregistering\n", - cache->cache.identifier); - - fscache_withdraw_cache(&cache->cache); - } - - dput(cache->graveyard); - mntput(cache->mnt); - - kfree(cache->rootdirname); - kfree(cache->secctx); - kfree(cache->tag); - - _leave(""); -} diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c new file mode 100644 index 000000000000..9fb06dc16520 --- /dev/null +++ b/fs/cachefiles/cache.c @@ -0,0 +1,428 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Manage high-level VFS aspects of a cache. + * + * Copyright (C) 2007, 2021 Red Hat, Inc. 
All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/slab.h> +#include <linux/statfs.h> +#include <linux/namei.h> +#include <trace/events/fscache.h> +#include "internal.h" + +/* + * Bring a cache online. + */ +int cachefiles_add_cache(struct cachefiles_cache *cache) +{ + struct fscache_cache *cache_cookie; + struct path path; + struct kstatfs stats; + struct dentry *graveyard, *cachedir, *root; + const struct cred *saved_cred; + int ret; + + _enter(""); + + cache_cookie = fscache_acquire_cache(cache->tag); + if (IS_ERR(cache_cookie)) + return PTR_ERR(cache_cookie); + + /* we want to work under the module's security ID */ + ret = cachefiles_get_security_ID(cache); + if (ret < 0) + goto error_getsec; + + cachefiles_begin_secure(cache, &saved_cred); + + /* look up the directory at the root of the cache */ + ret = kern_path(cache->rootdirname, LOOKUP_DIRECTORY, &path); + if (ret < 0) + goto error_open_root; + + cache->mnt = path.mnt; + root = path.dentry; + + ret = -EINVAL; + if (is_idmapped_mnt(path.mnt)) { + pr_warn("File cache on idmapped mounts not supported"); + goto error_unsupported; + } + + /* Check features of the backing filesystem: + * - Directories must support looking up and directory creation + * - We create tmpfiles to handle invalidation + * - We use xattrs to store metadata + * - We need to be able to query the amount of space available + * - We want to be able to sync the filesystem when stopping the cache + * - We use DIO to/from pages, so the blocksize mustn't be too big. + */ + ret = -EOPNOTSUPP; + if (d_is_negative(root) || + !d_backing_inode(root)->i_op->lookup || + !d_backing_inode(root)->i_op->mkdir || + !d_backing_inode(root)->i_op->tmpfile || + !(d_backing_inode(root)->i_opflags & IOP_XATTR) || + !root->d_sb->s_op->statfs || + !root->d_sb->s_op->sync_fs || + root->d_sb->s_blocksize > PAGE_SIZE) + goto error_unsupported; + + ret = -EROFS; + if (sb_rdonly(root->d_sb)) + goto error_unsupported; + + /* determine the security of the on-disk cache as this governs + * security ID of files we create */ + ret = cachefiles_determine_cache_security(cache, root, &saved_cred); + if (ret < 0) + goto error_unsupported; + + /* get the cache size and blocksize */ + ret = vfs_statfs(&path, &stats); + if (ret < 0) + goto error_unsupported; + + ret = -ERANGE; + if (stats.f_bsize <= 0) + goto error_unsupported; + + ret = -EOPNOTSUPP; + if (stats.f_bsize > PAGE_SIZE) + goto error_unsupported; + + cache->bsize = stats.f_bsize; + cache->bshift = ilog2(stats.f_bsize); + + _debug("blksize %u (shift %u)", + cache->bsize, cache->bshift); + + _debug("size %llu, avail %llu", + (unsigned long long) stats.f_blocks, + (unsigned long long) stats.f_bavail); + + /* set up caching limits */ + do_div(stats.f_files, 100); + cache->fstop = stats.f_files * cache->fstop_percent; + cache->fcull = stats.f_files * cache->fcull_percent; + cache->frun = stats.f_files * cache->frun_percent; + + _debug("limits {%llu,%llu,%llu} files", + (unsigned long long) cache->frun, + (unsigned long long) cache->fcull, + (unsigned long long) cache->fstop); + + do_div(stats.f_blocks, 100); + cache->bstop = stats.f_blocks * cache->bstop_percent; + cache->bcull = stats.f_blocks * cache->bcull_percent; + cache->brun = stats.f_blocks * cache->brun_percent; + + _debug("limits {%llu,%llu,%llu} blocks", + (unsigned long long) cache->brun, + (unsigned long long) cache->bcull, + (unsigned long long) cache->bstop); + + /* get the cache directory and check its type */ + cachedir = 
cachefiles_get_directory(cache, root, "cache", NULL); + if (IS_ERR(cachedir)) { + ret = PTR_ERR(cachedir); + goto error_unsupported; + } + + cache->store = cachedir; + + /* get the graveyard directory */ + graveyard = cachefiles_get_directory(cache, root, "graveyard", NULL); + if (IS_ERR(graveyard)) { + ret = PTR_ERR(graveyard); + goto error_unsupported; + } + + cache->graveyard = graveyard; + cache->cache = cache_cookie; + + ret = fscache_add_cache(cache_cookie, &cachefiles_cache_ops, cache); + if (ret < 0) + goto error_add_cache; + + /* done */ + set_bit(CACHEFILES_READY, &cache->flags); + dput(root); + + pr_info("File cache on %s registered\n", cache_cookie->name); + + /* check how much space the cache has */ + cachefiles_has_space(cache, 0, 0, cachefiles_has_space_check); + cachefiles_end_secure(cache, saved_cred); + _leave(" = 0 [%px]", cache->cache); + return 0; + +error_add_cache: + cachefiles_put_directory(cache->graveyard); + cache->graveyard = NULL; +error_unsupported: + cachefiles_put_directory(cache->store); + cache->store = NULL; + mntput(cache->mnt); + cache->mnt = NULL; + dput(root); +error_open_root: + cachefiles_end_secure(cache, saved_cred); + put_cred(cache->cache_cred); + cache->cache_cred = NULL; +error_getsec: + fscache_relinquish_cache(cache_cookie); + cache->cache = NULL; + pr_err("Failed to register: %d\n", ret); + return ret; +} + +/* + * See if we have space for a number of pages and/or a number of files in the + * cache + */ +int cachefiles_has_space(struct cachefiles_cache *cache, + unsigned fnr, unsigned bnr, + enum cachefiles_has_space_for reason) +{ + struct kstatfs stats; + u64 b_avail, b_writing; + int ret; + + struct path path = { + .mnt = cache->mnt, + .dentry = cache->mnt->mnt_root, + }; + + //_enter("{%llu,%llu,%llu,%llu,%llu,%llu},%u,%u", + // (unsigned long long) cache->frun, + // (unsigned long long) cache->fcull, + // (unsigned long long) cache->fstop, + // (unsigned long long) cache->brun, + // (unsigned long long) cache->bcull, + // (unsigned long long) cache->bstop, + // fnr, bnr); + + /* find out how many pages of blockdev are available */ + memset(&stats, 0, sizeof(stats)); + + ret = vfs_statfs(&path, &stats); + if (ret < 0) { + trace_cachefiles_vfs_error(NULL, d_inode(path.dentry), ret, + cachefiles_trace_statfs_error); + if (ret == -EIO) + cachefiles_io_error(cache, "statfs failed"); + _leave(" = %d", ret); + return ret; + } + + b_avail = stats.f_bavail; + b_writing = atomic_long_read(&cache->b_writing); + if (b_avail > b_writing) + b_avail -= b_writing; + else + b_avail = 0; + + //_debug("avail %llu,%llu", + // (unsigned long long)stats.f_ffree, + // (unsigned long long)b_avail); + + /* see if there is sufficient space */ + if (stats.f_ffree > fnr) + stats.f_ffree -= fnr; + else + stats.f_ffree = 0; + + if (b_avail > bnr) + b_avail -= bnr; + else + b_avail = 0; + + ret = -ENOBUFS; + if (stats.f_ffree < cache->fstop || + b_avail < cache->bstop) + goto stop_and_begin_cull; + + ret = 0; + if (stats.f_ffree < cache->fcull || + b_avail < cache->bcull) + goto begin_cull; + + if (test_bit(CACHEFILES_CULLING, &cache->flags) && + stats.f_ffree >= cache->frun && + b_avail >= cache->brun && + test_and_clear_bit(CACHEFILES_CULLING, &cache->flags) + ) { + _debug("cease culling"); + cachefiles_state_changed(cache); + } + + //_leave(" = 0"); + return 0; + +stop_and_begin_cull: + switch (reason) { + case cachefiles_has_space_for_write: + fscache_count_no_write_space(); + break; + case cachefiles_has_space_for_create: + fscache_count_no_create_space(); + 
break; + default: + break; + } +begin_cull: + if (!test_and_set_bit(CACHEFILES_CULLING, &cache->flags)) { + _debug("### CULL CACHE ###"); + cachefiles_state_changed(cache); + } + + _leave(" = %d", ret); + return ret; +} + +/* + * Mark all the objects as being out of service and queue them all for cleanup. + */ +static void cachefiles_withdraw_objects(struct cachefiles_cache *cache) +{ + struct cachefiles_object *object; + unsigned int count = 0; + + _enter(""); + + spin_lock(&cache->object_list_lock); + + while (!list_empty(&cache->object_list)) { + object = list_first_entry(&cache->object_list, + struct cachefiles_object, cache_link); + cachefiles_see_object(object, cachefiles_obj_see_withdrawal); + list_del_init(&object->cache_link); + fscache_withdraw_cookie(object->cookie); + count++; + if ((count & 63) == 0) { + spin_unlock(&cache->object_list_lock); + cond_resched(); + spin_lock(&cache->object_list_lock); + } + } + + spin_unlock(&cache->object_list_lock); + _leave(" [%u objs]", count); +} + +/* + * Withdraw fscache volumes. + */ +static void cachefiles_withdraw_fscache_volumes(struct cachefiles_cache *cache) +{ + struct list_head *cur; + struct cachefiles_volume *volume; + struct fscache_volume *vcookie; + + _enter(""); +retry: + spin_lock(&cache->object_list_lock); + list_for_each(cur, &cache->volumes) { + volume = list_entry(cur, struct cachefiles_volume, cache_link); + + if (atomic_read(&volume->vcookie->n_accesses) == 0) + continue; + + vcookie = fscache_try_get_volume(volume->vcookie, + fscache_volume_get_withdraw); + if (vcookie) { + spin_unlock(&cache->object_list_lock); + fscache_withdraw_volume(vcookie); + fscache_put_volume(vcookie, fscache_volume_put_withdraw); + goto retry; + } + } + spin_unlock(&cache->object_list_lock); + + _leave(""); +} + +/* + * Withdraw cachefiles volumes. + */ +static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache) +{ + _enter(""); + + for (;;) { + struct fscache_volume *vcookie = NULL; + struct cachefiles_volume *volume = NULL; + + spin_lock(&cache->object_list_lock); + if (!list_empty(&cache->volumes)) { + volume = list_first_entry(&cache->volumes, + struct cachefiles_volume, cache_link); + vcookie = fscache_try_get_volume(volume->vcookie, + fscache_volume_get_withdraw); + if (!vcookie) { + spin_unlock(&cache->object_list_lock); + cpu_relax(); + continue; + } + list_del_init(&volume->cache_link); + } + spin_unlock(&cache->object_list_lock); + if (!volume) + break; + + cachefiles_withdraw_volume(volume); + fscache_put_volume(vcookie, fscache_volume_put_withdraw); + } + + _leave(""); +} + +/* + * Sync a cache to backing disk. + */ +static void cachefiles_sync_cache(struct cachefiles_cache *cache) +{ + const struct cred *saved_cred; + int ret; + + _enter("%s", cache->cache->name); + + /* make sure all pages pinned by operations on behalf of the netfs are + * written to disc */ + cachefiles_begin_secure(cache, &saved_cred); + down_read(&cache->mnt->mnt_sb->s_umount); + ret = sync_filesystem(cache->mnt->mnt_sb); + up_read(&cache->mnt->mnt_sb->s_umount); + cachefiles_end_secure(cache, saved_cred); + + if (ret == -EIO) + cachefiles_io_error(cache, + "Attempt to sync backing fs superblock returned error %d", + ret); +} + +/* + * Withdraw cache objects. 
+ */ +void cachefiles_withdraw_cache(struct cachefiles_cache *cache) +{ + struct fscache_cache *fscache = cache->cache; + + pr_info("File cache on %s unregistering\n", fscache->name); + + fscache_withdraw_cache(fscache); + cachefiles_withdraw_fscache_volumes(cache); + + /* we now have to destroy all the active objects pertaining to this + * cache - which we do by passing them off to thread pool to be + * disposed of */ + cachefiles_withdraw_objects(cache); + fscache_wait_for_objects(fscache); + + cachefiles_withdraw_volumes(cache); + cachefiles_sync_cache(cache); + cache->cache = NULL; + fscache_relinquish_cache(fscache); +} diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index 0a1467b15516..1806bff8e59b 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* Daemon interface * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2007, 2021 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/module.h> @@ -19,6 +15,7 @@ #include <linux/namei.h> #include <linux/poll.h> #include <linux/mount.h> +#include <linux/security.h> #include <linux/statfs.h> #include <linux/ctype.h> #include <linux/string.h> @@ -31,7 +28,7 @@ static ssize_t cachefiles_daemon_read(struct file *, char __user *, size_t, loff_t *); static ssize_t cachefiles_daemon_write(struct file *, const char __user *, size_t, loff_t *); -static unsigned int cachefiles_daemon_poll(struct file *, +static __poll_t cachefiles_daemon_poll(struct file *, struct poll_table_struct *); static int cachefiles_daemon_frun(struct cachefiles_cache *, char *); static int cachefiles_daemon_fcull(struct cachefiles_cache *, char *); @@ -45,6 +42,8 @@ static int cachefiles_daemon_dir(struct cachefiles_cache *, char *); static int cachefiles_daemon_inuse(struct cachefiles_cache *, char *); static int cachefiles_daemon_secctx(struct cachefiles_cache *, char *); static int cachefiles_daemon_tag(struct cachefiles_cache *, char *); +static int cachefiles_daemon_bind(struct cachefiles_cache *, char *); +static void cachefiles_daemon_unbind(struct cachefiles_cache *); static unsigned long cachefiles_open; @@ -77,12 +76,16 @@ static const struct cachefiles_daemon_cmd cachefiles_daemon_cmds[] = { { "inuse", cachefiles_daemon_inuse }, { "secctx", cachefiles_daemon_secctx }, { "tag", cachefiles_daemon_tag }, +#ifdef CONFIG_CACHEFILES_ONDEMAND + { "copen", cachefiles_ondemand_copen }, + { "restore", cachefiles_ondemand_restore }, +#endif { "", NULL } }; /* - * do various checks + * Prepare a cache for caching. 
*/ static int cachefiles_daemon_open(struct inode *inode, struct file *file) { @@ -106,9 +109,13 @@ static int cachefiles_daemon_open(struct inode *inode, struct file *file) } mutex_init(&cache->daemon_mutex); - cache->active_nodes = RB_ROOT; - rwlock_init(&cache->active_lock); init_waitqueue_head(&cache->daemon_pollwq); + INIT_LIST_HEAD(&cache->volumes); + INIT_LIST_HEAD(&cache->object_list); + spin_lock_init(&cache->object_list_lock); + refcount_set(&cache->unbind_pincount, 1); + xa_init_flags(&cache->reqs, XA_FLAGS_ALLOC); + xa_init_flags(&cache->ondemand_ids, XA_FLAGS_ALLOC1); /* set default caching limits * - limit at 1% free space and/or free files @@ -127,8 +134,56 @@ static int cachefiles_daemon_open(struct inode *inode, struct file *file) return 0; } +void cachefiles_flush_reqs(struct cachefiles_cache *cache) +{ + struct xarray *xa = &cache->reqs; + struct cachefiles_req *req; + unsigned long index; + + /* + * Make sure the following two operations won't be reordered. + * 1) set CACHEFILES_DEAD bit + * 2) flush requests in the xarray + * Otherwise the request may be enqueued after xarray has been + * flushed, leaving the orphan request never being completed. + * + * CPU 1 CPU 2 + * ===== ===== + * flush requests in the xarray + * test CACHEFILES_DEAD bit + * enqueue the request + * set CACHEFILES_DEAD bit + */ + smp_mb(); + + xa_lock(xa); + xa_for_each(xa, index, req) { + req->error = -EIO; + complete(&req->done); + __xa_erase(xa, index); + } + xa_unlock(xa); + + xa_destroy(&cache->reqs); + xa_destroy(&cache->ondemand_ids); +} + +void cachefiles_put_unbind_pincount(struct cachefiles_cache *cache) +{ + if (refcount_dec_and_test(&cache->unbind_pincount)) { + cachefiles_daemon_unbind(cache); + cachefiles_open = 0; + kfree(cache); + } +} + +void cachefiles_get_unbind_pincount(struct cachefiles_cache *cache) +{ + refcount_inc(&cache->unbind_pincount); +} + /* - * release a cache + * Release a cache. 
*/ static int cachefiles_daemon_release(struct inode *inode, struct file *file) { @@ -140,40 +195,33 @@ static int cachefiles_daemon_release(struct inode *inode, struct file *file) set_bit(CACHEFILES_DEAD, &cache->flags); - cachefiles_daemon_unbind(cache); - - ASSERT(!cache->active_nodes.rb_node); + if (cachefiles_in_ondemand_mode(cache)) + cachefiles_flush_reqs(cache); /* clean up the control file interface */ cache->cachefilesd = NULL; file->private_data = NULL; - cachefiles_open = 0; - kfree(cache); + cachefiles_put_unbind_pincount(cache); _leave(""); return 0; } -/* - * read the cache state - */ -static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buffer, - size_t buflen, loff_t *pos) +static ssize_t cachefiles_do_daemon_read(struct cachefiles_cache *cache, + char __user *_buffer, size_t buflen) { - struct cachefiles_cache *cache = file->private_data; + unsigned long long b_released; + unsigned f_released; char buffer[256]; int n; - //_enter(",,%zu,", buflen); - - if (!test_bit(CACHEFILES_READY, &cache->flags)) - return 0; - /* check how much space the cache has */ - cachefiles_has_space(cache, 0, 0); + cachefiles_has_space(cache, 0, 0, cachefiles_has_space_check); /* summarise */ + f_released = atomic_xchg(&cache->f_released, 0); + b_released = atomic_long_xchg(&cache->b_released, 0); clear_bit(CACHEFILES_STATE_CHANGED, &cache->flags); n = snprintf(buffer, sizeof(buffer), @@ -183,15 +231,18 @@ static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buffer, " fstop=%llx" " brun=%llx" " bcull=%llx" - " bstop=%llx", + " bstop=%llx" + " freleased=%x" + " breleased=%llx", test_bit(CACHEFILES_CULLING, &cache->flags) ? '1' : '0', (unsigned long long) cache->frun, (unsigned long long) cache->fcull, (unsigned long long) cache->fstop, (unsigned long long) cache->brun, (unsigned long long) cache->bcull, - (unsigned long long) cache->bstop - ); + (unsigned long long) cache->bstop, + f_released, + b_released); if (n > buflen) return -EMSGSIZE; @@ -203,7 +254,26 @@ static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buffer, } /* - * command the cache + * Read the cache state. + */ +static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buffer, + size_t buflen, loff_t *pos) +{ + struct cachefiles_cache *cache = file->private_data; + + //_enter(",,%zu,", buflen); + + if (!test_bit(CACHEFILES_READY, &cache->flags)) + return 0; + + if (cachefiles_in_ondemand_mode(cache)) + return cachefiles_ondemand_daemon_read(cache, _buffer, buflen); + else + return cachefiles_do_daemon_read(cache, _buffer, buflen); +} + +/* + * Take a command from cachefilesd, parse it and act on it. 
*/ static ssize_t cachefiles_daemon_write(struct file *file, const char __user *_data, @@ -222,19 +292,13 @@ static ssize_t cachefiles_daemon_write(struct file *file, if (test_bit(CACHEFILES_DEAD, &cache->flags)) return -EIO; - if (datalen < 0 || datalen > PAGE_SIZE - 1) + if (datalen > PAGE_SIZE - 1) return -EOPNOTSUPP; /* drag the command string into the kernel so we can parse it */ - data = kmalloc(datalen + 1, GFP_KERNEL); - if (!data) - return -ENOMEM; - - ret = -EFAULT; - if (copy_from_user(data, _data, datalen) != 0) - goto error; - - data[datalen] = '\0'; + data = memdup_user_nul(_data, datalen); + if (IS_ERR(data)) + return PTR_ERR(data); ret = -EINVAL; if (memchr(data, '\0', datalen)) @@ -287,42 +351,56 @@ found_command: } /* - * poll for culling state - * - use POLLOUT to indicate culling state + * Poll for culling state + * - use EPOLLOUT to indicate culling state */ -static unsigned int cachefiles_daemon_poll(struct file *file, +static __poll_t cachefiles_daemon_poll(struct file *file, struct poll_table_struct *poll) { struct cachefiles_cache *cache = file->private_data; - unsigned int mask; + XA_STATE(xas, &cache->reqs, 0); + struct cachefiles_req *req; + __poll_t mask; poll_wait(file, &cache->daemon_pollwq, poll); mask = 0; - if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags)) - mask |= POLLIN; + if (cachefiles_in_ondemand_mode(cache)) { + if (!xa_empty(&cache->reqs)) { + xas_lock(&xas); + xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) { + if (!cachefiles_ondemand_is_reopening_read(req)) { + mask |= EPOLLIN; + break; + } + } + xas_unlock(&xas); + } + } else { + if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags)) + mask |= EPOLLIN; + } if (test_bit(CACHEFILES_CULLING, &cache->flags)) - mask |= POLLOUT; + mask |= EPOLLOUT; return mask; } /* - * give a range error for cache space constraints + * Give a range error for cache space constraints * - can be tail-called */ static int cachefiles_daemon_range_error(struct cachefiles_cache *cache, char *args) { - kerror("Free space limits must be in range" - " 0%%<=stop<cull<run<100%%"); + pr_err("Free space limits must be in range 0%%<=stop<cull<run<100%%\n"); return -EINVAL; } /* - * set the percentage of files at which to stop culling + * Set the percentage of files at which to stop culling * - command: "frun <N>%" */ static int cachefiles_daemon_frun(struct cachefiles_cache *cache, char *args) @@ -346,7 +424,7 @@ static int cachefiles_daemon_frun(struct cachefiles_cache *cache, char *args) } /* - * set the percentage of files at which to start culling + * Set the percentage of files at which to start culling * - command: "fcull <N>%" */ static int cachefiles_daemon_fcull(struct cachefiles_cache *cache, char *args) @@ -370,7 +448,7 @@ static int cachefiles_daemon_fcull(struct cachefiles_cache *cache, char *args) } /* - * set the percentage of files at which to stop allocating + * Set the percentage of files at which to stop allocating * - command: "fstop <N>%" */ static int cachefiles_daemon_fstop(struct cachefiles_cache *cache, char *args) @@ -386,7 +464,7 @@ static int cachefiles_daemon_fstop(struct cachefiles_cache *cache, char *args) if (args[0] != '%' || args[1] != '\0') return -EINVAL; - if (fstop < 0 || fstop >= cache->fcull_percent) + if (fstop >= cache->fcull_percent) return cachefiles_daemon_range_error(cache, args); cache->fstop_percent = fstop; @@ -394,7 +472,7 @@ static int cachefiles_daemon_fstop(struct cachefiles_cache *cache, char *args) } /* - * set the percentage of blocks at which to stop 
culling + * Set the percentage of blocks at which to stop culling * - command: "brun <N>%" */ static int cachefiles_daemon_brun(struct cachefiles_cache *cache, char *args) @@ -418,7 +496,7 @@ static int cachefiles_daemon_brun(struct cachefiles_cache *cache, char *args) } /* - * set the percentage of blocks at which to start culling + * Set the percentage of blocks at which to start culling * - command: "bcull <N>%" */ static int cachefiles_daemon_bcull(struct cachefiles_cache *cache, char *args) @@ -442,7 +520,7 @@ static int cachefiles_daemon_bcull(struct cachefiles_cache *cache, char *args) } /* - * set the percentage of blocks at which to stop allocating + * Set the percentage of blocks at which to stop allocating * - command: "bstop <N>%" */ static int cachefiles_daemon_bstop(struct cachefiles_cache *cache, char *args) @@ -458,7 +536,7 @@ static int cachefiles_daemon_bstop(struct cachefiles_cache *cache, char *args) if (args[0] != '%' || args[1] != '\0') return -EINVAL; - if (bstop < 0 || bstop >= cache->bcull_percent) + if (bstop >= cache->bcull_percent) return cachefiles_daemon_range_error(cache, args); cache->bstop_percent = bstop; @@ -466,7 +544,7 @@ static int cachefiles_daemon_bstop(struct cachefiles_cache *cache, char *args) } /* - * set the cache directory + * Set the cache directory * - command: "dir <name>" */ static int cachefiles_daemon_dir(struct cachefiles_cache *cache, char *args) @@ -476,12 +554,12 @@ static int cachefiles_daemon_dir(struct cachefiles_cache *cache, char *args) _enter(",%s", args); if (!*args) { - kerror("Empty directory specified"); + pr_err("Empty directory specified\n"); return -EINVAL; } if (cache->rootdirname) { - kerror("Second cache directory specified"); + pr_err("Second cache directory specified\n"); return -EEXIST; } @@ -494,35 +572,35 @@ static int cachefiles_daemon_dir(struct cachefiles_cache *cache, char *args) } /* - * set the cache security context + * Set the cache security context * - command: "secctx <ctx>" */ static int cachefiles_daemon_secctx(struct cachefiles_cache *cache, char *args) { - char *secctx; + int err; _enter(",%s", args); if (!*args) { - kerror("Empty security context specified"); + pr_err("Empty security context specified\n"); return -EINVAL; } - if (cache->secctx) { - kerror("Second security context specified"); + if (cache->have_secid) { + pr_err("Second security context specified\n"); return -EINVAL; } - secctx = kstrdup(args, GFP_KERNEL); - if (!secctx) - return -ENOMEM; + err = security_secctx_to_secid(args, strlen(args), &cache->secid); + if (err) + return err; - cache->secctx = secctx; + cache->have_secid = true; return 0; } /* - * set the cache tag + * Set the cache tag * - command: "tag <name>" */ static int cachefiles_daemon_tag(struct cachefiles_cache *cache, char *args) @@ -532,7 +610,7 @@ static int cachefiles_daemon_tag(struct cachefiles_cache *cache, char *args) _enter(",%s", args); if (!*args) { - kerror("Empty tag specified"); + pr_err("Empty tag specified\n"); return -EINVAL; } @@ -548,7 +626,7 @@ static int cachefiles_daemon_tag(struct cachefiles_cache *cache, char *args) } /* - * request a node in the cache be culled from the current working directory + * Request a node in the cache be culled from the current working directory * - command: "cull <name>" */ static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args) @@ -563,19 +641,18 @@ static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args) goto inval; if (!test_bit(CACHEFILES_READY, &cache->flags)) { - 
kerror("cull applied to unready cache"); + pr_err("cull applied to unready cache\n"); return -EIO; } if (test_bit(CACHEFILES_DEAD, &cache->flags)) { - kerror("cull applied to dead cache"); + pr_err("cull applied to dead cache\n"); return -EIO; } - /* extract the directory dentry from the cwd */ get_fs_pwd(current->fs, &path); - if (!S_ISDIR(path.dentry->d_inode->i_mode)) + if (!d_can_lookup(path.dentry)) goto notdir; cachefiles_begin_secure(cache, &saved_cred); @@ -588,16 +665,16 @@ static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args) notdir: path_put(&path); - kerror("cull command requires dirfd to be a directory"); + pr_err("cull command requires dirfd to be a directory\n"); return -ENOTDIR; inval: - kerror("cull command requires dirfd and filename"); + pr_err("cull command requires dirfd and filename\n"); return -EINVAL; } /* - * set debugging mode + * Set debugging mode * - command: "debug <mask>" */ static int cachefiles_daemon_debug(struct cachefiles_cache *cache, char *args) @@ -615,12 +692,12 @@ static int cachefiles_daemon_debug(struct cachefiles_cache *cache, char *args) return 0; inval: - kerror("debug command requires mask"); + pr_err("debug command requires mask\n"); return -EINVAL; } /* - * find out whether an object in the current working directory is in use or not + * Find out whether an object in the current working directory is in use or not * - command: "inuse <name>" */ static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args) @@ -635,19 +712,18 @@ static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args) goto inval; if (!test_bit(CACHEFILES_READY, &cache->flags)) { - kerror("inuse applied to unready cache"); + pr_err("inuse applied to unready cache\n"); return -EIO; } if (test_bit(CACHEFILES_DEAD, &cache->flags)) { - kerror("inuse applied to dead cache"); + pr_err("inuse applied to dead cache\n"); return -EIO; } - /* extract the directory dentry from the cwd */ get_fs_pwd(current->fs, &path); - if (!S_ISDIR(path.dentry->d_inode->i_mode)) + if (!d_can_lookup(path.dentry)) goto notdir; cachefiles_begin_secure(cache, &saved_cred); @@ -660,93 +736,92 @@ static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args) notdir: path_put(&path); - kerror("inuse command requires dirfd to be a directory"); + pr_err("inuse command requires dirfd to be a directory\n"); return -ENOTDIR; inval: - kerror("inuse command requires dirfd and filename"); + pr_err("inuse command requires dirfd and filename\n"); return -EINVAL; } /* - * see if we have space for a number of pages and/or a number of files in the - * cache + * Bind a directory as a cache */ -int cachefiles_has_space(struct cachefiles_cache *cache, - unsigned fnr, unsigned bnr) +static int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args) { - struct kstatfs stats; - struct path path = { - .mnt = cache->mnt, - .dentry = cache->mnt->mnt_root, - }; - int ret; + _enter("{%u,%u,%u,%u,%u,%u},%s", + cache->frun_percent, + cache->fcull_percent, + cache->fstop_percent, + cache->brun_percent, + cache->bcull_percent, + cache->bstop_percent, + args); + + if (cache->fstop_percent >= cache->fcull_percent || + cache->fcull_percent >= cache->frun_percent || + cache->frun_percent >= 100) + return -ERANGE; + + if (cache->bstop_percent >= cache->bcull_percent || + cache->bcull_percent >= cache->brun_percent || + cache->brun_percent >= 100) + return -ERANGE; + + if (!cache->rootdirname) { + pr_err("No cache directory specified\n"); + return -EINVAL; + 
} + + /* Don't permit already bound caches to be re-bound */ + if (test_bit(CACHEFILES_READY, &cache->flags)) { + pr_err("Cache already bound\n"); + return -EBUSY; + } - //_enter("{%llu,%llu,%llu,%llu,%llu,%llu},%u,%u", - // (unsigned long long) cache->frun, - // (unsigned long long) cache->fcull, - // (unsigned long long) cache->fstop, - // (unsigned long long) cache->brun, - // (unsigned long long) cache->bcull, - // (unsigned long long) cache->bstop, - // fnr, bnr); - - /* find out how many pages of blockdev are available */ - memset(&stats, 0, sizeof(stats)); - - ret = vfs_statfs(&path, &stats); - if (ret < 0) { - if (ret == -EIO) - cachefiles_io_error(cache, "statfs failed"); - _leave(" = %d", ret); - return ret; + if (IS_ENABLED(CONFIG_CACHEFILES_ONDEMAND)) { + if (!strcmp(args, "ondemand")) { + set_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags); + } else if (*args) { + pr_err("Invalid argument to the 'bind' command\n"); + return -EINVAL; + } + } else if (*args) { + pr_err("'bind' command doesn't take an argument\n"); + return -EINVAL; } - stats.f_bavail >>= cache->bshift; + /* Make sure we have copies of the tag string */ + if (!cache->tag) { + /* + * The tag string is released by the fops->release() + * function, so we don't release it on error here + */ + cache->tag = kstrdup("CacheFiles", GFP_KERNEL); + if (!cache->tag) + return -ENOMEM; + } - //_debug("avail %llu,%llu", - // (unsigned long long) stats.f_ffree, - // (unsigned long long) stats.f_bavail); + return cachefiles_add_cache(cache); +} - /* see if there is sufficient space */ - if (stats.f_ffree > fnr) - stats.f_ffree -= fnr; - else - stats.f_ffree = 0; +/* + * Unbind a cache. + */ +static void cachefiles_daemon_unbind(struct cachefiles_cache *cache) +{ + _enter(""); - if (stats.f_bavail > bnr) - stats.f_bavail -= bnr; - else - stats.f_bavail = 0; - - ret = -ENOBUFS; - if (stats.f_ffree < cache->fstop || - stats.f_bavail < cache->bstop) - goto begin_cull; - - ret = 0; - if (stats.f_ffree < cache->fcull || - stats.f_bavail < cache->bcull) - goto begin_cull; - - if (test_bit(CACHEFILES_CULLING, &cache->flags) && - stats.f_ffree >= cache->frun && - stats.f_bavail >= cache->brun && - test_and_clear_bit(CACHEFILES_CULLING, &cache->flags) - ) { - _debug("cease culling"); - cachefiles_state_changed(cache); - } + if (test_bit(CACHEFILES_READY, &cache->flags)) + cachefiles_withdraw_cache(cache); - //_leave(" = 0"); - return 0; + cachefiles_put_directory(cache->graveyard); + cachefiles_put_directory(cache->store); + mntput(cache->mnt); + put_cred(cache->cache_cred); -begin_cull: - if (!test_and_set_bit(CACHEFILES_CULLING, &cache->flags)) { - _debug("### CULL CACHE ###"); - cachefiles_state_changed(cache); - } + kfree(cache->rootdirname); + kfree(cache->tag); - _leave(" = %d", ret); - return ret; + _leave(""); } diff --git a/fs/cachefiles/error_inject.c b/fs/cachefiles/error_inject.c new file mode 100644 index 000000000000..e341ade47dd8 --- /dev/null +++ b/fs/cachefiles/error_inject.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Error injection handling. + * + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/sysctl.h> +#include "internal.h" + +unsigned int cachefiles_error_injection_state; + +static struct ctl_table_header *cachefiles_sysctl; +static const struct ctl_table cachefiles_sysctls[] = { + { + .procname = "error_injection", + .data = &cachefiles_error_injection_state, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_douintvec, + }, +}; + +int __init cachefiles_register_error_injection(void) +{ + cachefiles_sysctl = register_sysctl("cachefiles", cachefiles_sysctls); + if (!cachefiles_sysctl) + return -ENOMEM; + return 0; + +} + +void cachefiles_unregister_error_injection(void) +{ + unregister_sysctl_table(cachefiles_sysctl); +} diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index d4c1206af9fc..a08250d244ea 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -1,525 +1,461 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* FS-Cache interface to CacheFiles * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/slab.h> #include <linux/mount.h> +#include <linux/xattr.h> +#include <linux/file.h> +#include <linux/namei.h> +#include <linux/falloc.h> +#include <trace/events/fscache.h> #include "internal.h" -struct cachefiles_lookup_data { - struct cachefiles_xattr *auxdata; /* auxiliary data */ - char *key; /* key path */ -}; - -static int cachefiles_attr_changed(struct fscache_object *_object); +static atomic_t cachefiles_object_debug_id; /* - * allocate an object record for a cookie lookup and prepare the lookup data + * Allocate a cache object record. 
*/ -static struct fscache_object *cachefiles_alloc_object( - struct fscache_cache *_cache, - struct fscache_cookie *cookie) +static +struct cachefiles_object *cachefiles_alloc_object(struct fscache_cookie *cookie) { - struct cachefiles_lookup_data *lookup_data; + struct fscache_volume *vcookie = cookie->volume; + struct cachefiles_volume *volume = vcookie->cache_priv; struct cachefiles_object *object; - struct cachefiles_cache *cache; - struct cachefiles_xattr *auxdata; - unsigned keylen, auxlen; - void *buffer; - char *key; - - cache = container_of(_cache, struct cachefiles_cache, cache); - - _enter("{%s},%p,", cache->cache.identifier, cookie); - lookup_data = kmalloc(sizeof(*lookup_data), cachefiles_gfp); - if (!lookup_data) - goto nomem_lookup_data; + _enter("{%s},%x,", vcookie->key, cookie->debug_id); - /* create a new object record and a temporary leaf image */ - object = kmem_cache_alloc(cachefiles_object_jar, cachefiles_gfp); + object = kmem_cache_zalloc(cachefiles_object_jar, GFP_KERNEL); if (!object) - goto nomem_object; + return NULL; - ASSERTCMP(object->backer, ==, NULL); - - BUG_ON(test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)); - atomic_set(&object->usage, 1); - - fscache_object_init(&object->fscache, cookie, &cache->cache); + if (cachefiles_ondemand_init_obj_info(object, volume)) { + kmem_cache_free(cachefiles_object_jar, object); + return NULL; + } - object->type = cookie->def->type; + refcount_set(&object->ref, 1); - /* get hold of the raw key - * - stick the length on the front and leave space on the back for the - * encoder - */ - buffer = kmalloc((2 + 512) + 3, cachefiles_gfp); - if (!buffer) - goto nomem_buffer; - - keylen = cookie->def->get_key(cookie->netfs_data, buffer + 2, 512); - ASSERTCMP(keylen, <, 512); - - *(uint16_t *)buffer = keylen; - ((char *)buffer)[keylen + 2] = 0; - ((char *)buffer)[keylen + 3] = 0; - ((char *)buffer)[keylen + 4] = 0; - - /* turn the raw key into something that can work with as a filename */ - key = cachefiles_cook_key(buffer, keylen + 2, object->type); - if (!key) - goto nomem_key; - - /* get hold of the auxiliary data and prepend the object type */ - auxdata = buffer; - auxlen = 0; - if (cookie->def->get_aux) { - auxlen = cookie->def->get_aux(cookie->netfs_data, - auxdata->data, 511); - ASSERTCMP(auxlen, <, 511); - } + spin_lock_init(&object->lock); + INIT_LIST_HEAD(&object->cache_link); + object->volume = volume; + object->debug_id = atomic_inc_return(&cachefiles_object_debug_id); + object->cookie = fscache_get_cookie(cookie, fscache_cookie_get_attach_object); - auxdata->len = auxlen + 1; - auxdata->type = cookie->def->type; - - lookup_data->auxdata = auxdata; - lookup_data->key = key; - object->lookup_data = lookup_data; - - _leave(" = %p [%p]", &object->fscache, lookup_data); - return &object->fscache; - -nomem_key: - kfree(buffer); -nomem_buffer: - BUG_ON(test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)); - kmem_cache_free(cachefiles_object_jar, object); - fscache_object_destroyed(&cache->cache); -nomem_object: - kfree(lookup_data); -nomem_lookup_data: - _leave(" = -ENOMEM"); - return ERR_PTR(-ENOMEM); + fscache_count_object(vcookie->cache); + trace_cachefiles_ref(object->debug_id, cookie->debug_id, 1, + cachefiles_obj_new); + return object; } /* - * attempt to look up the nominated node in this cache - * - return -ETIMEDOUT to be scheduled again + * Note that an object has been seen. 
*/ -static int cachefiles_lookup_object(struct fscache_object *_object) +void cachefiles_see_object(struct cachefiles_object *object, + enum cachefiles_obj_ref_trace why) { - struct cachefiles_lookup_data *lookup_data; - struct cachefiles_object *parent, *object; - struct cachefiles_cache *cache; - const struct cred *saved_cred; - int ret; - - _enter("{OBJ%x}", _object->debug_id); - - cache = container_of(_object->cache, struct cachefiles_cache, cache); - parent = container_of(_object->parent, - struct cachefiles_object, fscache); - object = container_of(_object, struct cachefiles_object, fscache); - lookup_data = object->lookup_data; - - ASSERTCMP(lookup_data, !=, NULL); - - /* look up the key, creating any missing bits */ - cachefiles_begin_secure(cache, &saved_cred); - ret = cachefiles_walk_to_object(parent, object, - lookup_data->key, - lookup_data->auxdata); - cachefiles_end_secure(cache, saved_cred); - - /* polish off by setting the attributes of non-index files */ - if (ret == 0 && - object->fscache.cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) - cachefiles_attr_changed(&object->fscache); - - if (ret < 0 && ret != -ETIMEDOUT) { - if (ret != -ENOBUFS) - printk(KERN_WARNING - "CacheFiles: Lookup failed error %d\n", ret); - fscache_object_lookup_error(&object->fscache); - } - - _leave(" [%d]", ret); - return ret; + trace_cachefiles_ref(object->debug_id, object->cookie->debug_id, + refcount_read(&object->ref), why); } /* - * indication of lookup completion + * Increment the usage count on an object; */ -static void cachefiles_lookup_complete(struct fscache_object *_object) +struct cachefiles_object *cachefiles_grab_object(struct cachefiles_object *object, + enum cachefiles_obj_ref_trace why) { - struct cachefiles_object *object; - - object = container_of(_object, struct cachefiles_object, fscache); - - _enter("{OBJ%x,%p}", object->fscache.debug_id, object->lookup_data); + int r; - if (object->lookup_data) { - kfree(object->lookup_data->key); - kfree(object->lookup_data->auxdata); - kfree(object->lookup_data); - object->lookup_data = NULL; - } + __refcount_inc(&object->ref, &r); + trace_cachefiles_ref(object->debug_id, object->cookie->debug_id, r, why); + return object; } /* - * increment the usage count on an inode object (may fail if unmounting) + * dispose of a reference to an object */ -static -struct fscache_object *cachefiles_grab_object(struct fscache_object *_object) +void cachefiles_put_object(struct cachefiles_object *object, + enum cachefiles_obj_ref_trace why) { - struct cachefiles_object *object = - container_of(_object, struct cachefiles_object, fscache); + unsigned int object_debug_id = object->debug_id; + unsigned int cookie_debug_id = object->cookie->debug_id; + struct fscache_cache *cache; + bool done; + int r; + + done = __refcount_dec_and_test(&object->ref, &r); + trace_cachefiles_ref(object_debug_id, cookie_debug_id, r, why); + if (done) { + _debug("- kill object OBJ%x", object_debug_id); - _enter("{OBJ%x,%d}", _object->debug_id, atomic_read(&object->usage)); + ASSERTCMP(object->file, ==, NULL); -#ifdef CACHEFILES_DEBUG_SLAB - ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000); -#endif + kfree(object->d_name); + cachefiles_ondemand_deinit_obj_info(object); + cache = object->volume->cache->cache; + fscache_put_cookie(object->cookie, fscache_cookie_put_object); + object->cookie = NULL; + kmem_cache_free(cachefiles_object_jar, object); + fscache_uncount_object(cache); + } - atomic_inc(&object->usage); - return &object->fscache; + _leave(""); } /* - * 
update the auxiliary data for an object object on disk + * Adjust the size of a cache file if necessary to match the DIO size. We keep + * the EOF marker a multiple of DIO blocks so that we don't fall back to doing + * non-DIO for a partial block straddling the EOF, but we also have to be + * careful of someone expanding the file and accidentally accreting the + * padding. */ -static void cachefiles_update_object(struct fscache_object *_object) +static int cachefiles_adjust_size(struct cachefiles_object *object) { - struct cachefiles_object *object; - struct cachefiles_xattr *auxdata; - struct cachefiles_cache *cache; - struct fscache_cookie *cookie; - const struct cred *saved_cred; - unsigned auxlen; + struct iattr newattrs; + struct file *file = object->file; + uint64_t ni_size; + loff_t oi_size; + int ret; - _enter("{OBJ%x}", _object->debug_id); + ni_size = object->cookie->object_size; + ni_size = round_up(ni_size, CACHEFILES_DIO_BLOCK_SIZE); - object = container_of(_object, struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, struct cachefiles_cache, - cache); + _enter("{OBJ%x},[%llu]", + object->debug_id, (unsigned long long) ni_size); - if (!fscache_use_cookie(_object)) { - _leave(" [relinq]"); - return; - } + if (!file) + return -ENOBUFS; - cookie = object->fscache.cookie; + oi_size = i_size_read(file_inode(file)); + if (oi_size == ni_size) + return 0; - if (!cookie->def->get_aux) { - fscache_unuse_cookie(_object); - _leave(" [no aux]"); - return; - } + inode_lock(file_inode(file)); - auxdata = kmalloc(2 + 512 + 3, cachefiles_gfp); - if (!auxdata) { - fscache_unuse_cookie(_object); - _leave(" [nomem]"); - return; + /* if there's an extension to a partial page at the end of the backing + * file, we need to discard the partial page so that we pick up new + * data after it */ + if (oi_size & ~PAGE_MASK && ni_size > oi_size) { + _debug("discard tail %llx", oi_size); + newattrs.ia_valid = ATTR_SIZE; + newattrs.ia_size = oi_size & PAGE_MASK; + ret = cachefiles_inject_remove_error(); + if (ret == 0) + ret = notify_change(&nop_mnt_idmap, file->f_path.dentry, + &newattrs, NULL); + if (ret < 0) + goto truncate_failed; } - auxlen = cookie->def->get_aux(cookie->netfs_data, auxdata->data, 511); - fscache_unuse_cookie(_object); - ASSERTCMP(auxlen, <, 511); + newattrs.ia_valid = ATTR_SIZE; + newattrs.ia_size = ni_size; + ret = cachefiles_inject_write_error(); + if (ret == 0) + ret = notify_change(&nop_mnt_idmap, file->f_path.dentry, + &newattrs, NULL); - auxdata->len = auxlen + 1; - auxdata->type = cookie->def->type; +truncate_failed: + inode_unlock(file_inode(file)); - cachefiles_begin_secure(cache, &saved_cred); - cachefiles_update_object_xattr(object, auxdata); - cachefiles_end_secure(cache, saved_cred); - kfree(auxdata); - _leave(""); + if (ret < 0) + trace_cachefiles_io_error(NULL, file_inode(file), ret, + cachefiles_trace_notify_change_error); + if (ret == -EIO) { + cachefiles_io_error_obj(object, "Size set failed"); + ret = -ENOBUFS; + } + + _leave(" = %d", ret); + return ret; } /* - * discard the resources pinned by an object and effect retirement if - * requested + * Attempt to look up the nominated node in this cache */ -static void cachefiles_drop_object(struct fscache_object *_object) +static bool cachefiles_lookup_cookie(struct fscache_cookie *cookie) { struct cachefiles_object *object; - struct cachefiles_cache *cache; + struct cachefiles_cache *cache = cookie->volume->cache->cache_priv; const struct cred *saved_cred; + bool success; - ASSERT(_object); + 
object = cachefiles_alloc_object(cookie); + if (!object) + goto fail; - object = container_of(_object, struct cachefiles_object, fscache); + _enter("{OBJ%x}", object->debug_id); - _enter("{OBJ%x,%d}", - object->fscache.debug_id, atomic_read(&object->usage)); + if (!cachefiles_cook_key(object)) + goto fail_put; - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); + cookie->cache_priv = object; -#ifdef CACHEFILES_DEBUG_SLAB - ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000); -#endif + cachefiles_begin_secure(cache, &saved_cred); - /* delete retired objects */ - if (test_bit(FSCACHE_COOKIE_RETIRED, &object->fscache.cookie->flags) && - _object != cache->cache.fsdef - ) { - _debug("- retire object OBJ%x", object->fscache.debug_id); - cachefiles_begin_secure(cache, &saved_cred); - cachefiles_delete_object(cache, object); - cachefiles_end_secure(cache, saved_cred); - } + success = cachefiles_look_up_object(object); + if (!success) + goto fail_withdraw; - /* close the filesystem stuff attached to the object */ - if (object->backer != object->dentry) - dput(object->backer); - object->backer = NULL; - - /* note that the object is now inactive */ - if (test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) { - write_lock(&cache->active_lock); - if (!test_and_clear_bit(CACHEFILES_OBJECT_ACTIVE, - &object->flags)) - BUG(); - rb_erase(&object->active_node, &cache->active_nodes); - wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE); - write_unlock(&cache->active_lock); - } + cachefiles_see_object(object, cachefiles_obj_see_lookup_cookie); - dput(object->dentry); - object->dentry = NULL; + spin_lock(&cache->object_list_lock); + list_add(&object->cache_link, &cache->object_list); + spin_unlock(&cache->object_list_lock); + cachefiles_adjust_size(object); - _leave(""); + cachefiles_end_secure(cache, saved_cred); + _leave(" = t"); + return true; + +fail_withdraw: + cachefiles_end_secure(cache, saved_cred); + cachefiles_see_object(object, cachefiles_obj_see_lookup_failed); + fscache_caching_failed(cookie); + _debug("failed c=%08x o=%08x", cookie->debug_id, object->debug_id); + /* The caller holds an access count on the cookie, so we need them to + * drop it before we can withdraw the object. + */ + return false; + +fail_put: + cachefiles_put_object(object, cachefiles_obj_put_alloc_fail); +fail: + return false; } /* - * dispose of a reference to an object + * Shorten the backing object to discard any dirty data and free up + * any unused granules. 
*/ -static void cachefiles_put_object(struct fscache_object *_object) +static bool cachefiles_shorten_object(struct cachefiles_object *object, + struct file *file, loff_t new_size) { - struct cachefiles_object *object; - struct fscache_cache *cache; - - ASSERT(_object); - - object = container_of(_object, struct cachefiles_object, fscache); - - _enter("{OBJ%x,%d}", - object->fscache.debug_id, atomic_read(&object->usage)); - -#ifdef CACHEFILES_DEBUG_SLAB - ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000); -#endif - - ASSERTIFCMP(object->fscache.parent, - object->fscache.parent->n_children, >, 0); - - if (atomic_dec_and_test(&object->usage)) { - _debug("- kill object OBJ%x", object->fscache.debug_id); + struct cachefiles_cache *cache = object->volume->cache; + struct inode *inode = file_inode(file); + loff_t i_size, dio_size; + int ret; - ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)); - ASSERTCMP(object->fscache.parent, ==, NULL); - ASSERTCMP(object->backer, ==, NULL); - ASSERTCMP(object->dentry, ==, NULL); - ASSERTCMP(object->fscache.n_ops, ==, 0); - ASSERTCMP(object->fscache.n_children, ==, 0); + dio_size = round_up(new_size, CACHEFILES_DIO_BLOCK_SIZE); + i_size = i_size_read(inode); + + trace_cachefiles_trunc(object, inode, i_size, dio_size, + cachefiles_trunc_shrink); + ret = cachefiles_inject_remove_error(); + if (ret == 0) + ret = vfs_truncate(&file->f_path, dio_size); + if (ret < 0) { + trace_cachefiles_io_error(object, file_inode(file), ret, + cachefiles_trace_trunc_error); + cachefiles_io_error_obj(object, "Trunc-to-size failed %d", ret); + cachefiles_remove_object_xattr(cache, object, file->f_path.dentry); + return false; + } - if (object->lookup_data) { - kfree(object->lookup_data->key); - kfree(object->lookup_data->auxdata); - kfree(object->lookup_data); - object->lookup_data = NULL; + if (new_size < dio_size) { + trace_cachefiles_trunc(object, inode, dio_size, new_size, + cachefiles_trunc_dio_adjust); + ret = cachefiles_inject_write_error(); + if (ret == 0) + ret = vfs_fallocate(file, FALLOC_FL_ZERO_RANGE, + new_size, dio_size - new_size); + if (ret < 0) { + trace_cachefiles_io_error(object, file_inode(file), ret, + cachefiles_trace_fallocate_error); + cachefiles_io_error_obj(object, "Trunc-to-dio-size failed %d", ret); + cachefiles_remove_object_xattr(cache, object, file->f_path.dentry); + return false; } - - cache = object->fscache.cache; - fscache_object_destroy(&object->fscache); - kmem_cache_free(cachefiles_object_jar, object); - fscache_object_destroyed(cache); } - _leave(""); + return true; } /* - * sync a cache + * Resize the backing object. 
 */
-static void cachefiles_sync_cache(struct fscache_cache *_cache)
+static void cachefiles_resize_cookie(struct netfs_cache_resources *cres,
+				     loff_t new_size)
 {
-	struct cachefiles_cache *cache;
+	struct cachefiles_object *object = cachefiles_cres_object(cres);
+	struct cachefiles_cache *cache = object->volume->cache;
+	struct fscache_cookie *cookie = object->cookie;
 	const struct cred *saved_cred;
-	int ret;
-
-	_enter("%p", _cache);
+	struct file *file = cachefiles_cres_file(cres);
+	loff_t old_size = cookie->object_size;
 
-	cache = container_of(_cache, struct cachefiles_cache, cache);
+	_enter("%llu->%llu", old_size, new_size);
 
-	/* make sure all pages pinned by operations on behalf of the netfs are
-	 * written to disc */
-	cachefiles_begin_secure(cache, &saved_cred);
-	down_read(&cache->mnt->mnt_sb->s_umount);
-	ret = sync_filesystem(cache->mnt->mnt_sb);
-	up_read(&cache->mnt->mnt_sb->s_umount);
-	cachefiles_end_secure(cache, saved_cred);
+	if (new_size < old_size) {
+		cachefiles_begin_secure(cache, &saved_cred);
+		cachefiles_shorten_object(object, file, new_size);
+		cachefiles_end_secure(cache, saved_cred);
+		object->cookie->object_size = new_size;
+		return;
+	}
 
-	if (ret == -EIO)
-		cachefiles_io_error(cache,
-				    "Attempt to sync backing fs superblock"
-				    " returned error %d",
-				    ret);
+	/* The file is being expanded.  We don't need to do anything in
+	 * particular.  cookie->initial_size doesn't change and so the point
+	 * before which we have to download doesn't change.
+	 */
+	cookie->object_size = new_size;
 }
 
 /*
- * notification the attributes on an object have changed
- * - called with reads/writes excluded by FS-Cache
+ * Commit changes to the object as we drop it.
  */
-static int cachefiles_attr_changed(struct fscache_object *_object)
+static void cachefiles_commit_object(struct cachefiles_object *object,
+				     struct cachefiles_cache *cache)
 {
-	struct cachefiles_object *object;
-	struct cachefiles_cache *cache;
-	const struct cred *saved_cred;
-	struct iattr newattrs;
-	uint64_t ni_size;
-	loff_t oi_size;
-	int ret;
-
-	_object->cookie->def->get_attr(_object->cookie->netfs_data, &ni_size);
+	bool update = false;
 
-	_enter("{OBJ%x},[%llu]",
-	       _object->debug_id, (unsigned long long) ni_size);
+	if (test_and_clear_bit(FSCACHE_COOKIE_LOCAL_WRITE, &object->cookie->flags))
+		update = true;
+	if (test_and_clear_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &object->cookie->flags))
+		update = true;
+	if (update)
+		cachefiles_set_object_xattr(object);
 
-	object = container_of(_object, struct cachefiles_object, fscache);
-	cache = container_of(object->fscache.cache,
-			     struct cachefiles_cache, cache);
+	if (test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags))
+		cachefiles_commit_tmpfile(cache, object);
+}
 
-	if (ni_size == object->i_size)
-		return 0;
+/*
+ * Finalise an object and close the VFS structs that we have.
+ */ +static void cachefiles_clean_up_object(struct cachefiles_object *object, + struct cachefiles_cache *cache) +{ + struct file *file; + + if (test_bit(FSCACHE_COOKIE_RETIRED, &object->cookie->flags)) { + if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) { + cachefiles_see_object(object, cachefiles_obj_see_clean_delete); + _debug("- inval object OBJ%x", object->debug_id); + cachefiles_delete_object(object, FSCACHE_OBJECT_WAS_RETIRED); + } else { + cachefiles_see_object(object, cachefiles_obj_see_clean_drop_tmp); + _debug("- inval object OBJ%x tmpfile", object->debug_id); + } + } else { + cachefiles_see_object(object, cachefiles_obj_see_clean_commit); + cachefiles_commit_object(object, cache); + } - if (!object->backer) - return -ENOBUFS; + cachefiles_unmark_inode_in_use(object, object->file); - ASSERT(S_ISREG(object->backer->d_inode->i_mode)); + spin_lock(&object->lock); + file = object->file; + object->file = NULL; + spin_unlock(&object->lock); - fscache_set_store_limit(&object->fscache, ni_size); + if (file) + fput(file); +} - oi_size = i_size_read(object->backer->d_inode); - if (oi_size == ni_size) - return 0; +/* + * Withdraw caching for a cookie. + */ +static void cachefiles_withdraw_cookie(struct fscache_cookie *cookie) +{ + struct cachefiles_object *object = cookie->cache_priv; + struct cachefiles_cache *cache = object->volume->cache; + const struct cred *saved_cred; - cachefiles_begin_secure(cache, &saved_cred); - mutex_lock(&object->backer->d_inode->i_mutex); + _enter("o=%x", object->debug_id); + cachefiles_see_object(object, cachefiles_obj_see_withdraw_cookie); - /* if there's an extension to a partial page at the end of the backing - * file, we need to discard the partial page so that we pick up new - * data after it */ - if (oi_size & ~PAGE_MASK && ni_size > oi_size) { - _debug("discard tail %llx", oi_size); - newattrs.ia_valid = ATTR_SIZE; - newattrs.ia_size = oi_size & PAGE_MASK; - ret = notify_change(object->backer, &newattrs); - if (ret < 0) - goto truncate_failed; + if (!list_empty(&object->cache_link)) { + spin_lock(&cache->object_list_lock); + cachefiles_see_object(object, cachefiles_obj_see_withdrawal); + list_del_init(&object->cache_link); + spin_unlock(&cache->object_list_lock); } - newattrs.ia_valid = ATTR_SIZE; - newattrs.ia_size = ni_size; - ret = notify_change(object->backer, &newattrs); + cachefiles_ondemand_clean_object(object); -truncate_failed: - mutex_unlock(&object->backer->d_inode->i_mutex); - cachefiles_end_secure(cache, saved_cred); - - if (ret == -EIO) { - fscache_set_store_limit(&object->fscache, 0); - cachefiles_io_error_obj(object, "Size set failed"); - ret = -ENOBUFS; + if (object->file) { + cachefiles_begin_secure(cache, &saved_cred); + cachefiles_clean_up_object(object, cache); + cachefiles_end_secure(cache, saved_cred); } - _leave(" = %d", ret); - return ret; + cookie->cache_priv = NULL; + cachefiles_put_object(object, cachefiles_obj_put_detach); } /* - * Invalidate an object + * Invalidate the storage associated with a cookie. 
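
	[Editor's note — illustrative aside, not part of the patch.
	Invalidation below swaps a freshly created, unlinked tmpfile in
	under object->lock, so in-flight I/O immediately sees an empty
	backing file; the old file is only buried/released afterwards.
	The userspace analogue of the tmpfile trick, with an example path
	assumed to be on an O_TMPFILE-capable filesystem:

		#include <fcntl.h>

		/* An anonymous, unlinked file: it starts empty and has
		 * no name, so nothing else can observe or reuse it. */
		int fd = open("/var/cache/fscache", O_TMPFILE | O_RDWR, 0600);

	cachefiles_create_tmpfile() (defined in namei.c) performs the
	kernel-internal equivalent within the cache directory.]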
*/ -static void cachefiles_invalidate_object(struct fscache_operation *op) +static bool cachefiles_invalidate_cookie(struct fscache_cookie *cookie) { - struct cachefiles_object *object; - struct cachefiles_cache *cache; - const struct cred *saved_cred; - struct path path; - uint64_t ni_size; - int ret; - - object = container_of(op->object, struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); - - op->object->cookie->def->get_attr(op->object->cookie->netfs_data, - &ni_size); - - _enter("{OBJ%x},[%llu]", - op->object->debug_id, (unsigned long long)ni_size); + struct cachefiles_object *object = cookie->cache_priv; + struct file *new_file, *old_file; + bool old_tmpfile; - if (object->backer) { - ASSERT(S_ISREG(object->backer->d_inode->i_mode)); + _enter("o=%x,[%llu]", object->debug_id, object->cookie->object_size); - fscache_set_store_limit(&object->fscache, ni_size); + old_tmpfile = test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags); - path.dentry = object->backer; - path.mnt = cache->mnt; - - cachefiles_begin_secure(cache, &saved_cred); - ret = vfs_truncate(&path, 0); - if (ret == 0) - ret = vfs_truncate(&path, ni_size); - cachefiles_end_secure(cache, saved_cred); + if (!object->file) { + fscache_resume_after_invalidation(cookie); + _leave(" = t [light]"); + return true; + } - if (ret != 0) { - fscache_set_store_limit(&object->fscache, 0); - if (ret == -EIO) - cachefiles_io_error_obj(object, - "Invalidate failed"); + new_file = cachefiles_create_tmpfile(object); + if (IS_ERR(new_file)) + goto failed; + + /* Substitute the VFS target */ + _debug("sub"); + spin_lock(&object->lock); + + old_file = object->file; + object->file = new_file; + object->content_info = CACHEFILES_CONTENT_NO_DATA; + set_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags); + set_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &object->cookie->flags); + + spin_unlock(&object->lock); + _debug("subbed"); + + /* Allow I/O to take place again */ + fscache_resume_after_invalidation(cookie); + + if (old_file) { + if (!old_tmpfile) { + struct cachefiles_volume *volume = object->volume; + struct dentry *fan = volume->fanout[(u8)cookie->key_hash]; + struct dentry *obj; + + obj = start_removing_dentry(fan, old_file->f_path.dentry); + if (!IS_ERR(obj)) + cachefiles_bury_object(volume->cache, object, + fan, obj, + FSCACHE_OBJECT_INVALIDATED); } + fput(old_file); } - fscache_op_complete(op, true); - _leave(""); -} + _leave(" = t"); + return true; -/* - * dissociate a cache from all the pages it was backing - */ -static void cachefiles_dissociate_pages(struct fscache_cache *cache) -{ - _enter(""); +failed: + _leave(" = f"); + return false; } const struct fscache_cache_ops cachefiles_cache_ops = { .name = "cachefiles", - .alloc_object = cachefiles_alloc_object, - .lookup_object = cachefiles_lookup_object, - .lookup_complete = cachefiles_lookup_complete, - .grab_object = cachefiles_grab_object, - .update_object = cachefiles_update_object, - .invalidate_object = cachefiles_invalidate_object, - .drop_object = cachefiles_drop_object, - .put_object = cachefiles_put_object, - .sync_cache = cachefiles_sync_cache, - .attr_changed = cachefiles_attr_changed, - .read_or_alloc_page = cachefiles_read_or_alloc_page, - .read_or_alloc_pages = cachefiles_read_or_alloc_pages, - .allocate_page = cachefiles_allocate_page, - .allocate_pages = cachefiles_allocate_pages, - .write_page = cachefiles_write_page, - .uncache_page = cachefiles_uncache_page, - .dissociate_pages = cachefiles_dissociate_pages, + 
.acquire_volume = cachefiles_acquire_volume, + .free_volume = cachefiles_free_volume, + .lookup_cookie = cachefiles_lookup_cookie, + .withdraw_cookie = cachefiles_withdraw_cookie, + .invalidate_cookie = cachefiles_invalidate_cookie, + .begin_operation = cachefiles_begin_operation, + .resize_cookie = cachefiles_resize_cookie, + .prepare_to_write = cachefiles_prepare_to_write, }; diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 49382519907a..b62cd3e9a18e 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -1,65 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* General netfs cache on cache files internal defs * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ +#ifdef pr_fmt +#undef pr_fmt +#endif + +#define pr_fmt(fmt) "CacheFiles: " fmt + + #include <linux/fscache-cache.h> -#include <linux/timer.h> -#include <linux/wait.h> -#include <linux/workqueue.h> +#include <linux/cred.h> #include <linux/security.h> +#include <linux/xarray.h> +#include <linux/cachefiles.h> + +#define CACHEFILES_DIO_BLOCK_SIZE 4096 struct cachefiles_cache; struct cachefiles_object; -extern unsigned cachefiles_debug; -#define CACHEFILES_DEBUG_KENTER 1 -#define CACHEFILES_DEBUG_KLEAVE 2 -#define CACHEFILES_DEBUG_KDEBUG 4 +enum cachefiles_content { + /* These values are saved on disk */ + CACHEFILES_CONTENT_NO_DATA = 0, /* No content stored */ + CACHEFILES_CONTENT_SINGLE = 1, /* Content is monolithic, all is present */ + CACHEFILES_CONTENT_ALL = 2, /* Content is all present, no map */ + CACHEFILES_CONTENT_BACKFS_MAP = 3, /* Content is piecemeal, mapped through backing fs */ + CACHEFILES_CONTENT_DIRTY = 4, /* Content is dirty (only seen on disk) */ + nr__cachefiles_content +}; -#define cachefiles_gfp (__GFP_WAIT | __GFP_NORETRY | __GFP_NOMEMALLOC) +/* + * Cached volume representation. + */ +struct cachefiles_volume { + struct cachefiles_cache *cache; + struct list_head cache_link; /* Link in cache->volumes */ + struct fscache_volume *vcookie; /* The netfs's representation */ + struct dentry *dentry; /* The volume dentry */ + struct dentry *fanout[256]; /* Fanout subdirs */ +}; + +enum cachefiles_object_state { + CACHEFILES_ONDEMAND_OBJSTATE_CLOSE, /* Anonymous fd closed by daemon or initial state */ + CACHEFILES_ONDEMAND_OBJSTATE_OPEN, /* Anonymous fd associated with object is available */ + CACHEFILES_ONDEMAND_OBJSTATE_REOPENING, /* Object that was closed and is being reopened. */ + CACHEFILES_ONDEMAND_OBJSTATE_DROPPING, /* Object is being dropped. */ +}; + +struct cachefiles_ondemand_info { + struct work_struct ondemand_work; + int ondemand_id; + enum cachefiles_object_state state; + struct cachefiles_object *object; + spinlock_t lock; +}; /* - * node records + * Backing file state. 
*/ struct cachefiles_object { - struct fscache_object fscache; /* fscache handle */ - struct cachefiles_lookup_data *lookup_data; /* cached lookup data */ - struct dentry *dentry; /* the file/dir representing this object */ - struct dentry *backer; /* backing file */ - loff_t i_size; /* object size */ + struct fscache_cookie *cookie; /* Netfs data storage object cookie */ + struct cachefiles_volume *volume; /* Cache volume that holds this object */ + struct list_head cache_link; /* Link in cache->*_list */ + struct file *file; /* The file representing this object */ + char *d_name; /* Backing file name */ + int debug_id; + spinlock_t lock; + refcount_t ref; + enum cachefiles_content content_info:8; /* Info about content presence */ unsigned long flags; -#define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */ -#define CACHEFILES_OBJECT_BURIED 1 /* T if preemptively buried */ - atomic_t usage; /* object usage count */ - uint8_t type; /* object type */ - uint8_t new; /* T if object new */ - spinlock_t work_lock; - struct rb_node active_node; /* link in active tree (dentry is key) */ +#define CACHEFILES_OBJECT_USING_TMPFILE 0 /* Have an unlinked tmpfile */ +#ifdef CONFIG_CACHEFILES_ONDEMAND + struct cachefiles_ondemand_info *ondemand; +#endif }; -extern struct kmem_cache *cachefiles_object_jar; +#define CACHEFILES_ONDEMAND_ID_CLOSED -1 /* * Cache files cache definition */ struct cachefiles_cache { - struct fscache_cache cache; /* FS-Cache record */ + struct fscache_cache *cache; /* Cache cookie */ struct vfsmount *mnt; /* mountpoint holding the cache */ + struct dentry *store; /* Directory into which live objects go */ struct dentry *graveyard; /* directory into which dead objects go */ struct file *cachefilesd; /* manager daemon handle */ + struct list_head volumes; /* List of volume objects */ + struct list_head object_list; /* List of active objects */ + spinlock_t object_list_lock; /* Lock for volumes and object_list */ const struct cred *cache_cred; /* security override for accessing cache */ struct mutex daemon_mutex; /* command serialisation mutex */ wait_queue_head_t daemon_pollwq; /* poll waitqueue for daemon */ - struct rb_root active_nodes; /* active nodes (can't be culled) */ - rwlock_t active_lock; /* lock for active_nodes */ atomic_t gravecounter; /* graveyard uniquifier */ + atomic_t f_released; /* number of objects released lately */ + atomic_long_t b_released; /* number of blocks released lately */ + atomic_long_t b_writing; /* Number of blocks being written */ unsigned frun_percent; /* when to stop culling (% files) */ unsigned fcull_percent; /* when to start culling (% files) */ unsigned fstop_percent; /* when to stop allocating (% files) */ @@ -67,7 +107,7 @@ struct cachefiles_cache { unsigned bcull_percent; /* when to start culling (% blocks) */ unsigned bstop_percent; /* when to stop allocating (% blocks) */ unsigned bsize; /* cache's block size */ - unsigned bshift; /* min(ilog2(PAGE_SIZE / bsize), 0) */ + unsigned bshift; /* ilog2(bsize) */ uint64_t frun; /* when to stop culling */ uint64_t fcull; /* when to start culling */ uint64_t fstop; /* when to stop allocating */ @@ -79,41 +119,48 @@ struct cachefiles_cache { #define CACHEFILES_DEAD 1 /* T if cache dead */ #define CACHEFILES_CULLING 2 /* T if cull engaged */ #define CACHEFILES_STATE_CHANGED 3 /* T if state changed (poll trigger) */ +#define CACHEFILES_ONDEMAND_MODE 4 /* T if in on-demand read mode */ char *rootdirname; /* name of cache root directory */ - char *secctx; /* LSM security context */ char *tag; /* 
cache binding tag */ + refcount_t unbind_pincount;/* refcount to do daemon unbind */ + struct xarray reqs; /* xarray of pending on-demand requests */ + unsigned long req_id_next; + struct xarray ondemand_ids; /* xarray for ondemand_id allocation */ + u32 ondemand_id_next; + u32 msg_id_next; + u32 secid; /* LSM security id */ + bool have_secid; /* whether "secid" was set */ }; -/* - * backing file read tracking - */ -struct cachefiles_one_read { - wait_queue_t monitor; /* link into monitored waitqueue */ - struct page *back_page; /* backing file page we're waiting for */ - struct page *netfs_page; /* netfs page we're going to fill */ - struct fscache_retrieval *op; /* retrieval op covering this */ - struct list_head op_link; /* link in op's todo list */ -}; +static inline bool cachefiles_in_ondemand_mode(struct cachefiles_cache *cache) +{ + return IS_ENABLED(CONFIG_CACHEFILES_ONDEMAND) && + test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags); +} -/* - * backing file write tracking - */ -struct cachefiles_one_write { - struct page *netfs_page; /* netfs page to copy */ - struct cachefiles_object *object; - struct list_head obj_link; /* link in object's lists */ - fscache_rw_complete_t end_io_func; - void *context; +struct cachefiles_req { + struct cachefiles_object *object; + struct completion done; + refcount_t ref; + int error; + struct cachefiles_msg msg; }; -/* - * auxiliary data xattr buffer - */ -struct cachefiles_xattr { - uint16_t len; - uint8_t type; - uint8_t data[]; -}; +#define CACHEFILES_REQ_NEW XA_MARK_1 + +#include <trace/events/cachefiles.h> + +static inline +struct file *cachefiles_cres_file(struct netfs_cache_resources *cres) +{ + return cres->cache_priv2; +} + +static inline +struct cachefiles_object *cachefiles_cres_object(struct netfs_cache_resources *cres) +{ + return fscache_cres_cookie(cres)->cache_priv; +} /* * note change of state for daemon @@ -125,87 +172,215 @@ static inline void cachefiles_state_changed(struct cachefiles_cache *cache) } /* - * bind.c + * cache.c */ -extern int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args); -extern void cachefiles_daemon_unbind(struct cachefiles_cache *cache); +extern int cachefiles_add_cache(struct cachefiles_cache *cache); +extern void cachefiles_withdraw_cache(struct cachefiles_cache *cache); + +enum cachefiles_has_space_for { + cachefiles_has_space_check, + cachefiles_has_space_for_write, + cachefiles_has_space_for_create, +}; +extern int cachefiles_has_space(struct cachefiles_cache *cache, + unsigned fnr, unsigned bnr, + enum cachefiles_has_space_for reason); /* * daemon.c */ extern const struct file_operations cachefiles_daemon_fops; +extern void cachefiles_flush_reqs(struct cachefiles_cache *cache); +extern void cachefiles_get_unbind_pincount(struct cachefiles_cache *cache); +extern void cachefiles_put_unbind_pincount(struct cachefiles_cache *cache); -extern int cachefiles_has_space(struct cachefiles_cache *cache, - unsigned fnr, unsigned bnr); +/* + * error_inject.c + */ +#ifdef CONFIG_CACHEFILES_ERROR_INJECTION +extern unsigned int cachefiles_error_injection_state; +extern int cachefiles_register_error_injection(void); +extern void cachefiles_unregister_error_injection(void); + +#else +#define cachefiles_error_injection_state 0 + +static inline int cachefiles_register_error_injection(void) +{ + return 0; +} + +static inline void cachefiles_unregister_error_injection(void) +{ +} +#endif + + +static inline int cachefiles_inject_read_error(void) +{ + return cachefiles_error_injection_state & 2 ? 
-EIO : 0; +} + +static inline int cachefiles_inject_write_error(void) +{ + return cachefiles_error_injection_state & 2 ? -EIO : + cachefiles_error_injection_state & 1 ? -ENOSPC : + 0; +} + +static inline int cachefiles_inject_remove_error(void) +{ + return cachefiles_error_injection_state & 2 ? -EIO : 0; +} /* * interface.c */ extern const struct fscache_cache_ops cachefiles_cache_ops; +extern void cachefiles_see_object(struct cachefiles_object *object, + enum cachefiles_obj_ref_trace why); +extern struct cachefiles_object *cachefiles_grab_object(struct cachefiles_object *object, + enum cachefiles_obj_ref_trace why); +extern void cachefiles_put_object(struct cachefiles_object *object, + enum cachefiles_obj_ref_trace why); + +/* + * io.c + */ +extern bool cachefiles_begin_operation(struct netfs_cache_resources *cres, + enum fscache_want_state want_state); +extern int __cachefiles_prepare_write(struct cachefiles_object *object, + struct file *file, + loff_t *_start, size_t *_len, size_t upper_len, + bool no_space_allocated_yet); +extern int __cachefiles_write(struct cachefiles_object *object, + struct file *file, + loff_t start_pos, + struct iov_iter *iter, + netfs_io_terminated_t term_func, + void *term_func_priv); /* * key.c */ -extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type); +extern bool cachefiles_cook_key(struct cachefiles_object *object); + +/* + * main.c + */ +extern struct kmem_cache *cachefiles_object_jar; /* * namei.c */ -extern int cachefiles_delete_object(struct cachefiles_cache *cache, - struct cachefiles_object *object); -extern int cachefiles_walk_to_object(struct cachefiles_object *parent, - struct cachefiles_object *object, - const char *key, - struct cachefiles_xattr *auxdata); +extern void cachefiles_unmark_inode_in_use(struct cachefiles_object *object, + struct file *file); +extern int cachefiles_bury_object(struct cachefiles_cache *cache, + struct cachefiles_object *object, + struct dentry *dir, + struct dentry *rep, + enum fscache_why_object_killed why); +extern int cachefiles_delete_object(struct cachefiles_object *object, + enum fscache_why_object_killed why); +extern bool cachefiles_look_up_object(struct cachefiles_object *object); extern struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, struct dentry *dir, - const char *name); + const char *name, + bool *_is_new); +extern void cachefiles_put_directory(struct dentry *dir); extern int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, char *filename); extern int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir, char *filename); +extern struct file *cachefiles_create_tmpfile(struct cachefiles_object *object); +extern bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache, + struct cachefiles_object *object); /* - * proc.c + * ondemand.c */ -#ifdef CONFIG_CACHEFILES_HISTOGRAM -extern atomic_t cachefiles_lookup_histogram[HZ]; -extern atomic_t cachefiles_mkdir_histogram[HZ]; -extern atomic_t cachefiles_create_histogram[HZ]; +#ifdef CONFIG_CACHEFILES_ONDEMAND +extern ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + char __user *_buffer, size_t buflen); + +extern int cachefiles_ondemand_copen(struct cachefiles_cache *cache, + char *args); + +extern int cachefiles_ondemand_restore(struct cachefiles_cache *cache, + char *args); + +extern int cachefiles_ondemand_init_object(struct cachefiles_object *object); +extern void cachefiles_ondemand_clean_object(struct cachefiles_object *object); + +extern int 
cachefiles_ondemand_read(struct cachefiles_object *object, + loff_t pos, size_t len); + +extern int cachefiles_ondemand_init_obj_info(struct cachefiles_object *obj, + struct cachefiles_volume *volume); +extern void cachefiles_ondemand_deinit_obj_info(struct cachefiles_object *obj); + +#define CACHEFILES_OBJECT_STATE_FUNCS(_state, _STATE) \ +static inline bool \ +cachefiles_ondemand_object_is_##_state(const struct cachefiles_object *object) \ +{ \ + return object->ondemand->state == CACHEFILES_ONDEMAND_OBJSTATE_##_STATE; \ +} \ + \ +static inline void \ +cachefiles_ondemand_set_object_##_state(struct cachefiles_object *object) \ +{ \ + object->ondemand->state = CACHEFILES_ONDEMAND_OBJSTATE_##_STATE; \ +} -extern int __init cachefiles_proc_init(void); -extern void cachefiles_proc_cleanup(void); -static inline -void cachefiles_hist(atomic_t histogram[], unsigned long start_jif) +CACHEFILES_OBJECT_STATE_FUNCS(open, OPEN); +CACHEFILES_OBJECT_STATE_FUNCS(close, CLOSE); +CACHEFILES_OBJECT_STATE_FUNCS(reopening, REOPENING); +CACHEFILES_OBJECT_STATE_FUNCS(dropping, DROPPING); + +static inline bool cachefiles_ondemand_is_reopening_read(struct cachefiles_req *req) { - unsigned long jif = jiffies - start_jif; - if (jif >= HZ) - jif = HZ - 1; - atomic_inc(&histogram[jif]); + return cachefiles_ondemand_object_is_reopening(req->object) && + req->msg.opcode == CACHEFILES_OP_READ; } #else -#define cachefiles_proc_init() (0) -#define cachefiles_proc_cleanup() do {} while (0) -#define cachefiles_hist(hist, start_jif) do {} while (0) -#endif +static inline ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + char __user *_buffer, size_t buflen) +{ + return -EOPNOTSUPP; +} -/* - * rdwr.c - */ -extern int cachefiles_read_or_alloc_page(struct fscache_retrieval *, - struct page *, gfp_t); -extern int cachefiles_read_or_alloc_pages(struct fscache_retrieval *, - struct list_head *, unsigned *, - gfp_t); -extern int cachefiles_allocate_page(struct fscache_retrieval *, struct page *, - gfp_t); -extern int cachefiles_allocate_pages(struct fscache_retrieval *, - struct list_head *, unsigned *, gfp_t); -extern int cachefiles_write_page(struct fscache_storage *, struct page *); -extern void cachefiles_uncache_page(struct fscache_object *, struct page *); +static inline int cachefiles_ondemand_init_object(struct cachefiles_object *object) +{ + return 0; +} + +static inline void cachefiles_ondemand_clean_object(struct cachefiles_object *object) +{ +} + +static inline int cachefiles_ondemand_read(struct cachefiles_object *object, + loff_t pos, size_t len) +{ + return -EOPNOTSUPP; +} + +static inline int cachefiles_ondemand_init_obj_info(struct cachefiles_object *obj, + struct cachefiles_volume *volume) +{ + return 0; +} +static inline void cachefiles_ondemand_deinit_obj_info(struct cachefiles_object *obj) +{ +} + +static inline bool cachefiles_ondemand_is_reopening_read(struct cachefiles_req *req) +{ + return false; +} +#endif /* * security.c @@ -228,44 +403,55 @@ static inline void cachefiles_end_secure(struct cachefiles_cache *cache, } /* + * volume.c + */ +void cachefiles_acquire_volume(struct fscache_volume *volume); +void cachefiles_free_volume(struct fscache_volume *volume); +void cachefiles_withdraw_volume(struct cachefiles_volume *volume); + +/* * xattr.c */ -extern int cachefiles_check_object_type(struct cachefiles_object *object); -extern int cachefiles_set_object_xattr(struct cachefiles_object *object, - struct cachefiles_xattr *auxdata); -extern int cachefiles_update_object_xattr(struct 
cachefiles_object *object, - struct cachefiles_xattr *auxdata); -extern int cachefiles_check_object_xattr(struct cachefiles_object *object, - struct cachefiles_xattr *auxdata); +extern int cachefiles_set_object_xattr(struct cachefiles_object *object); +extern int cachefiles_check_auxdata(struct cachefiles_object *object, + struct file *file); extern int cachefiles_remove_object_xattr(struct cachefiles_cache *cache, + struct cachefiles_object *object, struct dentry *dentry); - +extern void cachefiles_prepare_to_write(struct fscache_cookie *cookie); +extern bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume); +extern int cachefiles_check_volume_xattr(struct cachefiles_volume *volume); /* - * error handling + * Error handling */ -#define kerror(FMT, ...) printk(KERN_ERR "CacheFiles: "FMT"\n", ##__VA_ARGS__) - #define cachefiles_io_error(___cache, FMT, ...) \ do { \ - kerror("I/O Error: " FMT, ##__VA_ARGS__); \ - fscache_io_error(&(___cache)->cache); \ + pr_err("I/O Error: " FMT"\n", ##__VA_ARGS__); \ + fscache_io_error((___cache)->cache); \ set_bit(CACHEFILES_DEAD, &(___cache)->flags); \ + if (cachefiles_in_ondemand_mode(___cache)) \ + cachefiles_flush_reqs(___cache); \ } while (0) #define cachefiles_io_error_obj(object, FMT, ...) \ do { \ struct cachefiles_cache *___cache; \ \ - ___cache = container_of((object)->fscache.cache, \ - struct cachefiles_cache, cache); \ - cachefiles_io_error(___cache, FMT, ##__VA_ARGS__); \ + ___cache = (object)->volume->cache; \ + cachefiles_io_error(___cache, FMT " [o=%08x]", ##__VA_ARGS__, \ + (object)->debug_id); \ } while (0) /* - * debug tracing + * Debug tracing */ +extern unsigned cachefiles_debug; +#define CACHEFILES_DEBUG_KENTER 1 +#define CACHEFILES_DEBUG_KLEAVE 2 +#define CACHEFILES_DEBUG_KDEBUG 4 + #define dbgprintk(FMT, ...) \ printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) @@ -309,8 +495,8 @@ do { \ #define ASSERT(X) \ do { \ if (unlikely(!(X))) { \ - printk(KERN_ERR "\n"); \ - printk(KERN_ERR "CacheFiles: Assertion failed\n"); \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ BUG(); \ } \ } while (0) @@ -318,9 +504,9 @@ do { \ #define ASSERTCMP(X, OP, Y) \ do { \ if (unlikely(!((X) OP (Y)))) { \ - printk(KERN_ERR "\n"); \ - printk(KERN_ERR "CacheFiles: Assertion failed\n"); \ - printk(KERN_ERR "%lx " #OP " %lx is false\n", \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ + pr_err("%lx " #OP " %lx is false\n", \ (unsigned long)(X), (unsigned long)(Y)); \ BUG(); \ } \ @@ -329,8 +515,8 @@ do { \ #define ASSERTIF(C, X) \ do { \ if (unlikely((C) && !(X))) { \ - printk(KERN_ERR "\n"); \ - printk(KERN_ERR "CacheFiles: Assertion failed\n"); \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ BUG(); \ } \ } while (0) @@ -338,9 +524,9 @@ do { \ #define ASSERTIFCMP(C, X, OP, Y) \ do { \ if (unlikely((C) && !((X) OP (Y)))) { \ - printk(KERN_ERR "\n"); \ - printk(KERN_ERR "CacheFiles: Assertion failed\n"); \ - printk(KERN_ERR "%lx " #OP " %lx is false\n", \ + pr_err("\n"); \ + pr_err("Assertion failed\n"); \ + pr_err("%lx " #OP " %lx is false\n", \ (unsigned long)(X), (unsigned long)(Y)); \ BUG(); \ } \ diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c new file mode 100644 index 000000000000..3e0576d9db1d --- /dev/null +++ b/fs/cachefiles/io.c @@ -0,0 +1,762 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* kiocb-using read/write + * + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/mount.h> +#include <linux/slab.h> +#include <linux/file.h> +#include <linux/uio.h> +#include <linux/bio.h> +#include <linux/falloc.h> +#include <linux/sched/mm.h> +#include <trace/events/fscache.h> +#include <trace/events/netfs.h> +#include "internal.h" + +struct cachefiles_kiocb { + struct kiocb iocb; + refcount_t ki_refcnt; + loff_t start; + union { + size_t skipped; + size_t len; + }; + struct cachefiles_object *object; + netfs_io_terminated_t term_func; + void *term_func_priv; + bool was_async; + unsigned int inval_counter; /* Copy of cookie->inval_counter */ + u64 b_writing; +}; + +static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki) +{ + if (refcount_dec_and_test(&ki->ki_refcnt)) { + cachefiles_put_object(ki->object, cachefiles_obj_put_ioreq); + fput(ki->iocb.ki_filp); + kfree(ki); + } +} + +/* + * Handle completion of a read from the cache. + */ +static void cachefiles_read_complete(struct kiocb *iocb, long ret) +{ + struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb); + struct inode *inode = file_inode(ki->iocb.ki_filp); + + _enter("%ld", ret); + + if (ret < 0) + trace_cachefiles_io_error(ki->object, inode, ret, + cachefiles_trace_read_error); + + if (ki->term_func) { + if (ret >= 0) { + if (ki->object->cookie->inval_counter == ki->inval_counter) + ki->skipped += ret; + else + ret = -ESTALE; + } + + ki->term_func(ki->term_func_priv, ret); + } + + cachefiles_put_kiocb(ki); +} + +/* + * Initiate a read from the cache. + */ +static int cachefiles_read(struct netfs_cache_resources *cres, + loff_t start_pos, + struct iov_iter *iter, + enum netfs_read_from_hole read_hole, + netfs_io_terminated_t term_func, + void *term_func_priv) +{ + struct cachefiles_object *object; + struct cachefiles_kiocb *ki; + struct file *file; + unsigned int old_nofs; + ssize_t ret = -ENOBUFS; + size_t len = iov_iter_count(iter), skipped = 0; + + if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ)) + goto presubmission_error; + + fscache_count_read(); + object = cachefiles_cres_object(cres); + file = cachefiles_cres_file(cres); + + _enter("%pD,%li,%llx,%zx/%llx", + file, file_inode(file)->i_ino, start_pos, len, + i_size_read(file_inode(file))); + + /* If the caller asked us to seek for data before doing the read, then + * we should do that now. If we find a gap, we fill it with zeros. + */ + if (read_hole != NETFS_READ_HOLE_IGNORE) { + loff_t off = start_pos, off2; + + off2 = cachefiles_inject_read_error(); + if (off2 == 0) + off2 = vfs_llseek(file, off, SEEK_DATA); + if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) { + skipped = 0; + ret = off2; + goto presubmission_error; + } + + if (off2 == -ENXIO || off2 >= start_pos + len) { + /* The region is beyond the EOF or there's no more data + * in the region, so clear the rest of the buffer and + * return success. 
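
	[Editor's note — worked example, not part of the patch.  With
	start_pos = 0, len = 16384 and the first extent of data at file
	offset 8192, SEEK_DATA returns off2 = 8192: the first 8192 bytes
	of the iterator are zero-filled (skipped = 8192) and the DIO read
	is issued at ki_pos = 8192 for the remainder.  If SEEK_DATA
	reports -ENXIO, or finds data only at or beyond start_pos + len,
	the whole buffer is zeroed and the read completes without touching
	the backing file — unless NETFS_READ_HOLE_FAIL was requested, in
	which case the operation fails with -ENODATA.]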
+ */ + ret = -ENODATA; + if (read_hole == NETFS_READ_HOLE_FAIL) + goto presubmission_error; + + iov_iter_zero(len, iter); + skipped = len; + ret = 0; + goto presubmission_error; + } + + skipped = off2 - off; + iov_iter_zero(skipped, iter); + } + + ret = -ENOMEM; + ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL); + if (!ki) + goto presubmission_error; + + refcount_set(&ki->ki_refcnt, 2); + ki->iocb.ki_filp = file; + ki->iocb.ki_pos = start_pos + skipped; + ki->iocb.ki_flags = IOCB_DIRECT; + ki->iocb.ki_ioprio = get_current_ioprio(); + ki->skipped = skipped; + ki->object = object; + ki->inval_counter = cres->inval_counter; + ki->term_func = term_func; + ki->term_func_priv = term_func_priv; + ki->was_async = true; + + if (ki->term_func) + ki->iocb.ki_complete = cachefiles_read_complete; + + get_file(ki->iocb.ki_filp); + cachefiles_grab_object(object, cachefiles_obj_get_ioreq); + + trace_cachefiles_read(object, file_inode(file), ki->iocb.ki_pos, len - skipped); + old_nofs = memalloc_nofs_save(); + ret = cachefiles_inject_read_error(); + if (ret == 0) + ret = vfs_iocb_iter_read(file, &ki->iocb, iter); + memalloc_nofs_restore(old_nofs); + switch (ret) { + case -EIOCBQUEUED: + goto in_progress; + + case -ERESTARTSYS: + case -ERESTARTNOINTR: + case -ERESTARTNOHAND: + case -ERESTART_RESTARTBLOCK: + /* There's no easy way to restart the syscall since other AIO's + * may be already running. Just fail this IO with EINTR. + */ + ret = -EINTR; + fallthrough; + default: + ki->was_async = false; + cachefiles_read_complete(&ki->iocb, ret); + if (ret > 0) + ret = 0; + break; + } + +in_progress: + cachefiles_put_kiocb(ki); + _leave(" = %zd", ret); + return ret; + +presubmission_error: + if (term_func) + term_func(term_func_priv, ret < 0 ? ret : skipped); + return ret; +} + +/* + * Query the occupancy of the cache in a region, returning where the next chunk + * of data starts and how long it is. + */ +static int cachefiles_query_occupancy(struct netfs_cache_resources *cres, + loff_t start, size_t len, size_t granularity, + loff_t *_data_start, size_t *_data_len) +{ + struct cachefiles_object *object; + struct file *file; + loff_t off, off2; + + *_data_start = -1; + *_data_len = 0; + + if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ)) + return -ENOBUFS; + + object = cachefiles_cres_object(cres); + file = cachefiles_cres_file(cres); + granularity = max_t(size_t, object->volume->cache->bsize, granularity); + + _enter("%pD,%li,%llx,%zx/%llx", + file, file_inode(file)->i_ino, start, len, + i_size_read(file_inode(file))); + + off = cachefiles_inject_read_error(); + if (off == 0) + off = vfs_llseek(file, start, SEEK_DATA); + if (off == -ENXIO) + return -ENODATA; /* Beyond EOF */ + if (off < 0 && off >= (loff_t)-MAX_ERRNO) + return -ENOBUFS; /* Error. */ + if (round_up(off, granularity) >= start + len) + return -ENODATA; /* No data in range */ + + off2 = cachefiles_inject_read_error(); + if (off2 == 0) + off2 = vfs_llseek(file, off, SEEK_HOLE); + if (off2 == -ENXIO) + return -ENODATA; /* Beyond EOF */ + if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO) + return -ENOBUFS; /* Error. */ + + /* Round away partial blocks */ + off = round_up(off, granularity); + off2 = round_down(off2, granularity); + if (off2 <= off) + return -ENODATA; + + *_data_start = off; + if (off2 > start + len) + *_data_len = len; + else + *_data_len = off2 - off; + return 0; +} + +/* + * Handle completion of a write to the cache. 
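
	[Editor's note — illustrative aside, not part of the patch.  The
	completion handler below drops the ki->b_writing blocks that
	__cachefiles_write() charged against cache->b_writing for the
	culling heuristics.  The charge is the write length rounded up to
	whole cache blocks via bshift, mirroring the expression
	"(len + (1 << cache->bshift) - 1) >> cache->bshift":

		#include <stddef.h>

		/* e.g. bshift = 10 (1 KiB blocks): blocks_for(3000) == 3 */
		static inline unsigned long blocks_for(size_t len,
						       unsigned int bshift)
		{
			return (len + (1UL << bshift) - 1) >> bshift;
		}
	]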
+ */ +static void cachefiles_write_complete(struct kiocb *iocb, long ret) +{ + struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb); + struct cachefiles_object *object = ki->object; + struct inode *inode = file_inode(ki->iocb.ki_filp); + + _enter("%ld", ret); + + if (ki->was_async) + kiocb_end_write(iocb); + + if (ret < 0) + trace_cachefiles_io_error(object, inode, ret, + cachefiles_trace_write_error); + + atomic_long_sub(ki->b_writing, &object->volume->cache->b_writing); + set_bit(FSCACHE_COOKIE_HAVE_DATA, &object->cookie->flags); + if (ki->term_func) + ki->term_func(ki->term_func_priv, ret); + cachefiles_put_kiocb(ki); +} + +/* + * Initiate a write to the cache. + */ +int __cachefiles_write(struct cachefiles_object *object, + struct file *file, + loff_t start_pos, + struct iov_iter *iter, + netfs_io_terminated_t term_func, + void *term_func_priv) +{ + struct cachefiles_cache *cache; + struct cachefiles_kiocb *ki; + unsigned int old_nofs; + ssize_t ret; + size_t len = iov_iter_count(iter); + + fscache_count_write(); + cache = object->volume->cache; + + _enter("%pD,%li,%llx,%zx/%llx", + file, file_inode(file)->i_ino, start_pos, len, + i_size_read(file_inode(file))); + + ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL); + if (!ki) { + if (term_func) + term_func(term_func_priv, -ENOMEM); + return -ENOMEM; + } + + refcount_set(&ki->ki_refcnt, 2); + ki->iocb.ki_filp = file; + ki->iocb.ki_pos = start_pos; + ki->iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE; + ki->iocb.ki_ioprio = get_current_ioprio(); + ki->object = object; + ki->start = start_pos; + ki->len = len; + ki->term_func = term_func; + ki->term_func_priv = term_func_priv; + ki->was_async = true; + ki->b_writing = (len + (1 << cache->bshift) - 1) >> cache->bshift; + + if (ki->term_func) + ki->iocb.ki_complete = cachefiles_write_complete; + atomic_long_add(ki->b_writing, &cache->b_writing); + + get_file(ki->iocb.ki_filp); + cachefiles_grab_object(object, cachefiles_obj_get_ioreq); + + trace_cachefiles_write(object, file_inode(file), ki->iocb.ki_pos, len); + old_nofs = memalloc_nofs_save(); + ret = cachefiles_inject_write_error(); + if (ret == 0) + ret = vfs_iocb_iter_write(file, &ki->iocb, iter); + memalloc_nofs_restore(old_nofs); + switch (ret) { + case -EIOCBQUEUED: + goto in_progress; + + case -ERESTARTSYS: + case -ERESTARTNOINTR: + case -ERESTARTNOHAND: + case -ERESTART_RESTARTBLOCK: + /* There's no easy way to restart the syscall since other AIO's + * may be already running. Just fail this IO with EINTR. 
+ */ + ret = -EINTR; + fallthrough; + default: + ki->was_async = false; + cachefiles_write_complete(&ki->iocb, ret); + break; + } + +in_progress: + cachefiles_put_kiocb(ki); + _leave(" = %zd", ret); + return ret; +} + +static int cachefiles_write(struct netfs_cache_resources *cres, + loff_t start_pos, + struct iov_iter *iter, + netfs_io_terminated_t term_func, + void *term_func_priv) +{ + if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) { + if (term_func) + term_func(term_func_priv, -ENOBUFS); + trace_netfs_sreq(term_func_priv, netfs_sreq_trace_cache_nowrite); + return -ENOBUFS; + } + + return __cachefiles_write(cachefiles_cres_object(cres), + cachefiles_cres_file(cres), + start_pos, iter, + term_func, term_func_priv); +} + +static inline enum netfs_io_source +cachefiles_do_prepare_read(struct netfs_cache_resources *cres, + loff_t start, size_t *_len, loff_t i_size, + unsigned long *_flags, ino_t netfs_ino) +{ + enum cachefiles_prepare_read_trace why; + struct cachefiles_object *object = NULL; + struct cachefiles_cache *cache; + struct fscache_cookie *cookie = fscache_cres_cookie(cres); + const struct cred *saved_cred; + struct file *file = cachefiles_cres_file(cres); + enum netfs_io_source ret = NETFS_DOWNLOAD_FROM_SERVER; + size_t len = *_len; + loff_t off, to; + ino_t ino = file ? file_inode(file)->i_ino : 0; + int rc; + + _enter("%zx @%llx/%llx", len, start, i_size); + + if (start >= i_size) { + ret = NETFS_FILL_WITH_ZEROES; + why = cachefiles_trace_read_after_eof; + goto out_no_object; + } + + if (test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) { + __set_bit(NETFS_SREQ_COPY_TO_CACHE, _flags); + why = cachefiles_trace_read_no_data; + if (!test_bit(NETFS_SREQ_ONDEMAND, _flags)) + goto out_no_object; + } + + /* The object and the file may be being created in the background. 
*/ + if (!file) { + why = cachefiles_trace_read_no_file; + if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ)) + goto out_no_object; + file = cachefiles_cres_file(cres); + if (!file) + goto out_no_object; + ino = file_inode(file)->i_ino; + } + + object = cachefiles_cres_object(cres); + cache = object->volume->cache; + cachefiles_begin_secure(cache, &saved_cred); +retry: + off = cachefiles_inject_read_error(); + if (off == 0) + off = vfs_llseek(file, start, SEEK_DATA); + if (off < 0 && off >= (loff_t)-MAX_ERRNO) { + if (off == (loff_t)-ENXIO) { + why = cachefiles_trace_read_seek_nxio; + goto download_and_store; + } + trace_cachefiles_io_error(object, file_inode(file), off, + cachefiles_trace_seek_error); + why = cachefiles_trace_read_seek_error; + goto out; + } + + if (off >= start + len) { + why = cachefiles_trace_read_found_hole; + goto download_and_store; + } + + if (off > start) { + off = round_up(off, cache->bsize); + len = off - start; + *_len = len; + why = cachefiles_trace_read_found_part; + goto download_and_store; + } + + to = cachefiles_inject_read_error(); + if (to == 0) + to = vfs_llseek(file, start, SEEK_HOLE); + if (to < 0 && to >= (loff_t)-MAX_ERRNO) { + trace_cachefiles_io_error(object, file_inode(file), to, + cachefiles_trace_seek_error); + why = cachefiles_trace_read_seek_error; + goto out; + } + + if (to < start + len) { + if (start + len >= i_size) + to = round_up(to, cache->bsize); + else + to = round_down(to, cache->bsize); + len = to - start; + *_len = len; + } + + why = cachefiles_trace_read_have_data; + ret = NETFS_READ_FROM_CACHE; + goto out; + +download_and_store: + __set_bit(NETFS_SREQ_COPY_TO_CACHE, _flags); + if (test_bit(NETFS_SREQ_ONDEMAND, _flags)) { + rc = cachefiles_ondemand_read(object, start, len); + if (!rc) { + __clear_bit(NETFS_SREQ_ONDEMAND, _flags); + goto retry; + } + ret = NETFS_INVALID_READ; + } +out: + cachefiles_end_secure(cache, saved_cred); +out_no_object: + trace_cachefiles_prep_read(object, start, len, *_flags, ret, why, ino, netfs_ino); + return ret; +} + +/* + * Prepare a read operation, shortening it to a cached/uncached + * boundary as appropriate. + */ +static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *subreq, + unsigned long long i_size) +{ + return cachefiles_do_prepare_read(&subreq->rreq->cache_resources, + subreq->start, &subreq->len, i_size, + &subreq->flags, subreq->rreq->inode->i_ino); +} + +/* + * Prepare an on-demand read operation, shortening it to a cached/uncached + * boundary as appropriate. + */ +static enum netfs_io_source +cachefiles_prepare_ondemand_read(struct netfs_cache_resources *cres, + loff_t start, size_t *_len, loff_t i_size, + unsigned long *_flags, ino_t ino) +{ + return cachefiles_do_prepare_read(cres, start, _len, i_size, _flags, ino); +} + +/* + * Prepare for a write to occur. + */ +int __cachefiles_prepare_write(struct cachefiles_object *object, + struct file *file, + loff_t *_start, size_t *_len, size_t upper_len, + bool no_space_allocated_yet) +{ + struct cachefiles_cache *cache = object->volume->cache; + loff_t start = *_start, pos; + size_t len = *_len; + int ret; + + /* Round to DIO size */ + start = round_down(*_start, PAGE_SIZE); + if (start != *_start || *_len > upper_len) { + /* Probably asked to cache a streaming write written into the + * pagecache when the cookie was temporarily out of service to + * culling. 
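
	[Editor's note — worked example, not part of the patch.  The
	surrounding check rejects DIO-unfriendly requests: *_start = 5000
	rounds down to 4096 != 5000, so the write is refused with -ENOBUFS
	and counted as a DIO misfit.  A conforming request such as
	*_start = 4096, len = 5000 (with upper_len >= len) passes and has
	its length padded to whole pages: *_len = round_up(5000, PAGE_SIZE)
	= 8192, assuming 4 KiB pages.]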
+ */ + fscache_count_dio_misfit(); + return -ENOBUFS; + } + + *_len = round_up(len, PAGE_SIZE); + + /* We need to work out whether there's sufficient disk space to perform + * the write - but we can skip that check if we have space already + * allocated. + */ + if (no_space_allocated_yet) + goto check_space; + + pos = cachefiles_inject_read_error(); + if (pos == 0) + pos = vfs_llseek(file, start, SEEK_DATA); + if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) { + if (pos == -ENXIO) + goto check_space; /* Unallocated tail */ + trace_cachefiles_io_error(object, file_inode(file), pos, + cachefiles_trace_seek_error); + return pos; + } + if ((u64)pos >= (u64)start + *_len) + goto check_space; /* Unallocated region */ + + /* We have a block that's at least partially filled - if we're low on + * space, we need to see if it's fully allocated. If it's not, we may + * want to cull it. + */ + if (cachefiles_has_space(cache, 0, *_len / PAGE_SIZE, + cachefiles_has_space_check) == 0) + return 0; /* Enough space to simply overwrite the whole block */ + + pos = cachefiles_inject_read_error(); + if (pos == 0) + pos = vfs_llseek(file, start, SEEK_HOLE); + if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) { + trace_cachefiles_io_error(object, file_inode(file), pos, + cachefiles_trace_seek_error); + return pos; + } + if ((u64)pos >= (u64)start + *_len) + return 0; /* Fully allocated */ + + /* Partially allocated, but insufficient space: cull. */ + fscache_count_no_write_space(); + ret = cachefiles_inject_remove_error(); + if (ret == 0) + ret = vfs_fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + start, *_len); + if (ret < 0) { + trace_cachefiles_io_error(object, file_inode(file), ret, + cachefiles_trace_fallocate_error); + cachefiles_io_error_obj(object, + "CacheFiles: fallocate failed (%d)\n", ret); + ret = -EIO; + } + + return ret; + +check_space: + return cachefiles_has_space(cache, 0, *_len / PAGE_SIZE, + cachefiles_has_space_for_write); +} + +static int cachefiles_prepare_write(struct netfs_cache_resources *cres, + loff_t *_start, size_t *_len, size_t upper_len, + loff_t i_size, bool no_space_allocated_yet) +{ + struct cachefiles_object *object = cachefiles_cres_object(cres); + struct cachefiles_cache *cache = object->volume->cache; + const struct cred *saved_cred; + int ret; + + if (!cachefiles_cres_file(cres)) { + if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) + return -ENOBUFS; + if (!cachefiles_cres_file(cres)) + return -ENOBUFS; + } + + cachefiles_begin_secure(cache, &saved_cred); + ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres), + _start, _len, upper_len, + no_space_allocated_yet); + cachefiles_end_secure(cache, saved_cred); + return ret; +} + +static void cachefiles_prepare_write_subreq(struct netfs_io_subrequest *subreq) +{ + struct netfs_io_request *wreq = subreq->rreq; + struct netfs_cache_resources *cres = &wreq->cache_resources; + struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr]; + + _enter("W=%x[%x] %llx", wreq->debug_id, subreq->debug_index, subreq->start); + + stream->sreq_max_len = MAX_RW_COUNT; + stream->sreq_max_segs = BIO_MAX_VECS; + + if (!cachefiles_cres_file(cres)) { + if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) + return netfs_prepare_write_failed(subreq); + if (!cachefiles_cres_file(cres)) + return netfs_prepare_write_failed(subreq); + } +} + +static void cachefiles_issue_write(struct netfs_io_subrequest *subreq) +{ + struct netfs_io_request *wreq = subreq->rreq; + struct netfs_cache_resources *cres = 
&wreq->cache_resources; + struct cachefiles_object *object = cachefiles_cres_object(cres); + struct cachefiles_cache *cache = object->volume->cache; + struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr]; + const struct cred *saved_cred; + size_t off, pre, post, len = subreq->len; + loff_t start = subreq->start; + int ret; + + _enter("W=%x[%x] %llx-%llx", + wreq->debug_id, subreq->debug_index, start, start + len - 1); + + /* We need to start on the cache granularity boundary */ + off = start & (CACHEFILES_DIO_BLOCK_SIZE - 1); + if (off) { + pre = CACHEFILES_DIO_BLOCK_SIZE - off; + if (pre >= len) { + fscache_count_dio_misfit(); + netfs_write_subrequest_terminated(subreq, len); + return; + } + subreq->transferred += pre; + start += pre; + len -= pre; + iov_iter_advance(&subreq->io_iter, pre); + } + + /* We also need to end on the cache granularity boundary */ + if (start + len == wreq->i_size) { + size_t part = len % CACHEFILES_DIO_BLOCK_SIZE; + size_t need = CACHEFILES_DIO_BLOCK_SIZE - part; + + if (part && stream->submit_extendable_to >= need) { + len += need; + subreq->len += need; + subreq->io_iter.count += need; + } + } + + post = len & (CACHEFILES_DIO_BLOCK_SIZE - 1); + if (post) { + len -= post; + if (len == 0) { + fscache_count_dio_misfit(); + netfs_write_subrequest_terminated(subreq, post); + return; + } + iov_iter_truncate(&subreq->io_iter, len); + } + + trace_netfs_sreq(subreq, netfs_sreq_trace_cache_prepare); + cachefiles_begin_secure(cache, &saved_cred); + ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres), + &start, &len, len, true); + cachefiles_end_secure(cache, saved_cred); + if (ret < 0) { + netfs_write_subrequest_terminated(subreq, ret); + return; + } + + trace_netfs_sreq(subreq, netfs_sreq_trace_cache_write); + cachefiles_write(&subreq->rreq->cache_resources, + subreq->start, &subreq->io_iter, + netfs_write_subrequest_terminated, subreq); +} + +/* + * Clean up an operation. + */ +static void cachefiles_end_operation(struct netfs_cache_resources *cres) +{ + struct file *file = cachefiles_cres_file(cres); + + if (file) + fput(file); + fscache_end_cookie_access(fscache_cres_cookie(cres), fscache_access_io_end); +} + +static const struct netfs_cache_ops cachefiles_netfs_cache_ops = { + .end_operation = cachefiles_end_operation, + .read = cachefiles_read, + .write = cachefiles_write, + .issue_write = cachefiles_issue_write, + .prepare_read = cachefiles_prepare_read, + .prepare_write = cachefiles_prepare_write, + .prepare_write_subreq = cachefiles_prepare_write_subreq, + .prepare_ondemand_read = cachefiles_prepare_ondemand_read, + .query_occupancy = cachefiles_query_occupancy, +}; + +/* + * Open the cache file when beginning a cache operation. 
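
	[Editor's note — illustrative aside, not part of the patch.  The
	want_state argument decides whether a missing backing file is
	fatal: FSCACHE_WANT_PARAMS only needs cres->ops installed, so it
	may succeed before the file has been created, whereas read/write
	states must end up with a pinned file ref in cres->cache_priv2 or
	the call fails:

		cachefiles_begin_operation(cres, FSCACHE_WANT_PARAMS);
			/* may return true with no file yet */
		cachefiles_begin_operation(cres, FSCACHE_WANT_WRITE);
			/* returns false if no file could be pinned */
	]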
+ */ +bool cachefiles_begin_operation(struct netfs_cache_resources *cres, + enum fscache_want_state want_state) +{ + struct cachefiles_object *object = cachefiles_cres_object(cres); + + if (!cachefiles_cres_file(cres)) { + cres->ops = &cachefiles_netfs_cache_ops; + if (object->file) { + spin_lock(&object->lock); + if (!cres->cache_priv2 && object->file) + cres->cache_priv2 = get_file(object->file); + spin_unlock(&object->lock); + } + } + + if (!cachefiles_cres_file(cres) && want_state != FSCACHE_WANT_PARAMS) { + pr_err("failed to get cres->file\n"); + return false; + } + + return true; +} diff --git a/fs/cachefiles/key.c b/fs/cachefiles/key.c index 33b58c60f2d1..aae86af48ed5 100644 --- a/fs/cachefiles/key.c +++ b/fs/cachefiles/key.c @@ -1,18 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* Key to pathname encoder * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/slab.h> #include "internal.h" -static const char cachefiles_charmap[64] = +static const char cachefiles_charmap[64] __nonstring = "0123456789" /* 0 - 9 */ "abcdefghijklmnopqrstuvwxyz" /* 10 - 35 */ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" /* 36 - 61 */ @@ -26,134 +22,116 @@ static const char cachefiles_filecharmap[256] = { [48 ... 127] = 1, /* '0' -> '~' */ }; +static inline unsigned int how_many_hex_digits(unsigned int x) +{ + return x ? round_up(ilog2(x) + 1, 4) / 4 : 0; +} + /* * turn the raw key into something cooked - * - the raw key should include the length in the two bytes at the front - * - the key may be up to 514 bytes in length (including the length word) + * - the key may be up to NAME_MAX in length (including the length word) * - "base64" encode the strange keys, mapping 3 bytes of raw to four of * cooked * - need to cut the cooked key into 252 char lengths (189 raw bytes) */ -char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type) +bool cachefiles_cook_key(struct cachefiles_object *object) { - unsigned char csum, ch; - unsigned int acc; - char *key; - int loop, len, max, seg, mark, print; + const u8 *key = fscache_get_key(object->cookie), *kend; + unsigned char ch; + unsigned int acc, i, n, nle, nbe, keylen = object->cookie->key_len; + unsigned int b64len, len, print, pad; + char *name, sep; - _enter(",%d", keylen); + _enter(",%u,%*phN", keylen, keylen, key); - BUG_ON(keylen < 2 || keylen > 514); + BUG_ON(keylen > NAME_MAX - 3); - csum = raw[0] + raw[1]; print = 1; - for (loop = 2; loop < keylen; loop++) { - ch = raw[loop]; - csum += ch; + for (i = 0; i < keylen; i++) { + ch = key[i]; print &= cachefiles_filecharmap[ch]; } + /* If the path is usable ASCII, then we render it directly */ if (print) { - /* if the path is usable ASCII, then we render it directly */ - max = keylen - 2; - max += 2; /* two base64'd length chars on the front */ - max += 5; /* @checksum/M */ - max += 3 * 2; /* maximum number of segment dividers (".../M") - * is ((514 + 251) / 252) = 3 - */ - max += 1; /* NUL on end */ - } else { - /* calculate the maximum length of the cooked key */ - keylen = (keylen + 2) / 3; - - max = keylen * 4; - max += 5; /* @checksum/M */ - max += 3 * 2; /* maximum number of segment dividers (".../M") - * is ((514 + 188) / 
189) = 3 - */ - max += 1; /* NUL on end */ + len = 1 + keylen; + name = kmalloc(len + 1, GFP_KERNEL); + if (!name) + return false; + + name[0] = 'D'; /* Data object type, string encoding */ + memcpy(name + 1, key, keylen); + goto success; } - max += 1; /* 2nd NUL on end */ - - _debug("max: %d", max); - - key = kmalloc(max, cachefiles_gfp); - if (!key) - return NULL; - - len = 0; - - /* build the cooked key */ - sprintf(key, "@%02x%c+", (unsigned) csum, 0); - len = 5; - mark = len - 1; - - if (print) { - acc = *(uint16_t *) raw; - raw += 2; - - key[len + 1] = cachefiles_charmap[acc & 63]; - acc >>= 6; - key[len] = cachefiles_charmap[acc & 63]; - len += 2; - - seg = 250; - for (loop = keylen; loop > 0; loop--) { - if (seg <= 0) { - key[len++] = '\0'; - mark = len; - key[len++] = '+'; - seg = 252; - } - - key[len++] = *raw++; - ASSERT(len < max); - } - - switch (type) { - case FSCACHE_COOKIE_TYPE_INDEX: type = 'I'; break; - case FSCACHE_COOKIE_TYPE_DATAFILE: type = 'D'; break; - default: type = 'S'; break; - } - } else { - seg = 252; - for (loop = keylen; loop > 0; loop--) { - if (seg <= 0) { - key[len++] = '\0'; - mark = len; - key[len++] = '+'; - seg = 252; - } - - acc = *raw++; - acc |= *raw++ << 8; - acc |= *raw++ << 16; - - _debug("acc: %06x", acc); - - key[len++] = cachefiles_charmap[acc & 63]; - acc >>= 6; - key[len++] = cachefiles_charmap[acc & 63]; - acc >>= 6; - key[len++] = cachefiles_charmap[acc & 63]; - acc >>= 6; - key[len++] = cachefiles_charmap[acc & 63]; - - ASSERT(len < max); - } + /* See if it makes sense to encode it as "hex,hex,hex" for each 32-bit + * chunk. We rely on the key having been padded out to a whole number + * of 32-bit words. + */ + n = round_up(keylen, 4); + nbe = nle = 0; + for (i = 0; i < n; i += 4) { + u32 be = be32_to_cpu(*(__be32 *)(key + i)); + u32 le = le32_to_cpu(*(__le32 *)(key + i)); + + nbe += 1 + how_many_hex_digits(be); + nle += 1 + how_many_hex_digits(le); + } - switch (type) { - case FSCACHE_COOKIE_TYPE_INDEX: type = 'J'; break; - case FSCACHE_COOKIE_TYPE_DATAFILE: type = 'E'; break; - default: type = 'T'; break; + b64len = DIV_ROUND_UP(keylen, 3); + pad = b64len * 3 - keylen; + b64len = 2 + b64len * 4; /* Length if we base64-encode it */ + _debug("len=%u nbe=%u nle=%u b64=%u", keylen, nbe, nle, b64len); + if (nbe < b64len || nle < b64len) { + unsigned int nlen = min(nbe, nle) + 1; + name = kmalloc(nlen, GFP_KERNEL); + if (!name) + return false; + sep = (nbe <= nle) ? 
'S' : 'T'; /* Encoding indicator */ + len = 0; + for (i = 0; i < n; i += 4) { + u32 x; + if (nbe <= nle) + x = be32_to_cpu(*(__be32 *)(key + i)); + else + x = le32_to_cpu(*(__le32 *)(key + i)); + name[len++] = sep; + if (x != 0) + len += snprintf(name + len, nlen - len, "%x", x); + sep = ','; } + goto success; } - key[mark] = type; - key[len++] = 0; - key[len] = 0; + /* We need to base64-encode it */ + name = kmalloc(b64len + 1, GFP_KERNEL); + if (!name) + return false; + + name[0] = 'E'; + name[1] = '0' + pad; + len = 2; + kend = key + keylen; + do { + acc = *key++; + if (key < kend) { + acc |= *key++ << 8; + if (key < kend) + acc |= *key++ << 16; + } - _leave(" = %p %d", key, len); - return key; + name[len++] = cachefiles_charmap[acc & 63]; + acc >>= 6; + name[len++] = cachefiles_charmap[acc & 63]; + acc >>= 6; + name[len++] = cachefiles_charmap[acc & 63]; + acc >>= 6; + name[len++] = cachefiles_charmap[acc & 63]; + } while (key < kend); + +success: + name[len] = 0; + object->d_name = name; + _leave(" = %s", object->d_name); + return true; } diff --git a/fs/cachefiles/main.c b/fs/cachefiles/main.c index 4bfa8cf43bf5..3f369c6f816d 100644 --- a/fs/cachefiles/main.c +++ b/fs/cachefiles/main.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* Network filesystem caching backend to use cache files on a premounted * filesystem * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. 
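
As an aside on the key encoder just above: the 'E' arm packs three raw key bytes into four characters of cachefiles_charmap, least-significant six bits first, after recording the pad count. A standalone user-space sketch of the same packing; note the map's final two characters, '_' and '-', are cut off by the hunk context above, so they are an assumption here:

    #include <stdio.h>

    static const char charmap[65] =
            "0123456789"
            "abcdefghijklmnopqrstuvwxyz"
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            "_-";        /* assumed tail of cachefiles_charmap */

    /* 'E' + pad digit, then 3 raw bytes -> 4 cooked chars, low bits first */
    static void cook(const unsigned char *key, unsigned int len, char *out)
    {
            const unsigned char *kend = key + len;
            unsigned int n = 0;

            out[n++] = 'E';
            out[n++] = '0' + (3 - len % 3) % 3;   /* pad = b64len * 3 - keylen */
            do {
                    unsigned int acc = *key++;
                    if (key < kend) {
                            acc |= *key++ << 8;
                            if (key < kend)
                                    acc |= *key++ << 16;
                    }
                    out[n++] = charmap[acc & 63]; acc >>= 6;
                    out[n++] = charmap[acc & 63]; acc >>= 6;
                    out[n++] = charmap[acc & 63]; acc >>= 6;
                    out[n++] = charmap[acc & 63];
            } while (key < kend);
            out[n] = '\0';
    }

    int main(void)
    {
            char name[64];

            cook((const unsigned char *)"\x01\x02\x03\x04", 4, name);
            puts(name);        /* pad digit is '2' for a 4-byte key */
            return 0;
    }
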
*/ #include <linux/module.h> @@ -22,6 +18,9 @@ #include <linux/statfs.h> #include <linux/sysctl.h> #include <linux/miscdevice.h> +#include <linux/netfs.h> +#include <trace/events/netfs.h> +#define CREATE_TRACE_POINTS #include "internal.h" unsigned cachefiles_debug; @@ -40,14 +39,6 @@ static struct miscdevice cachefiles_dev = { .fops = &cachefiles_daemon_fops, }; -static void cachefiles_object_init_once(void *_object) -{ - struct cachefiles_object *object = _object; - - memset(object, 0, sizeof(*object)); - spin_lock_init(&object->work_lock); -} - /* * initialise the fs caching module */ @@ -55,6 +46,9 @@ static int __init cachefiles_init(void) { int ret; + ret = cachefiles_register_error_injection(); + if (ret < 0) + goto error_einj; ret = misc_register(&cachefiles_dev); if (ret < 0) goto error_dev; @@ -64,28 +58,21 @@ static int __init cachefiles_init(void) cachefiles_object_jar = kmem_cache_create("cachefiles_object_jar", sizeof(struct cachefiles_object), - 0, - SLAB_HWCACHE_ALIGN, - cachefiles_object_init_once); + 0, SLAB_HWCACHE_ALIGN, NULL); if (!cachefiles_object_jar) { - printk(KERN_NOTICE - "CacheFiles: Failed to allocate an object jar\n"); + pr_notice("Failed to allocate an object jar\n"); goto error_object_jar; } - ret = cachefiles_proc_init(); - if (ret < 0) - goto error_proc; - - printk(KERN_INFO "CacheFiles: Loaded\n"); + pr_info("Loaded\n"); return 0; -error_proc: - kmem_cache_destroy(cachefiles_object_jar); error_object_jar: misc_deregister(&cachefiles_dev); error_dev: - kerror("failed to register: %d", ret); + cachefiles_unregister_error_injection(); +error_einj: + pr_err("failed to register: %d\n", ret); return ret; } @@ -96,11 +83,11 @@ fs_initcall(cachefiles_init); */ static void __exit cachefiles_exit(void) { - printk(KERN_INFO "CacheFiles: Unloading\n"); + pr_info("Unloading\n"); - cachefiles_proc_cleanup(); kmem_cache_destroy(cachefiles_object_jar); misc_deregister(&cachefiles_dev); + cachefiles_unregister_error_injection(); } module_exit(cachefiles_exit); diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 25badd1aec5c..e5ec90dccc27 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -1,309 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* CacheFiles path walking and related routines * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/file.h> #include <linux/fs.h> -#include <linux/fsnotify.h> -#include <linux/quotaops.h> -#include <linux/xattr.h> -#include <linux/mount.h> #include <linux/namei.h> -#include <linux/security.h> -#include <linux/slab.h> #include "internal.h" -#define CACHEFILES_KEYBUF_SIZE 512 - /* - * dump debugging info about an object + * Mark the backing file as being a cache file if it's not already in use. The + * mark tells the culling request command that it's not allowed to cull the + * file or directory. The caller must hold the inode lock. 
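
For context, the daemon side of this contract is simple: while the mark is held, a cull attempt on that name fails with EBUSY. A user-space sketch, assuming the control device at /dev/cachefiles and the "cull <name>" command acting on a name in the caller's current directory, the way cachefilesd uses it:

    #include <errno.h>
    #include <limits.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    /* Returns 0 on cull, -1 with errno == EBUSY if the kernel holds
     * the in-use mark on the victim's inode. */
    static int try_cull(int devfd, const char *dir, const char *name)
    {
            char cmd[NAME_MAX + 8];

            if (chdir(dir) < 0)
                    return -1;
            snprintf(cmd, sizeof(cmd), "cull %s", name);
            return write(devfd, cmd, strlen(cmd)) < 0 ? -1 : 0;
    }
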
*/ -static noinline -void __cachefiles_printk_object(struct cachefiles_object *object, - const char *prefix, - u8 *keybuf) +static bool __cachefiles_mark_inode_in_use(struct cachefiles_object *object, + struct inode *inode) { - struct fscache_cookie *cookie; - unsigned keylen, loop; - - printk(KERN_ERR "%sobject: OBJ%x\n", - prefix, object->fscache.debug_id); - printk(KERN_ERR "%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n", - prefix, object->fscache.state->name, - object->fscache.flags, work_busy(&object->fscache.work), - object->fscache.events, object->fscache.event_mask); - printk(KERN_ERR "%sops=%u inp=%u exc=%u\n", - prefix, object->fscache.n_ops, object->fscache.n_in_progress, - object->fscache.n_exclusive); - printk(KERN_ERR "%sparent=%p\n", - prefix, object->fscache.parent); - - spin_lock(&object->fscache.lock); - cookie = object->fscache.cookie; - if (cookie) { - printk(KERN_ERR "%scookie=%p [pr=%p nd=%p fl=%lx]\n", - prefix, - object->fscache.cookie, - object->fscache.cookie->parent, - object->fscache.cookie->netfs_data, - object->fscache.cookie->flags); - if (keybuf) - keylen = cookie->def->get_key(cookie->netfs_data, keybuf, - CACHEFILES_KEYBUF_SIZE); - else - keylen = 0; + bool can_use = false; + + if (!(inode->i_flags & S_KERNEL_FILE)) { + inode->i_flags |= S_KERNEL_FILE; + trace_cachefiles_mark_active(object, inode); + can_use = true; } else { - printk(KERN_ERR "%scookie=NULL\n", prefix); - keylen = 0; + trace_cachefiles_mark_failed(object, inode); } - spin_unlock(&object->fscache.lock); - if (keylen) { - printk(KERN_ERR "%skey=[%u] '", prefix, keylen); - for (loop = 0; loop < keylen; loop++) - printk("%02x", keybuf[loop]); - printk("'\n"); - } + return can_use; +} + +static bool cachefiles_mark_inode_in_use(struct cachefiles_object *object, + struct inode *inode) +{ + bool can_use; + + inode_lock(inode); + can_use = __cachefiles_mark_inode_in_use(object, inode); + inode_unlock(inode); + return can_use; } /* - * dump debugging info about a pair of objects + * Unmark a backing inode. The caller must hold the inode lock. */ -static noinline void cachefiles_printk_object(struct cachefiles_object *object, - struct cachefiles_object *xobject) +static void __cachefiles_unmark_inode_in_use(struct cachefiles_object *object, + struct inode *inode) { - u8 *keybuf; - - keybuf = kmalloc(CACHEFILES_KEYBUF_SIZE, GFP_NOIO); - if (object) - __cachefiles_printk_object(object, "", keybuf); - if (xobject) - __cachefiles_printk_object(xobject, "x", keybuf); - kfree(keybuf); + inode->i_flags &= ~S_KERNEL_FILE; + trace_cachefiles_mark_inactive(object, inode); +} + +static void cachefiles_do_unmark_inode_in_use(struct cachefiles_object *object, + struct inode *inode) +{ + inode_lock(inode); + __cachefiles_unmark_inode_in_use(object, inode); + inode_unlock(inode); } /* - * mark the owner of a dentry, if there is one, to indicate that that dentry - * has been preemptively deleted - * - the caller must hold the i_mutex on the dentry's parent as required to - * call vfs_unlink(), vfs_rmdir() or vfs_rename() + * Unmark a backing inode and tell cachefilesd that there's something that can + * be culled. 
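
cachefiles_state_changed() is what wakes the daemon's poll on the control fd; a read of the fd then yields the current space counters. A minimal sketch of that side, assuming the status line returned by a read begins with a cull=0/1 field (the exact format lives in daemon.c, outside this hunk):

    #include <poll.h>
    #include <string.h>
    #include <unistd.h>

    /* Block until the cache reports a state change, then say whether
     * it is asking for culling to begin. */
    static int cull_requested(int devfd)
    {
            struct pollfd p = { .fd = devfd, .events = POLLIN };
            char buf[256];
            ssize_t n;

            if (poll(&p, 1, -1) < 0)
                    return -1;
            n = read(devfd, buf, sizeof(buf) - 1);
            if (n < 0)
                    return -1;
            buf[n] = '\0';
            return strncmp(buf, "cull=1", 6) == 0;  /* assumed format */
    }
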
*/ -static void cachefiles_mark_object_buried(struct cachefiles_cache *cache, - struct dentry *dentry) +void cachefiles_unmark_inode_in_use(struct cachefiles_object *object, + struct file *file) { - struct cachefiles_object *object; - struct rb_node *p; + struct cachefiles_cache *cache = object->volume->cache; + struct inode *inode = file_inode(file); - _enter(",'%*.*s'", - dentry->d_name.len, dentry->d_name.len, dentry->d_name.name); + cachefiles_do_unmark_inode_in_use(object, inode); - write_lock(&cache->active_lock); - - p = cache->active_nodes.rb_node; - while (p) { - object = rb_entry(p, struct cachefiles_object, active_node); - if (object->dentry > dentry) - p = p->rb_left; - else if (object->dentry < dentry) - p = p->rb_right; - else - goto found_dentry; + if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) { + atomic_long_add(inode->i_blocks, &cache->b_released); + if (atomic_inc_return(&cache->f_released)) + cachefiles_state_changed(cache); } +} - write_unlock(&cache->active_lock); - _leave(" [no owner]"); - return; +/* + * get a subdirectory + */ +struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, + struct dentry *dir, + const char *dirname, + bool *_is_new) +{ + struct dentry *subdir; + struct path path; + int ret; + + _enter(",,%s", dirname); - /* found the dentry for */ -found_dentry: - kdebug("preemptive burial: OBJ%x [%s] %p", - object->fscache.debug_id, - object->fscache.state->name, - dentry); + /* search the current directory for the element name */ - if (fscache_object_is_live(&object->fscache)) { - printk(KERN_ERR "\n"); - printk(KERN_ERR "CacheFiles: Error:" - " Can't preemptively bury live object\n"); - cachefiles_printk_object(object, NULL); - } else if (test_and_set_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) { - printk(KERN_ERR "CacheFiles: Error:" - " Object already preemptively buried\n"); +retry: + ret = cachefiles_inject_read_error(); + if (ret == 0) + subdir = start_creating(&nop_mnt_idmap, dir, &QSTR(dirname)); + else + subdir = ERR_PTR(ret); + trace_cachefiles_lookup(NULL, dir, subdir); + if (IS_ERR(subdir)) { + trace_cachefiles_vfs_error(NULL, d_backing_inode(dir), + PTR_ERR(subdir), + cachefiles_trace_lookup_error); + if (PTR_ERR(subdir) == -ENOMEM) + goto nomem_d_alloc; + goto lookup_error; } - write_unlock(&cache->active_lock); - _leave(" [owner marked]"); -} + _debug("subdir -> %pd %s", + subdir, d_backing_inode(subdir) ? 
"positive" : "negative"); -/* - * record the fact that an object is now active - */ -static int cachefiles_mark_object_active(struct cachefiles_cache *cache, - struct cachefiles_object *object) -{ - struct cachefiles_object *xobject; - struct rb_node **_p, *_parent = NULL; - struct dentry *dentry; + /* we need to create the subdir if it doesn't exist yet */ + if (d_is_negative(subdir)) { + ret = cachefiles_has_space(cache, 1, 0, + cachefiles_has_space_for_create); + if (ret < 0) + goto mkdir_error; - _enter(",%p", object); + _debug("attempt mkdir"); -try_again: - write_lock(&cache->active_lock); + path.mnt = cache->mnt; + path.dentry = dir; + ret = security_path_mkdir(&path, subdir, 0700); + if (ret < 0) + goto mkdir_error; + ret = cachefiles_inject_write_error(); + if (ret == 0) { + subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700, NULL); + } else { + end_creating(subdir); + subdir = ERR_PTR(ret); + } + if (IS_ERR(subdir)) { + trace_cachefiles_vfs_error(NULL, d_inode(dir), ret, + cachefiles_trace_mkdir_error); + goto mkdir_error; + } + trace_cachefiles_mkdir(dir, subdir); + + if (unlikely(d_unhashed(subdir) || d_is_negative(subdir))) { + end_creating(subdir); + goto retry; + } + ASSERT(d_backing_inode(subdir)); + + _debug("mkdir -> %pd{ino=%lu}", + subdir, d_backing_inode(subdir)->i_ino); + if (_is_new) + *_is_new = true; + } + + /* Tell rmdir() it's not allowed to delete the subdir */ + inode_lock(d_inode(subdir)); + end_creating_keep(subdir); - if (test_and_set_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) { - printk(KERN_ERR "CacheFiles: Error: Object already active\n"); - cachefiles_printk_object(object, NULL); - BUG(); + if (!__cachefiles_mark_inode_in_use(NULL, d_inode(subdir))) { + pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n", + subdir, d_inode(subdir)->i_ino); + goto mark_error; } - dentry = object->dentry; - _p = &cache->active_nodes.rb_node; - while (*_p) { - _parent = *_p; - xobject = rb_entry(_parent, - struct cachefiles_object, active_node); + inode_unlock(d_inode(subdir)); - ASSERT(xobject != object); + /* we need to make sure the subdir is a directory */ + ASSERT(d_backing_inode(subdir)); - if (xobject->dentry > dentry) - _p = &(*_p)->rb_left; - else if (xobject->dentry < dentry) - _p = &(*_p)->rb_right; - else - goto wait_for_old_object; + if (!d_can_lookup(subdir)) { + pr_err("%s is not a directory\n", dirname); + ret = -EIO; + goto check_error; } - rb_link_node(&object->active_node, _parent, _p); - rb_insert_color(&object->active_node, &cache->active_nodes); + ret = -EPERM; + if (!(d_backing_inode(subdir)->i_opflags & IOP_XATTR) || + !d_backing_inode(subdir)->i_op->lookup || + !d_backing_inode(subdir)->i_op->mkdir || + !d_backing_inode(subdir)->i_op->rename || + !d_backing_inode(subdir)->i_op->rmdir || + !d_backing_inode(subdir)->i_op->unlink) + goto check_error; - write_unlock(&cache->active_lock); - _leave(" = 0"); - return 0; + _leave(" = [%lu]", d_backing_inode(subdir)->i_ino); + return subdir; - /* an old object from a previous incarnation is hogging the slot - we - * need to wait for it to be destroyed */ -wait_for_old_object: - if (fscache_object_is_live(&object->fscache)) { - printk(KERN_ERR "\n"); - printk(KERN_ERR "CacheFiles: Error:" - " Unexpected object collision\n"); - cachefiles_printk_object(object, xobject); - BUG(); - } - atomic_inc(&xobject->usage); - write_unlock(&cache->active_lock); - - if (test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) { - wait_queue_head_t *wq; - - signed long timeout = 60 * HZ; - wait_queue_t 
wait; - bool requeue; - - /* if the object we're waiting for is queued for processing, - * then just put ourselves on the queue behind it */ - if (work_pending(&xobject->fscache.work)) { - _debug("queue OBJ%x behind OBJ%x immediately", - object->fscache.debug_id, - xobject->fscache.debug_id); - goto requeue; - } +check_error: + cachefiles_put_directory(subdir); + _leave(" = %d [check]", ret); + return ERR_PTR(ret); - /* otherwise we sleep until either the object we're waiting for - * is done, or the fscache_object is congested */ - wq = bit_waitqueue(&xobject->flags, CACHEFILES_OBJECT_ACTIVE); - init_wait(&wait); - requeue = false; - do { - prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); - if (!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) - break; - - requeue = fscache_object_sleep_till_congested(&timeout); - } while (timeout > 0 && !requeue); - finish_wait(wq, &wait); - - if (requeue && - test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) { - _debug("queue OBJ%x behind OBJ%x after wait", - object->fscache.debug_id, - xobject->fscache.debug_id); - goto requeue; - } +mark_error: + inode_unlock(d_inode(subdir)); + dput(subdir); + return ERR_PTR(-EBUSY); - if (timeout <= 0) { - printk(KERN_ERR "\n"); - printk(KERN_ERR "CacheFiles: Error: Overlong" - " wait for old active object to go away\n"); - cachefiles_printk_object(object, xobject); - goto requeue; - } +mkdir_error: + end_creating(subdir); + pr_err("mkdir %s failed with error %d\n", dirname, ret); + return ERR_PTR(ret); + +lookup_error: + ret = PTR_ERR(subdir); + pr_err("Lookup %s failed with error %d\n", dirname, ret); + return ERR_PTR(ret); + +nomem_d_alloc: + inode_unlock(d_inode(dir)); + _leave(" = -ENOMEM"); + return ERR_PTR(-ENOMEM); +} + +/* + * Put a subdirectory. + */ +void cachefiles_put_directory(struct dentry *dir) +{ + if (dir) { + cachefiles_do_unmark_inode_in_use(NULL, d_inode(dir)); + dput(dir); } +} - ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)); +/* + * Remove a regular file from the cache. + */ +static int cachefiles_unlink(struct cachefiles_cache *cache, + struct cachefiles_object *object, + struct dentry *dir, struct dentry *dentry, + enum fscache_why_object_killed why) +{ + struct path path = { + .mnt = cache->mnt, + .dentry = dir, + }; + int ret; - cache->cache.ops->put_object(&xobject->fscache); - goto try_again; + trace_cachefiles_unlink(object, d_inode(dentry)->i_ino, why); + ret = security_path_unlink(&path, dentry); + if (ret < 0) { + cachefiles_io_error(cache, "Unlink security error"); + return ret; + } -requeue: - clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); - cache->cache.ops->put_object(&xobject->fscache); - _leave(" = -ETIMEDOUT"); - return -ETIMEDOUT; + ret = cachefiles_inject_remove_error(); + if (ret == 0) { + ret = vfs_unlink(&nop_mnt_idmap, d_backing_inode(dir), dentry, NULL); + if (ret == -EIO) + cachefiles_io_error(cache, "Unlink failed"); + } + if (ret != 0) + trace_cachefiles_vfs_error(object, d_backing_inode(dir), ret, + cachefiles_trace_unlink_error); + return ret; } /* - * delete an object representation from the cache - * - file backed objects are unlinked - * - directory backed objects are stuffed into the graveyard for userspace to + * Delete an object representation from the cache + * - File backed objects are unlinked + * - Directory backed objects are stuffed into the graveyard for userspace to * delete - * - unlocks the directory mutex + * On entry dir must be locked. It will be unlocked on exit. 
+ * On entry there must be at least 2 refs on rep, one will be dropped on exit. */ -static int cachefiles_bury_object(struct cachefiles_cache *cache, - struct dentry *dir, - struct dentry *rep, - bool preemptive) +int cachefiles_bury_object(struct cachefiles_cache *cache, + struct cachefiles_object *object, + struct dentry *dir, + struct dentry *rep, + enum fscache_why_object_killed why) { struct dentry *grave, *trap; struct path path, path_to_graveyard; char nbuffer[8 + 8 + 1]; int ret; - _enter(",'%*.*s','%*.*s'", - dir->d_name.len, dir->d_name.len, dir->d_name.name, - rep->d_name.len, rep->d_name.len, rep->d_name.name); + _enter(",'%pd','%pd'", dir, rep); - _debug("remove %p from %p", rep, dir); + if (rep->d_parent != dir) { + end_removing(rep); + _leave(" = -ESTALE"); + return -ESTALE; + } /* non-directories can just be unlinked */ - if (!S_ISDIR(rep->d_inode->i_mode)) { - _debug("unlink stale object"); - - path.mnt = cache->mnt; - path.dentry = dir; - ret = security_path_unlink(&path, rep); - if (ret < 0) { - cachefiles_io_error(cache, "Unlink security error"); - } else { - ret = vfs_unlink(dir->d_inode, rep); - - if (preemptive) - cachefiles_mark_object_buried(cache, rep); - } - - mutex_unlock(&dir->d_inode->i_mutex); - - if (ret == -EIO) - cachefiles_io_error(cache, "Unlink failed"); + if (!d_is_dir(rep)) { + ret = cachefiles_unlink(cache, object, dir, rep, why); + end_removing(rep); _leave(" = %d", ret); return ret; @@ -311,19 +294,21 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache, /* directories have to be moved to the graveyard */ _debug("move stale object to graveyard"); - mutex_unlock(&dir->d_inode->i_mutex); + end_removing(rep); try_again: /* first step is to make up a grave dentry in the graveyard */ sprintf(nbuffer, "%08x%08x", - (uint32_t) get_seconds(), + (uint32_t) ktime_get_real_seconds(), (uint32_t) atomic_inc_return(&cache->gravecounter)); /* do the multiway lock magic */ trap = lock_rename(cache->graveyard, dir); + if (IS_ERR(trap)) + return PTR_ERR(trap); /* do some checks before getting the grave dentry */ - if (rep->d_parent != dir) { + if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) { /* the entry was probably culled when we dropped the parent dir * lock */ unlock_rename(cache->graveyard, dir); @@ -331,7 +316,7 @@ try_again: return 0; } - if (!S_ISDIR(cache->graveyard->d_inode->i_mode)) { + if (!d_can_lookup(cache->graveyard)) { unlock_rename(cache->graveyard, dir); cachefiles_io_error(cache, "Graveyard no longer a directory"); return -EIO; @@ -349,21 +334,23 @@ try_again: return -EIO; } - grave = lookup_one_len(nbuffer, cache->graveyard, strlen(nbuffer)); + grave = lookup_one(&nop_mnt_idmap, &QSTR(nbuffer), cache->graveyard); if (IS_ERR(grave)) { unlock_rename(cache->graveyard, dir); + trace_cachefiles_vfs_error(object, d_inode(cache->graveyard), + PTR_ERR(grave), + cachefiles_trace_lookup_error); if (PTR_ERR(grave) == -ENOMEM) { _leave(" = -ENOMEM"); return -ENOMEM; } - cachefiles_io_error(cache, "Lookup error %ld", - PTR_ERR(grave)); + cachefiles_io_error(cache, "Lookup error %ld", PTR_ERR(grave)); return -EIO; } - if (grave->d_inode) { + if (d_is_positive(grave)) { unlock_rename(cache->graveyard, dir); dput(grave); grave = NULL; @@ -391,20 +378,30 @@ try_again: path.dentry = dir; path_to_graveyard.mnt = cache->mnt; path_to_graveyard.dentry = cache->graveyard; - ret = security_path_rename(&path, rep, &path_to_graveyard, grave); + ret = security_path_rename(&path, rep, &path_to_graveyard, grave, 0); if (ret < 0) { 
cachefiles_io_error(cache, "Rename security error %d", ret); } else { - ret = vfs_rename(dir->d_inode, rep, - cache->graveyard->d_inode, grave); + struct renamedata rd = { + .mnt_idmap = &nop_mnt_idmap, + .old_parent = dir, + .old_dentry = rep, + .new_parent = cache->graveyard, + .new_dentry = grave, + }; + trace_cachefiles_rename(object, d_inode(rep)->i_ino, why); + ret = cachefiles_inject_read_error(); + if (ret == 0) + ret = vfs_rename(&rd); + if (ret != 0) + trace_cachefiles_vfs_error(object, d_inode(dir), ret, + cachefiles_trace_rename_error); if (ret != 0 && ret != -ENOMEM) cachefiles_io_error(cache, "Rename failed with error %d", ret); - - if (preemptive) - cachefiles_mark_object_buried(cache, rep); } + __cachefiles_unmark_inode_in_use(object, d_inode(rep)); unlock_rename(cache->graveyard, dir); dput(grave); _leave(" = 0"); @@ -412,551 +409,425 @@ try_again: } /* - * delete an object representation from the cache + * Delete a cache file. */ -int cachefiles_delete_object(struct cachefiles_cache *cache, - struct cachefiles_object *object) +int cachefiles_delete_object(struct cachefiles_object *object, + enum fscache_why_object_killed why) { - struct dentry *dir; + struct cachefiles_volume *volume = object->volume; + struct dentry *dentry = object->file->f_path.dentry; + struct dentry *fan = volume->fanout[(u8)object->cookie->key_hash]; int ret; - _enter(",OBJ%x{%p}", object->fscache.debug_id, object->dentry); - - ASSERT(object->dentry); - ASSERT(object->dentry->d_inode); - ASSERT(object->dentry->d_parent); - - dir = dget_parent(object->dentry); - - mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); - - if (test_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) { - /* object allocation for the same key preemptively deleted this - * object's file so that it could create its own file */ - _debug("object preemptively buried"); - mutex_unlock(&dir->d_inode->i_mutex); - ret = 0; - } else { - /* we need to check that our parent is _still_ our parent - it - * may have been renamed */ - if (dir == object->dentry->d_parent) { - ret = cachefiles_bury_object(cache, dir, - object->dentry, false); - } else { - /* it got moved, presumably by cachefilesd culling it, - * so it's no longer in the key path and we can ignore - * it */ - mutex_unlock(&dir->d_inode->i_mutex); - ret = 0; - } - } + _enter(",OBJ%x{%pD}", object->debug_id, object->file); - dput(dir); - _leave(" = %d", ret); + dentry = start_removing_dentry(fan, dentry); + if (IS_ERR(dentry)) + ret = PTR_ERR(dentry); + else + ret = cachefiles_unlink(volume->cache, object, fan, dentry, why); + end_removing(dentry); return ret; } /* - * walk from the parent object to the child object through the backing - * filesystem, creating directories as we go + * Create a temporary file and leave it unattached and un-xattr'd until the + * time comes to discard the object from memory. 
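
The user-space analogue of what follows is O_TMPFILE: an anonymous inode in the target directory that cannot collide with, or be culled under, any name until it is linked in. A sketch assuming Linux's O_TMPFILE and the same create-then-size order as the function below:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <unistd.h>

    /* Anonymous file in 'dir', sized up front like the vfs_truncate()
     * call below; it stays invisible until linked into place. */
    static int make_cache_tmpfile(const char *dir, off_t size)
    {
            int fd = open(dir, O_TMPFILE | O_RDWR, 0600);

            if (fd < 0)
                    return -1;
            if (size > 0 && ftruncate(fd, size) < 0) {
                    close(fd);
                    return -1;
            }
            return fd;
    }
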
*/ -int cachefiles_walk_to_object(struct cachefiles_object *parent, - struct cachefiles_object *object, - const char *key, - struct cachefiles_xattr *auxdata) +struct file *cachefiles_create_tmpfile(struct cachefiles_object *object) { - struct cachefiles_cache *cache; - struct dentry *dir, *next = NULL; - struct path path; - unsigned long start; - const char *name; - int ret, nlen; - - _enter("OBJ%x{%p},OBJ%x,%s,", - parent->fscache.debug_id, parent->dentry, - object->fscache.debug_id, key); - - cache = container_of(parent->fscache.cache, - struct cachefiles_cache, cache); - path.mnt = cache->mnt; - - ASSERT(parent->dentry); - ASSERT(parent->dentry->d_inode); - - if (!(S_ISDIR(parent->dentry->d_inode->i_mode))) { - // TODO: convert file to dir - _leave("looking up in none directory"); - return -ENOBUFS; + struct cachefiles_volume *volume = object->volume; + struct cachefiles_cache *cache = volume->cache; + const struct cred *saved_cred; + struct dentry *fan = volume->fanout[(u8)object->cookie->key_hash]; + struct file *file; + const struct path parentpath = { .mnt = cache->mnt, .dentry = fan }; + uint64_t ni_size; + long ret; + + + cachefiles_begin_secure(cache, &saved_cred); + + ret = cachefiles_inject_write_error(); + if (ret == 0) { + file = kernel_tmpfile_open(&nop_mnt_idmap, &parentpath, + S_IFREG | 0600, + O_RDWR | O_LARGEFILE | O_DIRECT, + cache->cache_cred); + ret = PTR_ERR_OR_ZERO(file); + } + if (ret) { + trace_cachefiles_vfs_error(object, d_inode(fan), ret, + cachefiles_trace_tmpfile_error); + if (ret == -EIO) + cachefiles_io_error_obj(object, "Failed to create tmpfile"); + goto err; } - dir = dget(parent->dentry); - -advance: - /* attempt to transit the first directory component */ - name = key; - nlen = strlen(key); - - /* key ends in a double NUL */ - key = key + nlen + 1; - if (!*key) - key = NULL; - -lookup_again: - /* search the current directory for the element name */ - _debug("lookup '%s'", name); + trace_cachefiles_tmpfile(object, file_inode(file)); - mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); + /* This is a newly created file with no other possible user */ + if (!cachefiles_mark_inode_in_use(object, file_inode(file))) + WARN_ON(1); - start = jiffies; - next = lookup_one_len(name, dir, nlen); - cachefiles_hist(cachefiles_lookup_histogram, start); - if (IS_ERR(next)) - goto lookup_error; + ret = cachefiles_ondemand_init_object(object); + if (ret < 0) + goto err_unuse; - _debug("next -> %p %s", next, next->d_inode ? 
"positive" : "negative"); - - if (!key) - object->new = !next->d_inode; - - /* if this element of the path doesn't exist, then the lookup phase - * failed, and we can release any readers in the certain knowledge that - * there's nothing for them to actually read */ - if (!next->d_inode) - fscache_object_lookup_negative(&object->fscache); - - /* we need to create the object if it's negative */ - if (key || object->type == FSCACHE_COOKIE_TYPE_INDEX) { - /* index objects and intervening tree levels must be subdirs */ - if (!next->d_inode) { - ret = cachefiles_has_space(cache, 1, 0); - if (ret < 0) - goto create_error; - - path.dentry = dir; - ret = security_path_mkdir(&path, next, 0); - if (ret < 0) - goto create_error; - start = jiffies; - ret = vfs_mkdir(dir->d_inode, next, 0); - cachefiles_hist(cachefiles_mkdir_histogram, start); - if (ret < 0) - goto create_error; - - ASSERT(next->d_inode); - - _debug("mkdir -> %p{%p{ino=%lu}}", - next, next->d_inode, next->d_inode->i_ino); - - } else if (!S_ISDIR(next->d_inode->i_mode)) { - kerror("inode %lu is not a directory", - next->d_inode->i_ino); - ret = -ENOBUFS; - goto error; - } + ni_size = object->cookie->object_size; + ni_size = round_up(ni_size, CACHEFILES_DIO_BLOCK_SIZE); - } else { - /* non-index objects start out life as files */ - if (!next->d_inode) { - ret = cachefiles_has_space(cache, 1, 0); - if (ret < 0) - goto create_error; - - path.dentry = dir; - ret = security_path_mknod(&path, next, S_IFREG, 0); - if (ret < 0) - goto create_error; - start = jiffies; - ret = vfs_create(dir->d_inode, next, S_IFREG, true); - cachefiles_hist(cachefiles_create_histogram, start); - if (ret < 0) - goto create_error; - - ASSERT(next->d_inode); - - _debug("create -> %p{%p{ino=%lu}}", - next, next->d_inode, next->d_inode->i_ino); - - } else if (!S_ISDIR(next->d_inode->i_mode) && - !S_ISREG(next->d_inode->i_mode) - ) { - kerror("inode %lu is not a file or directory", - next->d_inode->i_ino); - ret = -ENOBUFS; - goto error; + if (ni_size > 0) { + trace_cachefiles_trunc(object, file_inode(file), 0, ni_size, + cachefiles_trunc_expand_tmpfile); + ret = cachefiles_inject_write_error(); + if (ret == 0) + ret = vfs_truncate(&file->f_path, ni_size); + if (ret < 0) { + trace_cachefiles_vfs_error( + object, file_inode(file), ret, + cachefiles_trace_trunc_error); + goto err_unuse; } } - /* process the next component */ - if (key) { - _debug("advance"); - mutex_unlock(&dir->d_inode->i_mutex); - dput(dir); - dir = next; - next = NULL; - goto advance; + ret = -EINVAL; + if (unlikely(!file->f_op->read_iter) || + unlikely(!file->f_op->write_iter)) { + fput(file); + pr_notice("Cache does not support read_iter and write_iter\n"); + goto err_unuse; } +out: + cachefiles_end_secure(cache, saved_cred); + return file; - /* we've found the object we were looking for */ - object->dentry = next; - - /* if we've found that the terminal object exists, then we need to - * check its attributes and delete it if it's out of date */ - if (!object->new) { - _debug("validate '%*.*s'", - next->d_name.len, next->d_name.len, next->d_name.name); - - ret = cachefiles_check_object_xattr(object, auxdata); - if (ret == -ESTALE) { - /* delete the object (the deleter drops the directory - * mutex) */ - object->dentry = NULL; - - ret = cachefiles_bury_object(cache, dir, next, true); - dput(next); - next = NULL; +err_unuse: + cachefiles_do_unmark_inode_in_use(object, file_inode(file)); + fput(file); +err: + file = ERR_PTR(ret); + goto out; +} - if (ret < 0) - goto delete_error; +/* + * Create a new 
file. + */ +static bool cachefiles_create_file(struct cachefiles_object *object) +{ + struct file *file; + int ret; - _debug("redo lookup"); - goto lookup_again; - } - } + ret = cachefiles_has_space(object->volume->cache, 1, 0, + cachefiles_has_space_for_create); + if (ret < 0) + return false; - /* note that we're now using this object */ - ret = cachefiles_mark_object_active(cache, object); + file = cachefiles_create_tmpfile(object); + if (IS_ERR(file)) + return false; - mutex_unlock(&dir->d_inode->i_mutex); - dput(dir); - dir = NULL; + set_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &object->cookie->flags); + set_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags); + _debug("create -> %pD{ino=%lu}", file, file_inode(file)->i_ino); + object->file = file; + return true; +} - if (ret == -ETIMEDOUT) - goto mark_active_timed_out; +/* + * Open an existing file, checking its attributes and replacing it if it is + * stale. + */ +static bool cachefiles_open_file(struct cachefiles_object *object, + struct dentry *dentry) +{ + struct cachefiles_cache *cache = object->volume->cache; + struct file *file; + struct path path; + int ret; - _debug("=== OBTAINED_OBJECT ==="); + _enter("%pd", dentry); - if (object->new) { - /* attach data to a newly constructed terminal object */ - ret = cachefiles_set_object_xattr(object, auxdata); - if (ret < 0) - goto check_error; - } else { - /* always update the atime on an object we've just looked up - * (this is used to keep track of culling, and atimes are only - * updated by read, write and readdir but not lookup or - * open) */ - path.dentry = next; - touch_atime(&path); + if (!cachefiles_mark_inode_in_use(object, d_inode(dentry))) { + pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n", + dentry, d_inode(dentry)->i_ino); + return false; } - /* open a file interface onto a data file */ - if (object->type != FSCACHE_COOKIE_TYPE_INDEX) { - if (S_ISREG(object->dentry->d_inode->i_mode)) { - const struct address_space_operations *aops; - - ret = -EPERM; - aops = object->dentry->d_inode->i_mapping->a_ops; - if (!aops->bmap) - goto check_error; - - object->backer = object->dentry; - } else { - BUG(); // TODO: open file in data-class subdir - } + /* We need to open a file interface onto a data file now as we can't do + * it on demand because writeback called from do_exit() sees + * current->fs == NULL - which breaks d_path() called from ext4 open. 
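
Note the O_DIRECT in the open below: kernel-side I/O to the backing file bypasses the page cache, which is why object sizes are rounded up to CACHEFILES_DIO_BLOCK_SIZE elsewhere in this series. A user-space sketch of the alignment rules that implies (4096 is an assumed block size):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdlib.h>
    #include <unistd.h>

    #define DIO_BLOCK 4096

    /* O_DIRECT requires buffer, offset and length to be block-aligned. */
    static ssize_t direct_read(const char *path, off_t off, size_t len)
    {
            void *buf;
            ssize_t n = -1;
            int fd = open(path, O_RDONLY | O_DIRECT);

            if (fd < 0)
                    return -1;
            if (posix_memalign(&buf, DIO_BLOCK, len) == 0) {
                    n = pread(fd, buf, len, off);
                    free(buf);
            }
            close(fd);
            return n;
    }
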
+ */ + path.mnt = cache->mnt; + path.dentry = dentry; + file = kernel_file_open(&path, O_RDWR | O_LARGEFILE | O_DIRECT, cache->cache_cred); + if (IS_ERR(file)) { + trace_cachefiles_vfs_error(object, d_backing_inode(dentry), + PTR_ERR(file), + cachefiles_trace_open_error); + goto error; } - object->new = 0; - fscache_obtained_object(&object->fscache); + if (unlikely(!file->f_op->read_iter) || + unlikely(!file->f_op->write_iter)) { + pr_notice("Cache does not support read_iter and write_iter\n"); + goto error_fput; + } + _debug("file -> %pd positive", dentry); - _leave(" = 0 [%lu]", object->dentry->d_inode->i_ino); - return 0; + ret = cachefiles_ondemand_init_object(object); + if (ret < 0) + goto error_fput; -create_error: - _debug("create error %d", ret); - if (ret == -EIO) - cachefiles_io_error(cache, "Create/mkdir failed"); - goto error; + ret = cachefiles_check_auxdata(object, file); + if (ret < 0) + goto check_failed; -mark_active_timed_out: - _debug("mark active timed out"); - goto release_dentry; + clear_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &object->cookie->flags); -check_error: - _debug("check error %d", ret); - write_lock(&cache->active_lock); - rb_erase(&object->active_node, &cache->active_nodes); - clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); - wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE); - write_unlock(&cache->active_lock); -release_dentry: - dput(object->dentry); - object->dentry = NULL; - goto error_out; - -delete_error: - _debug("delete error %d", ret); - goto error_out2; + object->file = file; -lookup_error: - _debug("lookup error %ld", PTR_ERR(next)); - ret = PTR_ERR(next); - if (ret == -EIO) - cachefiles_io_error(cache, "Lookup failed"); - next = NULL; + /* Always update the atime on an object we've just looked up (this is + * used to keep track of culling, and atimes are only updated by read, + * write and readdir but not lookup or open). + */ + touch_atime(&file->f_path); + return true; + +check_failed: + fscache_cookie_lookup_negative(object->cookie); + cachefiles_unmark_inode_in_use(object, file); + fput(file); + if (ret == -ESTALE) + return cachefiles_create_file(object); + return false; + +error_fput: + fput(file); error: - mutex_unlock(&dir->d_inode->i_mutex); - dput(next); -error_out2: - dput(dir); -error_out: - _leave(" = error %d", -ret); - return ret; + cachefiles_do_unmark_inode_in_use(object, d_inode(dentry)); + return false; } /* - * get a subdirectory + * walk from the parent object to the child object through the backing + * filesystem, creating directories as we go */ -struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, - struct dentry *dir, - const char *dirname) +bool cachefiles_look_up_object(struct cachefiles_object *object) { - struct dentry *subdir; - unsigned long start; - struct path path; + struct cachefiles_volume *volume = object->volume; + struct dentry *dentry, *fan = volume->fanout[(u8)object->cookie->key_hash]; int ret; - _enter(",,%s", dirname); - - /* search the current directory for the element name */ - mutex_lock(&dir->d_inode->i_mutex); - - start = jiffies; - subdir = lookup_one_len(dirname, dir, strlen(dirname)); - cachefiles_hist(cachefiles_lookup_histogram, start); - if (IS_ERR(subdir)) { - if (PTR_ERR(subdir) == -ENOMEM) - goto nomem_d_alloc; - goto lookup_error; + _enter("OBJ%x,%s,", object->debug_id, object->d_name); + + /* Look up path "cache/vol/fanout/file". 
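
To make the shape of that path concrete: a sketch of reconstructing the backing name, assuming the per-volume fanout directories are indexed by the low byte of the cookie's key hash (as the fanout[] lookups in this file do) and that the on-disk fanout names are the hex byte, "@00".."@ff" (an assumption; volume.c is outside this hunk):

    #include <stdint.h>
    #include <stdio.h>

    /* cache/vol/fanout/file: volume dir, one of 256 fanout buckets,
     * then the cooked object name from cachefiles_cook_key(). */
    static void backing_path(char *buf, size_t len, const char *volume,
                             uint32_t key_hash, const char *d_name)
    {
            snprintf(buf, len, "%s/@%02x/%s",
                     volume, (uint8_t)key_hash, d_name);
    }
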
*/ + ret = cachefiles_inject_read_error(); + if (ret == 0) + dentry = lookup_one_positive_unlocked(&nop_mnt_idmap, + &QSTR(object->d_name), fan); + else + dentry = ERR_PTR(ret); + trace_cachefiles_lookup(object, fan, dentry); + if (IS_ERR(dentry)) { + if (dentry == ERR_PTR(-ENOENT)) + goto new_file; + if (dentry == ERR_PTR(-EIO)) + cachefiles_io_error_obj(object, "Lookup failed"); + return false; + } + + if (!d_is_reg(dentry)) { + pr_err("%pd is not a file\n", dentry); + struct dentry *de = start_removing_dentry(fan, dentry); + if (IS_ERR(de)) + ret = PTR_ERR(de); + else + ret = cachefiles_bury_object(volume->cache, object, + fan, de, + FSCACHE_OBJECT_IS_WEIRD); + dput(dentry); + if (ret < 0) + return false; + goto new_file; } - _debug("subdir -> %p %s", - subdir, subdir->d_inode ? "positive" : "negative"); + ret = cachefiles_open_file(object, dentry); + dput(dentry); + if (!ret) + return false; - /* we need to create the subdir if it doesn't exist yet */ - if (!subdir->d_inode) { - ret = cachefiles_has_space(cache, 1, 0); - if (ret < 0) - goto mkdir_error; + _leave(" = t [%lu]", file_inode(object->file)->i_ino); + return true; - _debug("attempt mkdir"); +new_file: + fscache_cookie_lookup_negative(object->cookie); + return cachefiles_create_file(object); +} - path.mnt = cache->mnt; - path.dentry = dir; - ret = security_path_mkdir(&path, subdir, 0700); - if (ret < 0) - goto mkdir_error; - ret = vfs_mkdir(dir->d_inode, subdir, 0700); - if (ret < 0) - goto mkdir_error; +/* + * Attempt to link a temporary file into its rightful place in the cache. + */ +bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache, + struct cachefiles_object *object) +{ + struct cachefiles_volume *volume = object->volume; + struct dentry *dentry, *fan = volume->fanout[(u8)object->cookie->key_hash]; + bool success = false; + int ret; - ASSERT(subdir->d_inode); + _enter(",%pD", object->file); - _debug("mkdir -> %p{%p{ino=%lu}}", - subdir, - subdir->d_inode, - subdir->d_inode->i_ino); + ret = cachefiles_inject_read_error(); + if (ret == 0) + dentry = start_creating(&nop_mnt_idmap, fan, &QSTR(object->d_name)); + else + dentry = ERR_PTR(ret); + if (IS_ERR(dentry)) { + trace_cachefiles_vfs_error(object, d_inode(fan), PTR_ERR(dentry), + cachefiles_trace_lookup_error); + _debug("lookup fail %ld", PTR_ERR(dentry)); + goto out; } - mutex_unlock(&dir->d_inode->i_mutex); + /* + * This loop will only execute more than once if some other thread + * races to create the object we are trying to create. 
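
The same race and retry can be reproduced from user space with linkat(): try to give the anonymous file its name, and if someone else got there first, unlink the stale entry and try again. A sketch under that assumption:

    #define _GNU_SOURCE
    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Commit an O_TMPFILE fd under 'name' in dirfd, displacing any
     * stale file that raced into place, much like the loop below. */
    static int commit_name(int dirfd, int fd, const char *name)
    {
            char src[48];

            snprintf(src, sizeof(src), "/proc/self/fd/%d", fd);
            for (;;) {
                    if (linkat(AT_FDCWD, src, dirfd, name,
                               AT_SYMLINK_FOLLOW) == 0)
                            return 0;
                    if (errno != EEXIST || unlinkat(dirfd, name, 0) < 0)
                            return -1;
            }
    }
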
+ */ + while (!d_is_negative(dentry)) { + ret = cachefiles_unlink(volume->cache, object, fan, dentry, + FSCACHE_OBJECT_IS_STALE); + if (ret < 0) + goto out_end; - /* we need to make sure the subdir is a directory */ - ASSERT(subdir->d_inode); + end_creating(dentry); - if (!S_ISDIR(subdir->d_inode->i_mode)) { - kerror("%s is not a directory", dirname); - ret = -EIO; - goto check_error; + ret = cachefiles_inject_read_error(); + if (ret == 0) + dentry = start_creating(&nop_mnt_idmap, fan, + &QSTR(object->d_name)); + else + dentry = ERR_PTR(ret); + if (IS_ERR(dentry)) { + trace_cachefiles_vfs_error(object, d_inode(fan), PTR_ERR(dentry), + cachefiles_trace_lookup_error); + _debug("lookup fail %ld", PTR_ERR(dentry)); + goto out; + } } - ret = -EPERM; - if (!subdir->d_inode->i_op || - !subdir->d_inode->i_op->setxattr || - !subdir->d_inode->i_op->getxattr || - !subdir->d_inode->i_op->lookup || - !subdir->d_inode->i_op->mkdir || - !subdir->d_inode->i_op->create || - !subdir->d_inode->i_op->rename || - !subdir->d_inode->i_op->rmdir || - !subdir->d_inode->i_op->unlink) - goto check_error; - - _leave(" = [%lu]", subdir->d_inode->i_ino); - return subdir; - -check_error: - dput(subdir); - _leave(" = %d [check]", ret); - return ERR_PTR(ret); - -mkdir_error: - mutex_unlock(&dir->d_inode->i_mutex); - dput(subdir); - kerror("mkdir %s failed with error %d", dirname, ret); - return ERR_PTR(ret); - -lookup_error: - mutex_unlock(&dir->d_inode->i_mutex); - ret = PTR_ERR(subdir); - kerror("Lookup %s failed with error %d", dirname, ret); - return ERR_PTR(ret); - -nomem_d_alloc: - mutex_unlock(&dir->d_inode->i_mutex); - _leave(" = -ENOMEM"); - return ERR_PTR(-ENOMEM); + ret = cachefiles_inject_read_error(); + if (ret == 0) + ret = vfs_link(object->file->f_path.dentry, &nop_mnt_idmap, + d_inode(fan), dentry, NULL); + if (ret < 0) { + trace_cachefiles_vfs_error(object, d_inode(fan), ret, + cachefiles_trace_link_error); + _debug("link fail %d", ret); + } else { + trace_cachefiles_link(object, file_inode(object->file)); + spin_lock(&object->lock); + /* TODO: Do we want to switch the file pointer to the new dentry? */ + clear_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags); + spin_unlock(&object->lock); + success = true; + } + +out_end: + end_creating(dentry); +out: + _leave(" = %u", success); + return success; } /* - * find out if an object is in use or not - * - if finds object and it's not in use: - * - returns a pointer to the object and a reference on it - * - returns with the directory locked + * Look up an inode to be checked or culled. Return -EBUSY if the inode is + * marked in use. */ -static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache, - struct dentry *dir, - char *filename) +static struct dentry *cachefiles_lookup_for_cull(struct cachefiles_cache *cache, + struct dentry *dir, + char *filename) { - struct cachefiles_object *object; - struct rb_node *_n; struct dentry *victim; - unsigned long start; - int ret; - - //_enter(",%*.*s/,%s", - // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); + int ret = -ENOENT; - /* look up the victim */ - mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); + victim = start_removing(&nop_mnt_idmap, dir, &QSTR(filename)); - start = jiffies; - victim = lookup_one_len(filename, dir, strlen(filename)); - cachefiles_hist(cachefiles_lookup_histogram, start); if (IS_ERR(victim)) goto lookup_error; - - //_debug("victim -> %p %s", - // victim, victim->d_inode ? 
"positive" : "negative"); - - /* if the object is no longer there then we probably retired the object - * at the netfs's request whilst the cull was in progress - */ - if (!victim->d_inode) { - mutex_unlock(&dir->d_inode->i_mutex); - dput(victim); - _leave(" = -ENOENT [absent]"); - return ERR_PTR(-ENOENT); - } - - /* check to see if we're using this object */ - read_lock(&cache->active_lock); - - _n = cache->active_nodes.rb_node; - - while (_n) { - object = rb_entry(_n, struct cachefiles_object, active_node); - - if (object->dentry > victim) - _n = _n->rb_left; - else if (object->dentry < victim) - _n = _n->rb_right; - else - goto object_in_use; - } - - read_unlock(&cache->active_lock); - - //_leave(" = %p", victim); + if (d_inode(victim)->i_flags & S_KERNEL_FILE) + goto lookup_busy; return victim; -object_in_use: - read_unlock(&cache->active_lock); - mutex_unlock(&dir->d_inode->i_mutex); - dput(victim); - //_leave(" = -EBUSY [in use]"); - return ERR_PTR(-EBUSY); +lookup_busy: + ret = -EBUSY; + end_removing(victim); + return ERR_PTR(ret); lookup_error: - mutex_unlock(&dir->d_inode->i_mutex); ret = PTR_ERR(victim); - if (ret == -ENOENT) { - /* file or dir now absent - probably retired by netfs */ - _leave(" = -ESTALE [absent]"); - return ERR_PTR(-ESTALE); - } + if (ret == -ENOENT) + return ERR_PTR(-ESTALE); /* Probably got retired by the netfs */ if (ret == -EIO) { cachefiles_io_error(cache, "Lookup failed"); } else if (ret != -ENOMEM) { - kerror("Internal error: %d", ret); + pr_err("Internal error: %d\n", ret); ret = -EIO; } - _leave(" = %d", ret); return ERR_PTR(ret); } /* - * cull an object if it's not in use + * Cull an object if it's not in use * - called only by cache manager daemon */ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, char *filename) { struct dentry *victim; + struct inode *inode; int ret; - _enter(",%*.*s/,%s", - dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); + _enter(",%pd/,%s", dir, filename); - victim = cachefiles_check_active(cache, dir, filename); + victim = cachefiles_lookup_for_cull(cache, dir, filename); if (IS_ERR(victim)) return PTR_ERR(victim); - _debug("victim -> %p %s", - victim, victim->d_inode ? "positive" : "negative"); - - /* okay... 
the victim is not being used so we can cull it - * - start by marking it as stale - */ - _debug("victim is cullable"); - - ret = cachefiles_remove_object_xattr(cache, victim); + /* check to see if someone is using this object */ + inode = d_inode(victim); + inode_lock(inode); + if (inode->i_flags & S_KERNEL_FILE) { + ret = -EBUSY; + } else { + /* Stop the cache from picking it back up */ + inode->i_flags |= S_KERNEL_FILE; + ret = 0; + } + inode_unlock(inode); if (ret < 0) goto error_unlock; - /* actually remove the victim (drops the dir mutex) */ - _debug("bury"); - - ret = cachefiles_bury_object(cache, dir, victim, false); + ret = cachefiles_bury_object(cache, NULL, dir, victim, + FSCACHE_OBJECT_WAS_CULLED); + dput(victim); if (ret < 0) goto error; - dput(victim); + fscache_count_culled(); _leave(" = 0"); return 0; error_unlock: - mutex_unlock(&dir->d_inode->i_mutex); + end_removing(victim); error: - dput(victim); - if (ret == -ENOENT) { - /* file or dir now absent - probably retired by netfs */ - _leave(" = -ESTALE [absent]"); - return -ESTALE; - } + if (ret == -ENOENT) + return -ESTALE; /* Probably got retired by the netfs */ if (ret != -ENOMEM) { - kerror("Internal error: %d", ret); + pr_err("Internal error: %d\n", ret); ret = -EIO; } @@ -965,7 +836,7 @@ error: } /* - * find out if an object is in use or not + * Find out if an object is in use or not * - called only by cache manager daemon * - returns -EBUSY or 0 to indicate whether an object is in use or not */ @@ -973,16 +844,13 @@ int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir, char *filename) { struct dentry *victim; + int ret = 0; - //_enter(",%*.*s/,%s", - // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); - - victim = cachefiles_check_active(cache, dir, filename); + victim = cachefiles_lookup_for_cull(cache, dir, filename); if (IS_ERR(victim)) return PTR_ERR(victim); - mutex_unlock(&dir->d_inode->i_mutex); + inode_unlock(d_inode(dir)); dput(victim); - //_leave(" = 0"); - return 0; + return ret; } diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c new file mode 100644 index 000000000000..a7ed86fa98bb --- /dev/null +++ b/fs/cachefiles/ondemand.c @@ -0,0 +1,762 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <linux/anon_inodes.h> +#include <linux/uio.h> +#include "internal.h" + +struct ondemand_anon_file { + struct file *file; + int fd; +}; + +static inline void cachefiles_req_put(struct cachefiles_req *req) +{ + if (refcount_dec_and_test(&req->ref)) + kfree(req); +} + +static int cachefiles_ondemand_fd_release(struct inode *inode, + struct file *file) +{ + struct cachefiles_object *object = file->private_data; + struct cachefiles_cache *cache; + struct cachefiles_ondemand_info *info; + int object_id; + struct cachefiles_req *req; + XA_STATE(xas, NULL, 0); + + if (!object) + return 0; + + info = object->ondemand; + cache = object->volume->cache; + xas.xa = &cache->reqs; + + xa_lock(&cache->reqs); + spin_lock(&info->lock); + object_id = info->ondemand_id; + info->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED; + cachefiles_ondemand_set_object_close(object); + spin_unlock(&info->lock); + + /* Only flush CACHEFILES_REQ_NEW marked req to avoid race with daemon_read */ + xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) { + if (req->msg.object_id == object_id && + req->msg.opcode == CACHEFILES_OP_CLOSE) { + complete(&req->done); + xas_store(&xas, NULL); + } + } + xa_unlock(&cache->reqs); + + xa_erase(&cache->ondemand_ids, object_id); + 
trace_cachefiles_ondemand_fd_release(object, object_id); + cachefiles_put_object(object, cachefiles_obj_put_ondemand_fd); + cachefiles_put_unbind_pincount(cache); + return 0; +} + +static ssize_t cachefiles_ondemand_fd_write_iter(struct kiocb *kiocb, + struct iov_iter *iter) +{ + struct cachefiles_object *object = kiocb->ki_filp->private_data; + struct cachefiles_cache *cache = object->volume->cache; + struct file *file; + size_t len = iter->count, aligned_len = len; + loff_t pos = kiocb->ki_pos; + const struct cred *saved_cred; + int ret; + + spin_lock(&object->lock); + file = object->file; + if (!file) { + spin_unlock(&object->lock); + return -ENOBUFS; + } + get_file(file); + spin_unlock(&object->lock); + + cachefiles_begin_secure(cache, &saved_cred); + ret = __cachefiles_prepare_write(object, file, &pos, &aligned_len, len, true); + cachefiles_end_secure(cache, saved_cred); + if (ret < 0) + goto out; + + trace_cachefiles_ondemand_fd_write(object, file_inode(file), pos, len); + ret = __cachefiles_write(object, file, pos, iter, NULL, NULL); + if (ret > 0) + kiocb->ki_pos += ret; + +out: + fput(file); + return ret; +} + +static loff_t cachefiles_ondemand_fd_llseek(struct file *filp, loff_t pos, + int whence) +{ + struct cachefiles_object *object = filp->private_data; + struct file *file; + loff_t ret; + + spin_lock(&object->lock); + file = object->file; + if (!file) { + spin_unlock(&object->lock); + return -ENOBUFS; + } + get_file(file); + spin_unlock(&object->lock); + + ret = vfs_llseek(file, pos, whence); + fput(file); + + return ret; +} + +static long cachefiles_ondemand_fd_ioctl(struct file *filp, unsigned int ioctl, + unsigned long id) +{ + struct cachefiles_object *object = filp->private_data; + struct cachefiles_cache *cache = object->volume->cache; + struct cachefiles_req *req; + XA_STATE(xas, &cache->reqs, id); + + if (ioctl != CACHEFILES_IOC_READ_COMPLETE) + return -EINVAL; + + if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) + return -EOPNOTSUPP; + + xa_lock(&cache->reqs); + req = xas_load(&xas); + if (!req || req->msg.opcode != CACHEFILES_OP_READ || + req->object != object) { + xa_unlock(&cache->reqs); + return -EINVAL; + } + xas_store(&xas, NULL); + xa_unlock(&cache->reqs); + + trace_cachefiles_ondemand_cread(object, id); + complete(&req->done); + return 0; +} + +static const struct file_operations cachefiles_ondemand_fd_fops = { + .owner = THIS_MODULE, + .release = cachefiles_ondemand_fd_release, + .write_iter = cachefiles_ondemand_fd_write_iter, + .llseek = cachefiles_ondemand_fd_llseek, + .unlocked_ioctl = cachefiles_ondemand_fd_ioctl, +}; + +/* + * OPEN request Completion (copen) + * - command: "copen <id>,<cache_size>" + * <cache_size> indicates the object size if >=0, error code if negative + */ +int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) +{ + struct cachefiles_req *req; + struct fscache_cookie *cookie; + struct cachefiles_ondemand_info *info; + char *pid, *psize; + unsigned long id; + long size; + int ret; + XA_STATE(xas, &cache->reqs, 0); + + if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) + return -EOPNOTSUPP; + + if (!*args) { + pr_err("Empty id specified\n"); + return -EINVAL; + } + + pid = args; + psize = strchr(args, ','); + if (!psize) { + pr_err("Cache size is not specified\n"); + return -EINVAL; + } + + *psize = 0; + psize++; + + ret = kstrtoul(pid, 0, &id); + if (ret) + return ret; + + xa_lock(&cache->reqs); + xas.xa_index = id; + req = xas_load(&xas); + if (!req || req->msg.opcode != CACHEFILES_OP_OPEN || + 
!req->object->ondemand->ondemand_id) {
+ xa_unlock(&cache->reqs);
+ return -EINVAL;
+ }
+ xas_store(&xas, NULL);
+ xa_unlock(&cache->reqs);
+
+ info = req->object->ondemand;
+ /* fail the OPEN request if the copen format is invalid */
+ ret = kstrtol(psize, 0, &size);
+ if (ret) {
+ req->error = ret;
+ goto out;
+ }
+
+ /* fail the OPEN request if the daemon reports an error */
+ if (size < 0) {
+ if (!IS_ERR_VALUE(size)) {
+ req->error = -EINVAL;
+ ret = -EINVAL;
+ } else {
+ req->error = size;
+ ret = 0;
+ }
+ goto out;
+ }
+
+ spin_lock(&info->lock);
+ /*
+ * Was the anonymous fd closed before copen? Fail the request.
+ *
+ *             t1             |             t2
+ * ---------------------------------------------------------
+ *                             cachefiles_ondemand_copen
+ *                             req = xa_erase(&cache->reqs, id)
+ * // Anon fd is maliciously closed.
+ * cachefiles_ondemand_fd_release
+ * xa_lock(&cache->reqs)
+ * cachefiles_ondemand_set_object_close(object)
+ * xa_unlock(&cache->reqs)
+ *                             cachefiles_ondemand_set_object_open
+ *                             // No one will ever close it again.
+ * cachefiles_ondemand_daemon_read
+ * cachefiles_ondemand_select_req
+ *
+ * A read req is then handed out whose fd is already closed; the daemon
+ * can't issue a cread ioctl against a closed fd, so it hangs.
+ */
+ if (info->ondemand_id == CACHEFILES_ONDEMAND_ID_CLOSED) {
+ spin_unlock(&info->lock);
+ req->error = -EBADFD;
+ goto out;
+ }
+ cookie = req->object->cookie;
+ cookie->object_size = size;
+ if (size)
+ clear_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags);
+ else
+ set_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags);
+ trace_cachefiles_ondemand_copen(req->object, id, size);
+
+ cachefiles_ondemand_set_object_open(req->object);
+ spin_unlock(&info->lock);
+ wake_up_all(&cache->daemon_pollwq);
+
+out:
+ spin_lock(&info->lock);
+ /* Set the object to the closed state so it doesn't linger as reopening */
+ if (info->ondemand_id == CACHEFILES_ONDEMAND_ID_CLOSED)
+ cachefiles_ondemand_set_object_close(req->object);
+ spin_unlock(&info->lock);
+ complete(&req->done);
+ return ret;
+}
+
+int cachefiles_ondemand_restore(struct cachefiles_cache *cache, char *args)
+{
+ struct cachefiles_req *req;
+
+ XA_STATE(xas, &cache->reqs, 0);
+
+ if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags))
+ return -EOPNOTSUPP;
+
+ /*
+ * Reset the requests to the CACHEFILES_REQ_NEW state, so that requests
+ * that had been processed halfway when the user daemon crashed can be
+ * reprocessed after the recovery.
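
On the daemon side this is the failover hook: a freshly restarted instance writes one command to the control fd and every half-processed request comes back as if new. A sketch, assuming "restore" is the command string bound to the handler above:

    #include <unistd.h>

    /* Requeue everything that was in flight when the previous daemon
     * instance died, then resume the normal read loop. */
    static int recover_inflight(int devfd)
    {
            static const char cmd[] = "restore";

            return write(devfd, cmd, sizeof(cmd) - 1) < 0 ? -1 : 0;
    }
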
+ */ + xas_lock(&xas); + xas_for_each(&xas, req, ULONG_MAX) + xas_set_mark(&xas, CACHEFILES_REQ_NEW); + xas_unlock(&xas); + + wake_up_all(&cache->daemon_pollwq); + return 0; +} + +static int cachefiles_ondemand_get_fd(struct cachefiles_req *req, + struct ondemand_anon_file *anon_file) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + struct cachefiles_open *load; + u32 object_id; + int ret; + + object = cachefiles_grab_object(req->object, + cachefiles_obj_get_ondemand_fd); + cache = object->volume->cache; + + ret = xa_alloc_cyclic(&cache->ondemand_ids, &object_id, NULL, + XA_LIMIT(1, INT_MAX), + &cache->ondemand_id_next, GFP_KERNEL); + if (ret < 0) + goto err; + + anon_file->fd = get_unused_fd_flags(O_WRONLY); + if (anon_file->fd < 0) { + ret = anon_file->fd; + goto err_free_id; + } + + anon_file->file = anon_inode_getfile_fmode("[cachefiles]", + &cachefiles_ondemand_fd_fops, object, + O_WRONLY, FMODE_PWRITE | FMODE_LSEEK); + if (IS_ERR(anon_file->file)) { + ret = PTR_ERR(anon_file->file); + goto err_put_fd; + } + + spin_lock(&object->ondemand->lock); + if (object->ondemand->ondemand_id > 0) { + spin_unlock(&object->ondemand->lock); + /* Pair with check in cachefiles_ondemand_fd_release(). */ + anon_file->file->private_data = NULL; + ret = -EEXIST; + goto err_put_file; + } + + load = (void *)req->msg.data; + load->fd = anon_file->fd; + object->ondemand->ondemand_id = object_id; + spin_unlock(&object->ondemand->lock); + + cachefiles_get_unbind_pincount(cache); + trace_cachefiles_ondemand_open(object, &req->msg, load); + return 0; + +err_put_file: + fput(anon_file->file); + anon_file->file = NULL; +err_put_fd: + put_unused_fd(anon_file->fd); + anon_file->fd = ret; +err_free_id: + xa_erase(&cache->ondemand_ids, object_id); +err: + spin_lock(&object->ondemand->lock); + /* Avoid marking an opened object as closed. */ + if (object->ondemand->ondemand_id <= 0) + cachefiles_ondemand_set_object_close(object); + spin_unlock(&object->ondemand->lock); + cachefiles_put_object(object, cachefiles_obj_put_ondemand_fd); + return ret; +} + +static void ondemand_object_worker(struct work_struct *work) +{ + struct cachefiles_ondemand_info *info = + container_of(work, struct cachefiles_ondemand_info, ondemand_work); + + cachefiles_ondemand_init_object(info->object); +} + +/* + * If there are any inflight or subsequent READ requests on the + * closed object, reopen it. + * Skip read requests whose related object is reopening. 
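
The selector below leans on xarray marks for its visibility rule: a request stops being offered to the daemon the moment its NEW mark is cleared, with no separate queue to maintain. A generic kernel-side sketch of that idiom:

    #include <linux/xarray.h>

    /* Count entries still carrying a given mark, e.g. requests not yet
     * handed to the daemon; everything else is skipped for free. */
    static unsigned long count_marked(struct xarray *xa, xa_mark_t mark)
    {
            unsigned long index, n = 0;
            void *entry;

            xa_for_each_marked(xa, index, entry, mark)
                    n++;
            return n;
    }
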
+ */ +static struct cachefiles_req *cachefiles_ondemand_select_req(struct xa_state *xas, + unsigned long xa_max) +{ + struct cachefiles_req *req; + struct cachefiles_object *object; + struct cachefiles_ondemand_info *info; + + xas_for_each_marked(xas, req, xa_max, CACHEFILES_REQ_NEW) { + if (req->msg.opcode != CACHEFILES_OP_READ) + return req; + object = req->object; + info = object->ondemand; + if (cachefiles_ondemand_object_is_close(object)) { + cachefiles_ondemand_set_object_reopening(object); + queue_work(fscache_wq, &info->ondemand_work); + continue; + } + if (cachefiles_ondemand_object_is_reopening(object)) + continue; + return req; + } + return NULL; +} + +static inline bool cachefiles_ondemand_finish_req(struct cachefiles_req *req, + struct xa_state *xas, int err) +{ + if (unlikely(!xas || !req)) + return false; + + if (xa_cmpxchg(xas->xa, xas->xa_index, req, NULL, 0) != req) + return false; + + req->error = err; + complete(&req->done); + return true; +} + +ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, + char __user *_buffer, size_t buflen) +{ + struct cachefiles_req *req; + struct cachefiles_msg *msg; + size_t n; + int ret = 0; + struct ondemand_anon_file anon_file; + XA_STATE(xas, &cache->reqs, cache->req_id_next); + + xa_lock(&cache->reqs); + /* + * Cyclically search for a request that has not ever been processed, + * to prevent requests from being processed repeatedly, and make + * request distribution fair. + */ + req = cachefiles_ondemand_select_req(&xas, ULONG_MAX); + if (!req && cache->req_id_next > 0) { + xas_set(&xas, 0); + req = cachefiles_ondemand_select_req(&xas, cache->req_id_next - 1); + } + if (!req) { + xa_unlock(&cache->reqs); + return 0; + } + + msg = &req->msg; + n = msg->len; + + if (n > buflen) { + xa_unlock(&cache->reqs); + return -EMSGSIZE; + } + + xas_clear_mark(&xas, CACHEFILES_REQ_NEW); + cache->req_id_next = xas.xa_index + 1; + refcount_inc(&req->ref); + cachefiles_grab_object(req->object, cachefiles_obj_get_read_req); + xa_unlock(&cache->reqs); + + if (msg->opcode == CACHEFILES_OP_OPEN) { + ret = cachefiles_ondemand_get_fd(req, &anon_file); + if (ret) + goto out; + } + + msg->msg_id = xas.xa_index; + msg->object_id = req->object->ondemand->ondemand_id; + + if (copy_to_user(_buffer, msg, n) != 0) + ret = -EFAULT; + + if (msg->opcode == CACHEFILES_OP_OPEN) { + if (ret < 0) { + fput(anon_file.file); + put_unused_fd(anon_file.fd); + goto out; + } + fd_install(anon_file.fd, anon_file.file); + } +out: + cachefiles_put_object(req->object, cachefiles_obj_put_read_req); + /* Remove error request and CLOSE request has no reply */ + if (ret || msg->opcode == CACHEFILES_OP_CLOSE) + cachefiles_ondemand_finish_req(req, &xas, ret); + cachefiles_req_put(req); + return ret ? 
ret : n; +} + +typedef int (*init_req_fn)(struct cachefiles_req *req, void *private); + +static int cachefiles_ondemand_send_req(struct cachefiles_object *object, + enum cachefiles_opcode opcode, + size_t data_len, + init_req_fn init_req, + void *private) +{ + struct cachefiles_cache *cache = object->volume->cache; + struct cachefiles_req *req = NULL; + XA_STATE(xas, &cache->reqs, 0); + int ret; + + if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) + return 0; + + if (test_bit(CACHEFILES_DEAD, &cache->flags)) { + ret = -EIO; + goto out; + } + + req = kzalloc(sizeof(*req) + data_len, GFP_KERNEL); + if (!req) { + ret = -ENOMEM; + goto out; + } + + refcount_set(&req->ref, 1); + req->object = object; + init_completion(&req->done); + req->msg.opcode = opcode; + req->msg.len = sizeof(struct cachefiles_msg) + data_len; + + ret = init_req(req, private); + if (ret) + goto out; + + do { + /* + * Stop enqueuing the request when daemon is dying. The + * following two operations need to be atomic as a whole. + * 1) check cache state, and + * 2) enqueue request if cache is alive. + * Otherwise the request may be enqueued after xarray has been + * flushed, leaving the orphan request never being completed. + * + * CPU 1 CPU 2 + * ===== ===== + * test CACHEFILES_DEAD bit + * set CACHEFILES_DEAD bit + * flush requests in the xarray + * enqueue the request + */ + xas_lock(&xas); + + if (test_bit(CACHEFILES_DEAD, &cache->flags) || + cachefiles_ondemand_object_is_dropping(object)) { + xas_unlock(&xas); + ret = -EIO; + goto out; + } + + /* coupled with the barrier in cachefiles_flush_reqs() */ + smp_mb(); + + if (opcode == CACHEFILES_OP_CLOSE && + !cachefiles_ondemand_object_is_open(object)) { + WARN_ON_ONCE(object->ondemand->ondemand_id == 0); + xas_unlock(&xas); + ret = -EIO; + goto out; + } + + /* + * Cyclically find a free xas to avoid msg_id reuse that would + * cause the daemon to successfully copen a stale msg_id. + */ + xas.xa_index = cache->msg_id_next; + xas_find_marked(&xas, UINT_MAX, XA_FREE_MARK); + if (xas.xa_node == XAS_RESTART) { + xas.xa_index = 0; + xas_find_marked(&xas, cache->msg_id_next - 1, XA_FREE_MARK); + } + if (xas.xa_node == XAS_RESTART) + xas_set_err(&xas, -EBUSY); + + xas_store(&xas, req); + if (xas_valid(&xas)) { + cache->msg_id_next = xas.xa_index + 1; + xas_clear_mark(&xas, XA_FREE_MARK); + xas_set_mark(&xas, CACHEFILES_REQ_NEW); + } + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + + ret = xas_error(&xas); + if (ret) + goto out; + + wake_up_all(&cache->daemon_pollwq); +wait: + ret = wait_for_completion_killable(&req->done); + if (!ret) { + ret = req->error; + } else { + ret = -EINTR; + if (!cachefiles_ondemand_finish_req(req, &xas, ret)) { + /* Someone will complete it soon. */ + cpu_relax(); + goto wait; + } + } + cachefiles_req_put(req); + return ret; +out: + /* Reset the object to close state in error handling path. + * If error occurs after creating the anonymous fd, + * cachefiles_ondemand_fd_release() will set object to close. 
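	 * (An object already marked as dropping is left alone below, since
	 * cachefiles_ondemand_clean_object() has taken over its teardown.)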
+ */ + if (opcode == CACHEFILES_OP_OPEN && + !cachefiles_ondemand_object_is_dropping(object)) + cachefiles_ondemand_set_object_close(object); + kfree(req); + return ret; +} + +static int cachefiles_ondemand_init_open_req(struct cachefiles_req *req, + void *private) +{ + struct cachefiles_object *object = req->object; + struct fscache_cookie *cookie = object->cookie; + struct fscache_volume *volume = object->volume->vcookie; + struct cachefiles_open *load = (void *)req->msg.data; + size_t volume_key_size, cookie_key_size; + void *volume_key, *cookie_key; + + /* + * Volume key is a NUL-terminated string. key[0] stores strlen() of the + * string, followed by the content of the string (excluding '\0'). + */ + volume_key_size = volume->key[0] + 1; + volume_key = volume->key + 1; + + /* Cookie key is binary data, which is netfs specific. */ + cookie_key_size = cookie->key_len; + cookie_key = fscache_get_key(cookie); + + if (!(object->cookie->advice & FSCACHE_ADV_WANT_CACHE_SIZE)) { + pr_err("WANT_CACHE_SIZE is needed for on-demand mode\n"); + return -EINVAL; + } + + load->volume_key_size = volume_key_size; + load->cookie_key_size = cookie_key_size; + memcpy(load->data, volume_key, volume_key_size); + memcpy(load->data + volume_key_size, cookie_key, cookie_key_size); + + return 0; +} + +static int cachefiles_ondemand_init_close_req(struct cachefiles_req *req, + void *private) +{ + struct cachefiles_object *object = req->object; + + if (!cachefiles_ondemand_object_is_open(object)) + return -ENOENT; + + trace_cachefiles_ondemand_close(object, &req->msg); + return 0; +} + +struct cachefiles_read_ctx { + loff_t off; + size_t len; +}; + +static int cachefiles_ondemand_init_read_req(struct cachefiles_req *req, + void *private) +{ + struct cachefiles_object *object = req->object; + struct cachefiles_read *load = (void *)req->msg.data; + struct cachefiles_read_ctx *read_ctx = private; + + load->off = read_ctx->off; + load->len = read_ctx->len; + trace_cachefiles_ondemand_read(object, &req->msg, load); + return 0; +} + +int cachefiles_ondemand_init_object(struct cachefiles_object *object) +{ + struct fscache_cookie *cookie = object->cookie; + struct fscache_volume *volume = object->volume->vcookie; + size_t volume_key_size, cookie_key_size, data_len; + + if (!object->ondemand) + return 0; + + /* + * CacheFiles will firstly check the cache file under the root cache + * directory. If the coherency check failed, it will fallback to + * creating a new tmpfile as the cache file. Reuse the previously + * allocated object ID if any. + */ + if (cachefiles_ondemand_object_is_open(object)) + return 0; + + volume_key_size = volume->key[0] + 1; + cookie_key_size = cookie->key_len; + data_len = sizeof(struct cachefiles_open) + + volume_key_size + cookie_key_size; + + return cachefiles_ondemand_send_req(object, CACHEFILES_OP_OPEN, + data_len, cachefiles_ondemand_init_open_req, NULL); +} + +void cachefiles_ondemand_clean_object(struct cachefiles_object *object) +{ + unsigned long index; + struct cachefiles_req *req; + struct cachefiles_cache *cache; + + if (!object->ondemand) + return; + + cachefiles_ondemand_send_req(object, CACHEFILES_OP_CLOSE, 0, + cachefiles_ondemand_init_close_req, NULL); + + if (!object->ondemand->ondemand_id) + return; + + /* Cancel all requests for the object that is being dropped. 
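	 * In-flight requests are completed with -EIO and erased from the
	 * xarray so that no waiter is left behind once the entries go away.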
*/ + cache = object->volume->cache; + xa_lock(&cache->reqs); + cachefiles_ondemand_set_object_dropping(object); + xa_for_each(&cache->reqs, index, req) { + if (req->object == object) { + req->error = -EIO; + complete(&req->done); + __xa_erase(&cache->reqs, index); + } + } + xa_unlock(&cache->reqs); + + /* Wait for ondemand_object_worker() to finish to avoid UAF. */ + cancel_work_sync(&object->ondemand->ondemand_work); +} + +int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object, + struct cachefiles_volume *volume) +{ + if (!cachefiles_in_ondemand_mode(volume->cache)) + return 0; + + object->ondemand = kzalloc(sizeof(struct cachefiles_ondemand_info), + GFP_KERNEL); + if (!object->ondemand) + return -ENOMEM; + + object->ondemand->object = object; + spin_lock_init(&object->ondemand->lock); + INIT_WORK(&object->ondemand->ondemand_work, ondemand_object_worker); + return 0; +} + +void cachefiles_ondemand_deinit_obj_info(struct cachefiles_object *object) +{ + kfree(object->ondemand); + object->ondemand = NULL; +} + +int cachefiles_ondemand_read(struct cachefiles_object *object, + loff_t pos, size_t len) +{ + struct cachefiles_read_ctx read_ctx = {pos, len}; + + return cachefiles_ondemand_send_req(object, CACHEFILES_OP_READ, + sizeof(struct cachefiles_read), + cachefiles_ondemand_init_read_req, &read_ctx); +} diff --git a/fs/cachefiles/proc.c b/fs/cachefiles/proc.c deleted file mode 100644 index eccd33941199..000000000000 --- a/fs/cachefiles/proc.c +++ /dev/null @@ -1,134 +0,0 @@ -/* CacheFiles statistics - * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - */ - -#include <linux/module.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include "internal.h" - -atomic_t cachefiles_lookup_histogram[HZ]; -atomic_t cachefiles_mkdir_histogram[HZ]; -atomic_t cachefiles_create_histogram[HZ]; - -/* - * display the latency histogram - */ -static int cachefiles_histogram_show(struct seq_file *m, void *v) -{ - unsigned long index; - unsigned x, y, z, t; - - switch ((unsigned long) v) { - case 1: - seq_puts(m, "JIFS SECS LOOKUPS MKDIRS CREATES\n"); - return 0; - case 2: - seq_puts(m, "===== ===== ========= ========= =========\n"); - return 0; - default: - index = (unsigned long) v - 3; - x = atomic_read(&cachefiles_lookup_histogram[index]); - y = atomic_read(&cachefiles_mkdir_histogram[index]); - z = atomic_read(&cachefiles_create_histogram[index]); - if (x == 0 && y == 0 && z == 0) - return 0; - - t = (index * 1000) / HZ; - - seq_printf(m, "%4lu 0.%03u %9u %9u %9u\n", index, t, x, y, z); - return 0; - } -} - -/* - * set up the iterator to start reading from the first line - */ -static void *cachefiles_histogram_start(struct seq_file *m, loff_t *_pos) -{ - if ((unsigned long long)*_pos >= HZ + 2) - return NULL; - if (*_pos == 0) - *_pos = 1; - return (void *)(unsigned long) *_pos; -} - -/* - * move to the next line - */ -static void *cachefiles_histogram_next(struct seq_file *m, void *v, loff_t *pos) -{ - (*pos)++; - return (unsigned long long)*pos > HZ + 2 ? 
- NULL : (void *)(unsigned long) *pos; -} - -/* - * clean up after reading - */ -static void cachefiles_histogram_stop(struct seq_file *m, void *v) -{ -} - -static const struct seq_operations cachefiles_histogram_ops = { - .start = cachefiles_histogram_start, - .stop = cachefiles_histogram_stop, - .next = cachefiles_histogram_next, - .show = cachefiles_histogram_show, -}; - -/* - * open "/proc/fs/cachefiles/XXX" which provide statistics summaries - */ -static int cachefiles_histogram_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &cachefiles_histogram_ops); -} - -static const struct file_operations cachefiles_histogram_fops = { - .owner = THIS_MODULE, - .open = cachefiles_histogram_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -/* - * initialise the /proc/fs/cachefiles/ directory - */ -int __init cachefiles_proc_init(void) -{ - _enter(""); - - if (!proc_mkdir("fs/cachefiles", NULL)) - goto error_dir; - - if (!proc_create("fs/cachefiles/histogram", S_IFREG | 0444, NULL, - &cachefiles_histogram_fops)) - goto error_histogram; - - _leave(" = 0"); - return 0; - -error_histogram: - remove_proc_entry("fs/cachefiles", NULL); -error_dir: - _leave(" = -ENOMEM"); - return -ENOMEM; -} - -/* - * clean up the /proc/fs/cachefiles/ directory - */ -void cachefiles_proc_cleanup(void) -{ - remove_proc_entry("fs/cachefiles/histogram", NULL); - remove_proc_entry("fs/cachefiles", NULL); -} diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c deleted file mode 100644 index ebaff368120d..000000000000 --- a/fs/cachefiles/rdwr.c +++ /dev/null @@ -1,990 +0,0 @@ -/* Storage object read/write - * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - */ - -#include <linux/mount.h> -#include <linux/slab.h> -#include <linux/file.h> -#include <linux/swap.h> -#include "internal.h" - -/* - * detect wake up events generated by the unlocking of pages in which we're - * interested - * - we use this to detect read completion of backing pages - * - the caller holds the waitqueue lock - */ -static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode, - int sync, void *_key) -{ - struct cachefiles_one_read *monitor = - container_of(wait, struct cachefiles_one_read, monitor); - struct cachefiles_object *object; - struct wait_bit_key *key = _key; - struct page *page = wait->private; - - ASSERT(key); - - _enter("{%lu},%u,%d,{%p,%u}", - monitor->netfs_page->index, mode, sync, - key->flags, key->bit_nr); - - if (key->flags != &page->flags || - key->bit_nr != PG_locked) - return 0; - - _debug("--- monitor %p %lx ---", page, page->flags); - - if (!PageUptodate(page) && !PageError(page)) { - /* unlocked, not uptodate and not erronous? 
*/ - _debug("page probably truncated"); - } - - /* remove from the waitqueue */ - list_del(&wait->task_list); - - /* move onto the action list and queue for FS-Cache thread pool */ - ASSERT(monitor->op); - - object = container_of(monitor->op->op.object, - struct cachefiles_object, fscache); - - spin_lock(&object->work_lock); - list_add_tail(&monitor->op_link, &monitor->op->to_do); - spin_unlock(&object->work_lock); - - fscache_enqueue_retrieval(monitor->op); - return 0; -} - -/* - * handle a probably truncated page - * - check to see if the page is still relevant and reissue the read if - * possible - * - return -EIO on error, -ENODATA if the page is gone, -EINPROGRESS if we - * must wait again and 0 if successful - */ -static int cachefiles_read_reissue(struct cachefiles_object *object, - struct cachefiles_one_read *monitor) -{ - struct address_space *bmapping = object->backer->d_inode->i_mapping; - struct page *backpage = monitor->back_page, *backpage2; - int ret; - - _enter("{ino=%lx},{%lx,%lx}", - object->backer->d_inode->i_ino, - backpage->index, backpage->flags); - - /* skip if the page was truncated away completely */ - if (backpage->mapping != bmapping) { - _leave(" = -ENODATA [mapping]"); - return -ENODATA; - } - - backpage2 = find_get_page(bmapping, backpage->index); - if (!backpage2) { - _leave(" = -ENODATA [gone]"); - return -ENODATA; - } - - if (backpage != backpage2) { - put_page(backpage2); - _leave(" = -ENODATA [different]"); - return -ENODATA; - } - - /* the page is still there and we already have a ref on it, so we don't - * need a second */ - put_page(backpage2); - - INIT_LIST_HEAD(&monitor->op_link); - add_page_wait_queue(backpage, &monitor->monitor); - - if (trylock_page(backpage)) { - ret = -EIO; - if (PageError(backpage)) - goto unlock_discard; - ret = 0; - if (PageUptodate(backpage)) - goto unlock_discard; - - _debug("reissue read"); - ret = bmapping->a_ops->readpage(NULL, backpage); - if (ret < 0) - goto unlock_discard; - } - - /* but the page may have been read before the monitor was installed, so - * the monitor may miss the event - so we have to ensure that we do get - * one in such a case */ - if (trylock_page(backpage)) { - _debug("jumpstart %p {%lx}", backpage, backpage->flags); - unlock_page(backpage); - } - - /* it'll reappear on the todo list */ - _leave(" = -EINPROGRESS"); - return -EINPROGRESS; - -unlock_discard: - unlock_page(backpage); - spin_lock_irq(&object->work_lock); - list_del(&monitor->op_link); - spin_unlock_irq(&object->work_lock); - _leave(" = %d", ret); - return ret; -} - -/* - * copy data from backing pages to netfs pages to complete a read operation - * - driven by FS-Cache's thread pool - */ -static void cachefiles_read_copier(struct fscache_operation *_op) -{ - struct cachefiles_one_read *monitor; - struct cachefiles_object *object; - struct fscache_retrieval *op; - struct pagevec pagevec; - int error, max; - - op = container_of(_op, struct fscache_retrieval, op); - object = container_of(op->op.object, - struct cachefiles_object, fscache); - - _enter("{ino=%lu}", object->backer->d_inode->i_ino); - - pagevec_init(&pagevec, 0); - - max = 8; - spin_lock_irq(&object->work_lock); - - while (!list_empty(&op->to_do)) { - monitor = list_entry(op->to_do.next, - struct cachefiles_one_read, op_link); - list_del(&monitor->op_link); - - spin_unlock_irq(&object->work_lock); - - _debug("- copy {%lu}", monitor->back_page->index); - - recheck: - if (test_bit(FSCACHE_COOKIE_INVALIDATING, - &object->fscache.cookie->flags)) { - error = -ESTALE; - } else if 
(PageUptodate(monitor->back_page)) { - copy_highpage(monitor->netfs_page, monitor->back_page); - fscache_mark_page_cached(monitor->op, - monitor->netfs_page); - error = 0; - } else if (!PageError(monitor->back_page)) { - /* the page has probably been truncated */ - error = cachefiles_read_reissue(object, monitor); - if (error == -EINPROGRESS) - goto next; - goto recheck; - } else { - cachefiles_io_error_obj( - object, - "Readpage failed on backing file %lx", - (unsigned long) monitor->back_page->flags); - error = -EIO; - } - - page_cache_release(monitor->back_page); - - fscache_end_io(op, monitor->netfs_page, error); - page_cache_release(monitor->netfs_page); - fscache_retrieval_complete(op, 1); - fscache_put_retrieval(op); - kfree(monitor); - - next: - /* let the thread pool have some air occasionally */ - max--; - if (max < 0 || need_resched()) { - if (!list_empty(&op->to_do)) - fscache_enqueue_retrieval(op); - _leave(" [maxed out]"); - return; - } - - spin_lock_irq(&object->work_lock); - } - - spin_unlock_irq(&object->work_lock); - _leave(""); -} - -/* - * read the corresponding page to the given set from the backing file - * - an uncertain page is simply discarded, to be tried again another time - */ -static int cachefiles_read_backing_file_one(struct cachefiles_object *object, - struct fscache_retrieval *op, - struct page *netpage) -{ - struct cachefiles_one_read *monitor; - struct address_space *bmapping; - struct page *newpage, *backpage; - int ret; - - _enter(""); - - _debug("read back %p{%lu,%d}", - netpage, netpage->index, page_count(netpage)); - - monitor = kzalloc(sizeof(*monitor), cachefiles_gfp); - if (!monitor) - goto nomem; - - monitor->netfs_page = netpage; - monitor->op = fscache_get_retrieval(op); - - init_waitqueue_func_entry(&monitor->monitor, cachefiles_read_waiter); - - /* attempt to get hold of the backing page */ - bmapping = object->backer->d_inode->i_mapping; - newpage = NULL; - - for (;;) { - backpage = find_get_page(bmapping, netpage->index); - if (backpage) - goto backing_page_already_present; - - if (!newpage) { - newpage = __page_cache_alloc(cachefiles_gfp | - __GFP_COLD); - if (!newpage) - goto nomem_monitor; - } - - ret = add_to_page_cache(newpage, bmapping, - netpage->index, cachefiles_gfp); - if (ret == 0) - goto installed_new_backing_page; - if (ret != -EEXIST) - goto nomem_page; - } - - /* we've installed a new backing page, so now we need to add it - * to the LRU list and start it reading */ -installed_new_backing_page: - _debug("- new %p", newpage); - - backpage = newpage; - newpage = NULL; - - lru_cache_add_file(backpage); - -read_backing_page: - ret = bmapping->a_ops->readpage(NULL, backpage); - if (ret < 0) - goto read_error; - - /* set the monitor to transfer the data across */ -monitor_backing_page: - _debug("- monitor add"); - - /* install the monitor */ - page_cache_get(monitor->netfs_page); - page_cache_get(backpage); - monitor->back_page = backpage; - monitor->monitor.private = backpage; - add_page_wait_queue(backpage, &monitor->monitor); - monitor = NULL; - - /* but the page may have been read before the monitor was installed, so - * the monitor may miss the event - so we have to ensure that we do get - * one in such a case */ - if (trylock_page(backpage)) { - _debug("jumpstart %p {%lx}", backpage, backpage->flags); - unlock_page(backpage); - } - goto success; - - /* if the backing page is already present, it can be in one of - * three states: read in progress, read failed or read okay */ -backing_page_already_present: - _debug("- 
present"); - - if (newpage) { - page_cache_release(newpage); - newpage = NULL; - } - - if (PageError(backpage)) - goto io_error; - - if (PageUptodate(backpage)) - goto backing_page_already_uptodate; - - if (!trylock_page(backpage)) - goto monitor_backing_page; - _debug("read %p {%lx}", backpage, backpage->flags); - goto read_backing_page; - - /* the backing page is already up to date, attach the netfs - * page to the pagecache and LRU and copy the data across */ -backing_page_already_uptodate: - _debug("- uptodate"); - - fscache_mark_page_cached(op, netpage); - - copy_highpage(netpage, backpage); - fscache_end_io(op, netpage, 0); - fscache_retrieval_complete(op, 1); - -success: - _debug("success"); - ret = 0; - -out: - if (backpage) - page_cache_release(backpage); - if (monitor) { - fscache_put_retrieval(monitor->op); - kfree(monitor); - } - _leave(" = %d", ret); - return ret; - -read_error: - _debug("read error %d", ret); - if (ret == -ENOMEM) { - fscache_retrieval_complete(op, 1); - goto out; - } -io_error: - cachefiles_io_error_obj(object, "Page read error on backing file"); - fscache_retrieval_complete(op, 1); - ret = -ENOBUFS; - goto out; - -nomem_page: - page_cache_release(newpage); -nomem_monitor: - fscache_put_retrieval(monitor->op); - kfree(monitor); -nomem: - fscache_retrieval_complete(op, 1); - _leave(" = -ENOMEM"); - return -ENOMEM; -} - -/* - * read a page from the cache or allocate a block in which to store it - * - cache withdrawal is prevented by the caller - * - returns -EINTR if interrupted - * - returns -ENOMEM if ran out of memory - * - returns -ENOBUFS if no buffers can be made available - * - returns -ENOBUFS if page is beyond EOF - * - if the page is backed by a block in the cache: - * - a read will be started which will call the callback on completion - * - 0 will be returned - * - else if the page is unbacked: - * - the metadata will be retained - * - -ENODATA will be returned - */ -int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, - struct page *page, - gfp_t gfp) -{ - struct cachefiles_object *object; - struct cachefiles_cache *cache; - struct pagevec pagevec; - struct inode *inode; - sector_t block0, block; - unsigned shift; - int ret; - - object = container_of(op->op.object, - struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); - - _enter("{%p},{%lx},,,", object, page->index); - - if (!object->backer) - goto enobufs; - - inode = object->backer->d_inode; - ASSERT(S_ISREG(inode->i_mode)); - ASSERT(inode->i_mapping->a_ops->bmap); - ASSERT(inode->i_mapping->a_ops->readpages); - - /* calculate the shift required to use bmap */ - if (inode->i_sb->s_blocksize > PAGE_SIZE) - goto enobufs; - - shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; - - op->op.flags &= FSCACHE_OP_KEEP_FLAGS; - op->op.flags |= FSCACHE_OP_ASYNC; - op->op.processor = cachefiles_read_copier; - - pagevec_init(&pagevec, 0); - - /* we assume the absence or presence of the first block is a good - * enough indication for the page as a whole - * - TODO: don't use bmap() for this as it is _not_ actually good - * enough for this as it doesn't indicate errors, but it's all we've - * got for the moment - */ - block0 = page->index; - block0 <<= shift; - - block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0); - _debug("%llx -> %llx", - (unsigned long long) block0, - (unsigned long long) block); - - if (block) { - /* submit the apparently valid page to the backing fs to be - * read from disk */ - ret = 
cachefiles_read_backing_file_one(object, op, page); - } else if (cachefiles_has_space(cache, 0, 1) == 0) { - /* there's space in the cache we can use */ - fscache_mark_page_cached(op, page); - fscache_retrieval_complete(op, 1); - ret = -ENODATA; - } else { - goto enobufs; - } - - _leave(" = %d", ret); - return ret; - -enobufs: - fscache_retrieval_complete(op, 1); - _leave(" = -ENOBUFS"); - return -ENOBUFS; -} - -/* - * read the corresponding pages to the given set from the backing file - * - any uncertain pages are simply discarded, to be tried again another time - */ -static int cachefiles_read_backing_file(struct cachefiles_object *object, - struct fscache_retrieval *op, - struct list_head *list) -{ - struct cachefiles_one_read *monitor = NULL; - struct address_space *bmapping = object->backer->d_inode->i_mapping; - struct page *newpage = NULL, *netpage, *_n, *backpage = NULL; - int ret = 0; - - _enter(""); - - list_for_each_entry_safe(netpage, _n, list, lru) { - list_del(&netpage->lru); - - _debug("read back %p{%lu,%d}", - netpage, netpage->index, page_count(netpage)); - - if (!monitor) { - monitor = kzalloc(sizeof(*monitor), cachefiles_gfp); - if (!monitor) - goto nomem; - - monitor->op = fscache_get_retrieval(op); - init_waitqueue_func_entry(&monitor->monitor, - cachefiles_read_waiter); - } - - for (;;) { - backpage = find_get_page(bmapping, netpage->index); - if (backpage) - goto backing_page_already_present; - - if (!newpage) { - newpage = __page_cache_alloc(cachefiles_gfp | - __GFP_COLD); - if (!newpage) - goto nomem; - } - - ret = add_to_page_cache(newpage, bmapping, - netpage->index, cachefiles_gfp); - if (ret == 0) - goto installed_new_backing_page; - if (ret != -EEXIST) - goto nomem; - } - - /* we've installed a new backing page, so now we need to add it - * to the LRU list and start it reading */ - installed_new_backing_page: - _debug("- new %p", newpage); - - backpage = newpage; - newpage = NULL; - - lru_cache_add_file(backpage); - - reread_backing_page: - ret = bmapping->a_ops->readpage(NULL, backpage); - if (ret < 0) - goto read_error; - - /* add the netfs page to the pagecache and LRU, and set the - * monitor to transfer the data across */ - monitor_backing_page: - _debug("- monitor add"); - - ret = add_to_page_cache(netpage, op->mapping, netpage->index, - cachefiles_gfp); - if (ret < 0) { - if (ret == -EEXIST) { - page_cache_release(netpage); - fscache_retrieval_complete(op, 1); - continue; - } - goto nomem; - } - - lru_cache_add_file(netpage); - - /* install a monitor */ - page_cache_get(netpage); - monitor->netfs_page = netpage; - - page_cache_get(backpage); - monitor->back_page = backpage; - monitor->monitor.private = backpage; - add_page_wait_queue(backpage, &monitor->monitor); - monitor = NULL; - - /* but the page may have been read before the monitor was - * installed, so the monitor may miss the event - so we have to - * ensure that we do get one in such a case */ - if (trylock_page(backpage)) { - _debug("2unlock %p {%lx}", backpage, backpage->flags); - unlock_page(backpage); - } - - page_cache_release(backpage); - backpage = NULL; - - page_cache_release(netpage); - netpage = NULL; - continue; - - /* if the backing page is already present, it can be in one of - * three states: read in progress, read failed or read okay */ - backing_page_already_present: - _debug("- present %p", backpage); - - if (PageError(backpage)) - goto io_error; - - if (PageUptodate(backpage)) - goto backing_page_already_uptodate; - - _debug("- not ready %p{%lx}", backpage, backpage->flags); - 
- if (!trylock_page(backpage)) - goto monitor_backing_page; - - if (PageError(backpage)) { - _debug("error %lx", backpage->flags); - unlock_page(backpage); - goto io_error; - } - - if (PageUptodate(backpage)) - goto backing_page_already_uptodate_unlock; - - /* we've locked a page that's neither up to date nor erroneous, - * so we need to attempt to read it again */ - goto reread_backing_page; - - /* the backing page is already up to date, attach the netfs - * page to the pagecache and LRU and copy the data across */ - backing_page_already_uptodate_unlock: - _debug("uptodate %lx", backpage->flags); - unlock_page(backpage); - backing_page_already_uptodate: - _debug("- uptodate"); - - ret = add_to_page_cache(netpage, op->mapping, netpage->index, - cachefiles_gfp); - if (ret < 0) { - if (ret == -EEXIST) { - page_cache_release(netpage); - fscache_retrieval_complete(op, 1); - continue; - } - goto nomem; - } - - copy_highpage(netpage, backpage); - - page_cache_release(backpage); - backpage = NULL; - - fscache_mark_page_cached(op, netpage); - - lru_cache_add_file(netpage); - - /* the netpage is unlocked and marked up to date here */ - fscache_end_io(op, netpage, 0); - page_cache_release(netpage); - netpage = NULL; - fscache_retrieval_complete(op, 1); - continue; - } - - netpage = NULL; - - _debug("out"); - -out: - /* tidy up */ - if (newpage) - page_cache_release(newpage); - if (netpage) - page_cache_release(netpage); - if (backpage) - page_cache_release(backpage); - if (monitor) { - fscache_put_retrieval(op); - kfree(monitor); - } - - list_for_each_entry_safe(netpage, _n, list, lru) { - list_del(&netpage->lru); - page_cache_release(netpage); - fscache_retrieval_complete(op, 1); - } - - _leave(" = %d", ret); - return ret; - -nomem: - _debug("nomem"); - ret = -ENOMEM; - goto record_page_complete; - -read_error: - _debug("read error %d", ret); - if (ret == -ENOMEM) - goto record_page_complete; -io_error: - cachefiles_io_error_obj(object, "Page read error on backing file"); - ret = -ENOBUFS; -record_page_complete: - fscache_retrieval_complete(op, 1); - goto out; -} - -/* - * read a list of pages from the cache or allocate blocks in which to store - * them - */ -int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, - struct list_head *pages, - unsigned *nr_pages, - gfp_t gfp) -{ - struct cachefiles_object *object; - struct cachefiles_cache *cache; - struct list_head backpages; - struct pagevec pagevec; - struct inode *inode; - struct page *page, *_n; - unsigned shift, nrbackpages; - int ret, ret2, space; - - object = container_of(op->op.object, - struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); - - _enter("{OBJ%x,%d},,%d,,", - object->fscache.debug_id, atomic_read(&op->op.usage), - *nr_pages); - - if (!object->backer) - goto all_enobufs; - - space = 1; - if (cachefiles_has_space(cache, 0, *nr_pages) < 0) - space = 0; - - inode = object->backer->d_inode; - ASSERT(S_ISREG(inode->i_mode)); - ASSERT(inode->i_mapping->a_ops->bmap); - ASSERT(inode->i_mapping->a_ops->readpages); - - /* calculate the shift required to use bmap */ - if (inode->i_sb->s_blocksize > PAGE_SIZE) - goto all_enobufs; - - shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; - - pagevec_init(&pagevec, 0); - - op->op.flags &= FSCACHE_OP_KEEP_FLAGS; - op->op.flags |= FSCACHE_OP_ASYNC; - op->op.processor = cachefiles_read_copier; - - INIT_LIST_HEAD(&backpages); - nrbackpages = 0; - - ret = space ? 
-ENODATA : -ENOBUFS; - list_for_each_entry_safe(page, _n, pages, lru) { - sector_t block0, block; - - /* we assume the absence or presence of the first block is a - * good enough indication for the page as a whole - * - TODO: don't use bmap() for this as it is _not_ actually - * good enough for this as it doesn't indicate errors, but - * it's all we've got for the moment - */ - block0 = page->index; - block0 <<= shift; - - block = inode->i_mapping->a_ops->bmap(inode->i_mapping, - block0); - _debug("%llx -> %llx", - (unsigned long long) block0, - (unsigned long long) block); - - if (block) { - /* we have data - add it to the list to give to the - * backing fs */ - list_move(&page->lru, &backpages); - (*nr_pages)--; - nrbackpages++; - } else if (space && pagevec_add(&pagevec, page) == 0) { - fscache_mark_pages_cached(op, &pagevec); - fscache_retrieval_complete(op, 1); - ret = -ENODATA; - } else { - fscache_retrieval_complete(op, 1); - } - } - - if (pagevec_count(&pagevec) > 0) - fscache_mark_pages_cached(op, &pagevec); - - if (list_empty(pages)) - ret = 0; - - /* submit the apparently valid pages to the backing fs to be read from - * disk */ - if (nrbackpages > 0) { - ret2 = cachefiles_read_backing_file(object, op, &backpages); - if (ret2 == -ENOMEM || ret2 == -EINTR) - ret = ret2; - } - - _leave(" = %d [nr=%u%s]", - ret, *nr_pages, list_empty(pages) ? " empty" : ""); - return ret; - -all_enobufs: - fscache_retrieval_complete(op, *nr_pages); - return -ENOBUFS; -} - -/* - * allocate a block in the cache in which to store a page - * - cache withdrawal is prevented by the caller - * - returns -EINTR if interrupted - * - returns -ENOMEM if ran out of memory - * - returns -ENOBUFS if no buffers can be made available - * - returns -ENOBUFS if page is beyond EOF - * - otherwise: - * - the metadata will be retained - * - 0 will be returned - */ -int cachefiles_allocate_page(struct fscache_retrieval *op, - struct page *page, - gfp_t gfp) -{ - struct cachefiles_object *object; - struct cachefiles_cache *cache; - int ret; - - object = container_of(op->op.object, - struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); - - _enter("%p,{%lx},", object, page->index); - - ret = cachefiles_has_space(cache, 0, 1); - if (ret == 0) - fscache_mark_page_cached(op, page); - else - ret = -ENOBUFS; - - fscache_retrieval_complete(op, 1); - _leave(" = %d", ret); - return ret; -} - -/* - * allocate blocks in the cache in which to store a set of pages - * - cache withdrawal is prevented by the caller - * - returns -EINTR if interrupted - * - returns -ENOMEM if ran out of memory - * - returns -ENOBUFS if some buffers couldn't be made available - * - returns -ENOBUFS if some pages are beyond EOF - * - otherwise: - * - -ENODATA will be returned - * - metadata will be retained for any page marked - */ -int cachefiles_allocate_pages(struct fscache_retrieval *op, - struct list_head *pages, - unsigned *nr_pages, - gfp_t gfp) -{ - struct cachefiles_object *object; - struct cachefiles_cache *cache; - struct pagevec pagevec; - struct page *page; - int ret; - - object = container_of(op->op.object, - struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); - - _enter("%p,,,%d,", object, *nr_pages); - - ret = cachefiles_has_space(cache, 0, *nr_pages); - if (ret == 0) { - pagevec_init(&pagevec, 0); - - list_for_each_entry(page, pages, lru) { - if (pagevec_add(&pagevec, page) == 0) - fscache_mark_pages_cached(op, 
&pagevec); - } - - if (pagevec_count(&pagevec) > 0) - fscache_mark_pages_cached(op, &pagevec); - ret = -ENODATA; - } else { - ret = -ENOBUFS; - } - - fscache_retrieval_complete(op, *nr_pages); - _leave(" = %d", ret); - return ret; -} - -/* - * request a page be stored in the cache - * - cache withdrawal is prevented by the caller - * - this request may be ignored if there's no cache block available, in which - * case -ENOBUFS will be returned - * - if the op is in progress, 0 will be returned - */ -int cachefiles_write_page(struct fscache_storage *op, struct page *page) -{ - struct cachefiles_object *object; - struct cachefiles_cache *cache; - mm_segment_t old_fs; - struct file *file; - struct path path; - loff_t pos, eof; - size_t len; - void *data; - int ret; - - ASSERT(op != NULL); - ASSERT(page != NULL); - - object = container_of(op->op.object, - struct cachefiles_object, fscache); - - _enter("%p,%p{%lx},,,", object, page, page->index); - - if (!object->backer) { - _leave(" = -ENOBUFS"); - return -ENOBUFS; - } - - ASSERT(S_ISREG(object->backer->d_inode->i_mode)); - - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); - - /* write the page to the backing filesystem and let it store it in its - * own time */ - path.mnt = cache->mnt; - path.dentry = object->backer; - file = dentry_open(&path, O_RDWR | O_LARGEFILE, cache->cache_cred); - if (IS_ERR(file)) { - ret = PTR_ERR(file); - } else { - ret = -EIO; - if (file->f_op->write) { - pos = (loff_t) page->index << PAGE_SHIFT; - - /* we mustn't write more data than we have, so we have - * to beware of a partial page at EOF */ - eof = object->fscache.store_limit_l; - len = PAGE_SIZE; - if (eof & ~PAGE_MASK) { - ASSERTCMP(pos, <, eof); - if (eof - pos < PAGE_SIZE) { - _debug("cut short %llx to %llx", - pos, eof); - len = eof - pos; - ASSERTCMP(pos + len, ==, eof); - } - } - - data = kmap(page); - file_start_write(file); - old_fs = get_fs(); - set_fs(KERNEL_DS); - ret = file->f_op->write( - file, (const void __user *) data, len, &pos); - set_fs(old_fs); - kunmap(page); - file_end_write(file); - if (ret != len) - ret = -EIO; - } - fput(file); - } - - if (ret < 0) { - if (ret == -EIO) - cachefiles_io_error_obj( - object, "Write page to backing file failed"); - ret = -ENOBUFS; - } - - _leave(" = %d", ret); - return ret; -} - -/* - * detach a backing block from a page - * - cache withdrawal is prevented by the caller - */ -void cachefiles_uncache_page(struct fscache_object *_object, struct page *page) -{ - struct cachefiles_object *object; - struct cachefiles_cache *cache; - - object = container_of(_object, struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); - - _enter("%p,{%lu}", object, page->index); - - spin_unlock(&object->fscache.cookie->lock); -} diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c index 039b5011d83b..fc6611886b3b 100644 --- a/fs/cachefiles/security.c +++ b/fs/cachefiles/security.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* CacheFiles security management * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2007, 2021 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. 
*/ #include <linux/fs.h> @@ -22,7 +18,7 @@ int cachefiles_get_security_ID(struct cachefiles_cache *cache) struct cred *new; int ret; - _enter("{%s}", cache->secctx); + _enter("{%u}", cache->have_secid ? cache->secid : 0); new = prepare_kernel_cred(current); if (!new) { @@ -30,13 +26,11 @@ int cachefiles_get_security_ID(struct cachefiles_cache *cache) goto error; } - if (cache->secctx) { - ret = set_security_override_from_ctx(new, cache->secctx); + if (cache->have_secid) { + ret = set_security_override(new, cache->secid); if (ret < 0) { put_cred(new); - printk(KERN_ERR "CacheFiles:" - " Security denies permission to nominate" - " security context: error %d\n", + pr_err("Security denies permission to nominate security context: error %d\n", ret); goto error; } @@ -57,18 +51,16 @@ static int cachefiles_check_cache_dir(struct cachefiles_cache *cache, { int ret; - ret = security_inode_mkdir(root->d_inode, root, 0); + ret = security_inode_mkdir(d_backing_inode(root), root, 0); if (ret < 0) { - printk(KERN_ERR "CacheFiles:" - " Security denies permission to make dirs: error %d", + pr_err("Security denies permission to make dirs: error %d", ret); return ret; } - ret = security_inode_create(root->d_inode, root, 0); + ret = security_inode_create(d_backing_inode(root), root, 0); if (ret < 0) - printk(KERN_ERR "CacheFiles:" - " Security denies permission to create files: error %d", + pr_err("Security denies permission to create files: error %d", ret); return ret; @@ -99,7 +91,7 @@ int cachefiles_determine_cache_security(struct cachefiles_cache *cache, /* use the cache root dir's security context as the basis with * which create files */ - ret = set_create_files_as(new, root->d_inode); + ret = set_create_files_as(new, d_backing_inode(root)); if (ret < 0) { abort_creds(new); cachefiles_begin_secure(cache, _saved_cred); diff --git a/fs/cachefiles/volume.c b/fs/cachefiles/volume.c new file mode 100644 index 000000000000..90ba926f488e --- /dev/null +++ b/fs/cachefiles/volume.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Volume handling. + * + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/namei.h> +#include "internal.h" +#include <trace/events/fscache.h> + +/* + * Allocate and set up a volume representation. We make sure all the fanout + * directories are created and pinned. 
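+ * The volume directory is named "I" plus the volume key and holds 256
+ * fan-out subdirectories, "@00" to "@ff".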
+ */ +void cachefiles_acquire_volume(struct fscache_volume *vcookie) +{ + struct cachefiles_volume *volume; + struct cachefiles_cache *cache = vcookie->cache->cache_priv; + const struct cred *saved_cred; + struct dentry *vdentry, *fan; + size_t len; + char *name; + bool is_new = false; + int ret, n_accesses, i; + + _enter(""); + + volume = kzalloc(sizeof(struct cachefiles_volume), GFP_KERNEL); + if (!volume) + return; + volume->vcookie = vcookie; + volume->cache = cache; + INIT_LIST_HEAD(&volume->cache_link); + + cachefiles_begin_secure(cache, &saved_cred); + + len = vcookie->key[0]; + name = kmalloc(len + 3, GFP_NOFS); + if (!name) + goto error_vol; + name[0] = 'I'; + memcpy(name + 1, vcookie->key + 1, len); + name[len + 1] = 0; + +retry: + vdentry = cachefiles_get_directory(cache, cache->store, name, &is_new); + if (IS_ERR(vdentry)) + goto error_name; + volume->dentry = vdentry; + + if (is_new) { + if (!cachefiles_set_volume_xattr(volume)) + goto error_dir; + } else { + ret = cachefiles_check_volume_xattr(volume); + if (ret < 0) { + if (ret != -ESTALE) + goto error_dir; + vdentry = start_removing_dentry(cache->store, vdentry); + if (!IS_ERR(vdentry)) + cachefiles_bury_object(cache, NULL, cache->store, + vdentry, + FSCACHE_VOLUME_IS_WEIRD); + cachefiles_put_directory(volume->dentry); + cond_resched(); + goto retry; + } + } + + for (i = 0; i < 256; i++) { + sprintf(name, "@%02x", i); + fan = cachefiles_get_directory(cache, vdentry, name, NULL); + if (IS_ERR(fan)) + goto error_fan; + volume->fanout[i] = fan; + } + + cachefiles_end_secure(cache, saved_cred); + + vcookie->cache_priv = volume; + n_accesses = atomic_inc_return(&vcookie->n_accesses); /* Stop wakeups on dec-to-0 */ + trace_fscache_access_volume(vcookie->debug_id, 0, + refcount_read(&vcookie->ref), + n_accesses, fscache_access_cache_pin); + + spin_lock(&cache->object_list_lock); + list_add(&volume->cache_link, &volume->cache->volumes); + spin_unlock(&cache->object_list_lock); + + kfree(name); + return; + +error_fan: + for (i = 0; i < 256; i++) + cachefiles_put_directory(volume->fanout[i]); +error_dir: + cachefiles_put_directory(volume->dentry); +error_name: + kfree(name); +error_vol: + kfree(volume); + cachefiles_end_secure(cache, saved_cred); +} + +/* + * Release a volume representation. + */ +static void __cachefiles_free_volume(struct cachefiles_volume *volume) +{ + int i; + + _enter(""); + + volume->vcookie->cache_priv = NULL; + + for (i = 0; i < 256; i++) + cachefiles_put_directory(volume->fanout[i]); + cachefiles_put_directory(volume->dentry); + kfree(volume); +} + +void cachefiles_free_volume(struct fscache_volume *vcookie) +{ + struct cachefiles_volume *volume = vcookie->cache_priv; + + if (volume) { + spin_lock(&volume->cache->object_list_lock); + list_del_init(&volume->cache_link); + spin_unlock(&volume->cache->object_list_lock); + __cachefiles_free_volume(volume); + } +} + +void cachefiles_withdraw_volume(struct cachefiles_volume *volume) +{ + cachefiles_set_volume_xattr(volume); + __cachefiles_free_volume(volume); +} diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index 2476e5162609..52383b1d0ba6 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* CacheFiles extended attribute management * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/module.h> @@ -19,272 +15,290 @@ #include <linux/slab.h> #include "internal.h" +#define CACHEFILES_COOKIE_TYPE_DATA 1 + +struct cachefiles_xattr { + __be64 object_size; /* Actual size of the object */ + __be64 zero_point; /* Size after which server has no data not written by us */ + __u8 type; /* Type of object */ + __u8 content; /* Content presence (enum cachefiles_content) */ + __u8 data[]; /* netfs coherency data */ +} __packed; + static const char cachefiles_xattr_cache[] = XATTR_USER_PREFIX "CacheFiles.cache"; +struct cachefiles_vol_xattr { + __be32 reserved; /* Reserved, should be 0 */ + __u8 data[]; /* netfs volume coherency data */ +} __packed; + /* - * check the type label on an object - * - done using xattrs + * set the state xattr on a cache file */ -int cachefiles_check_object_type(struct cachefiles_object *object) +int cachefiles_set_object_xattr(struct cachefiles_object *object) { - struct dentry *dentry = object->dentry; - char type[3], xtype[3]; + struct cachefiles_xattr *buf; + struct dentry *dentry; + struct file *file = object->file; + unsigned int len = object->cookie->aux_len; int ret; - ASSERT(dentry); - ASSERT(dentry->d_inode); + if (!file) + return -ESTALE; + dentry = file->f_path.dentry; - if (!object->fscache.cookie) - strcpy(type, "C3"); - else - snprintf(type, 3, "%02x", object->fscache.cookie->def->type); + _enter("%x,#%d", object->debug_id, len); - _enter("%p{%s}", object, type); + buf = kmalloc(sizeof(struct cachefiles_xattr) + len, GFP_KERNEL); + if (!buf) + return -ENOMEM; - /* attempt to install a type label directly */ - ret = vfs_setxattr(dentry, cachefiles_xattr_cache, type, 2, - XATTR_CREATE); - if (ret == 0) { - _debug("SET"); /* we succeeded */ - goto error; - } + buf->object_size = cpu_to_be64(object->cookie->object_size); + buf->zero_point = 0; + buf->type = CACHEFILES_COOKIE_TYPE_DATA; + buf->content = object->content_info; + if (test_bit(FSCACHE_COOKIE_LOCAL_WRITE, &object->cookie->flags)) + buf->content = CACHEFILES_CONTENT_DIRTY; + if (len > 0) + memcpy(buf->data, fscache_get_aux(object->cookie), len); - if (ret != -EEXIST) { - kerror("Can't set xattr on %*.*s [%lu] (err %d)", - dentry->d_name.len, dentry->d_name.len, - dentry->d_name.name, dentry->d_inode->i_ino, - -ret); - goto error; + ret = cachefiles_inject_write_error(); + if (ret == 0) { + ret = mnt_want_write_file(file); + if (ret == 0) { + ret = vfs_setxattr(&nop_mnt_idmap, dentry, + cachefiles_xattr_cache, buf, + sizeof(struct cachefiles_xattr) + len, 0); + mnt_drop_write_file(file); + } } - - /* read the current type label */ - ret = vfs_getxattr(dentry, cachefiles_xattr_cache, xtype, 3); if (ret < 0) { - if (ret == -ERANGE) - goto bad_type_length; - - kerror("Can't read xattr on %*.*s [%lu] (err %d)", - dentry->d_name.len, dentry->d_name.len, - dentry->d_name.name, dentry->d_inode->i_ino, - -ret); - goto error; + trace_cachefiles_vfs_error(object, file_inode(file), ret, + cachefiles_trace_setxattr_error); + trace_cachefiles_coherency(object, file_inode(file)->i_ino, + be64_to_cpup((__be64 *)buf->data), + buf->content, + cachefiles_coherency_set_fail); + if (ret != -ENOMEM) + cachefiles_io_error_obj( + object, + "Failed to set xattr with error %d", ret); + } else 
{ + trace_cachefiles_coherency(object, file_inode(file)->i_ino, + be64_to_cpup((__be64 *)buf->data), + buf->content, + cachefiles_coherency_set_ok); } - /* check the type is what we're expecting */ - if (ret != 2) - goto bad_type_length; - - if (xtype[0] != type[0] || xtype[1] != type[1]) - goto bad_type; - - ret = 0; - -error: + kfree(buf); _leave(" = %d", ret); return ret; - -bad_type_length: - kerror("Cache object %lu type xattr length incorrect", - dentry->d_inode->i_ino); - ret = -EIO; - goto error; - -bad_type: - xtype[2] = 0; - kerror("Cache object %*.*s [%lu] type %s not %s", - dentry->d_name.len, dentry->d_name.len, - dentry->d_name.name, dentry->d_inode->i_ino, - xtype, type); - ret = -EIO; - goto error; } /* - * set the state xattr on a cache file + * check the consistency between the backing cache and the FS-Cache cookie */ -int cachefiles_set_object_xattr(struct cachefiles_object *object, - struct cachefiles_xattr *auxdata) +int cachefiles_check_auxdata(struct cachefiles_object *object, struct file *file) { - struct dentry *dentry = object->dentry; - int ret; - - ASSERT(dentry); - - _enter("%p,#%d", object, auxdata->len); + struct cachefiles_xattr *buf; + struct dentry *dentry = file->f_path.dentry; + unsigned int len = object->cookie->aux_len, tlen; + const void *p = fscache_get_aux(object->cookie); + enum cachefiles_coherency_trace why; + ssize_t xlen; + int ret = -ESTALE; + + tlen = sizeof(struct cachefiles_xattr) + len; + buf = kmalloc(tlen, GFP_KERNEL); + if (!buf) + return -ENOMEM; - /* attempt to install the cache metadata directly */ - _debug("SET #%u", auxdata->len); + xlen = cachefiles_inject_read_error(); + if (xlen == 0) + xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, tlen); + if (xlen != tlen) { + if (xlen < 0) { + ret = xlen; + trace_cachefiles_vfs_error(object, file_inode(file), xlen, + cachefiles_trace_getxattr_error); + } + if (xlen == -EIO) + cachefiles_io_error_obj( + object, + "Failed to read aux with error %zd", xlen); + why = cachefiles_coherency_check_xattr; + goto out; + } - ret = vfs_setxattr(dentry, cachefiles_xattr_cache, - &auxdata->type, auxdata->len, - XATTR_CREATE); - if (ret < 0 && ret != -ENOMEM) - cachefiles_io_error_obj( - object, - "Failed to set xattr with error %d", ret); + if (buf->type != CACHEFILES_COOKIE_TYPE_DATA) { + why = cachefiles_coherency_check_type; + } else if (memcmp(buf->data, p, len) != 0) { + why = cachefiles_coherency_check_aux; + } else if (be64_to_cpu(buf->object_size) != object->cookie->object_size) { + why = cachefiles_coherency_check_objsize; + } else if (buf->content == CACHEFILES_CONTENT_DIRTY) { + // TODO: Begin conflict resolution + pr_warn("Dirty object in cache\n"); + why = cachefiles_coherency_check_dirty; + } else { + why = cachefiles_coherency_check_ok; + ret = 0; + } - _leave(" = %d", ret); +out: + trace_cachefiles_coherency(object, file_inode(file)->i_ino, + be64_to_cpup((__be64 *)buf->data), + buf->content, why); + kfree(buf); return ret; } /* - * update the state xattr on a cache file + * remove the object's xattr to mark it stale */ -int cachefiles_update_object_xattr(struct cachefiles_object *object, - struct cachefiles_xattr *auxdata) +int cachefiles_remove_object_xattr(struct cachefiles_cache *cache, + struct cachefiles_object *object, + struct dentry *dentry) { - struct dentry *dentry = object->dentry; int ret; - ASSERT(dentry); - - _enter("%p,#%d", object, auxdata->len); - - /* attempt to install the cache metadata directly */ - _debug("SET #%u", auxdata->len); - - ret = 
vfs_setxattr(dentry, cachefiles_xattr_cache, - &auxdata->type, auxdata->len, - XATTR_REPLACE); - if (ret < 0 && ret != -ENOMEM) - cachefiles_io_error_obj( - object, - "Failed to update xattr with error %d", ret); + ret = cachefiles_inject_remove_error(); + if (ret == 0) { + ret = mnt_want_write(cache->mnt); + if (ret == 0) { + ret = vfs_removexattr(&nop_mnt_idmap, dentry, + cachefiles_xattr_cache); + mnt_drop_write(cache->mnt); + } + } + if (ret < 0) { + trace_cachefiles_vfs_error(object, d_inode(dentry), ret, + cachefiles_trace_remxattr_error); + if (ret == -ENOENT || ret == -ENODATA) + ret = 0; + else if (ret != -ENOMEM) + cachefiles_io_error(cache, + "Can't remove xattr from %lu" + " (error %d)", + d_backing_inode(dentry)->i_ino, -ret); + } _leave(" = %d", ret); return ret; } /* - * check the state xattr on a cache file - * - return -ESTALE if the object should be deleted + * Stick a marker on the cache object to indicate that it's dirty. */ -int cachefiles_check_object_xattr(struct cachefiles_object *object, - struct cachefiles_xattr *auxdata) +void cachefiles_prepare_to_write(struct fscache_cookie *cookie) { - struct cachefiles_xattr *auxbuf; - struct dentry *dentry = object->dentry; - int ret; + const struct cred *saved_cred; + struct cachefiles_object *object = cookie->cache_priv; + struct cachefiles_cache *cache = object->volume->cache; - _enter("%p,#%d", object, auxdata->len); + _enter("c=%08x", object->cookie->debug_id); - ASSERT(dentry); - ASSERT(dentry->d_inode); - - auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, cachefiles_gfp); - if (!auxbuf) { - _leave(" = -ENOMEM"); - return -ENOMEM; + if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) { + cachefiles_begin_secure(cache, &saved_cred); + cachefiles_set_object_xattr(object); + cachefiles_end_secure(cache, saved_cred); } +} - /* read the current type label */ - ret = vfs_getxattr(dentry, cachefiles_xattr_cache, - &auxbuf->type, 512 + 1); - if (ret < 0) { - if (ret == -ENODATA) - goto stale; /* no attribute - power went off - * mid-cull? */ - - if (ret == -ERANGE) - goto bad_type_length; - - cachefiles_io_error_obj(object, - "Can't read xattr on %lu (err %d)", - dentry->d_inode->i_ino, -ret); - goto error; - } - - /* check the on-disk object */ - if (ret < 1) - goto bad_type_length; - - if (auxbuf->type != auxdata->type) - goto stale; - - auxbuf->len = ret; - - /* consult the netfs */ - if (object->fscache.cookie->def->check_aux) { - enum fscache_checkaux result; - unsigned int dlen; - - dlen = auxbuf->len - 1; - - _debug("checkaux %s #%u", - object->fscache.cookie->def->name, dlen); - - result = fscache_check_aux(&object->fscache, - &auxbuf->data, dlen); - - switch (result) { - /* entry okay as is */ - case FSCACHE_CHECKAUX_OKAY: - goto okay; - - /* entry requires update */ - case FSCACHE_CHECKAUX_NEEDS_UPDATE: - break; +/* + * Set the state xattr on a volume directory. 
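+ * The blob stored is the volume cookie's coherency data preceded by a
+ * reserved __be32 (see struct cachefiles_vol_xattr above).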
+ */ +bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume) +{ + struct cachefiles_vol_xattr *buf; + unsigned int len = volume->vcookie->coherency_len; + const void *p = volume->vcookie->coherency; + struct dentry *dentry = volume->dentry; + int ret; - /* entry requires deletion */ - case FSCACHE_CHECKAUX_OBSOLETE: - goto stale; + _enter("%x,#%d", volume->vcookie->debug_id, len); - default: - BUG(); - } + len += sizeof(*buf); + buf = kmalloc(len, GFP_KERNEL); + if (!buf) + return false; + buf->reserved = cpu_to_be32(0); + memcpy(buf->data, p, volume->vcookie->coherency_len); - /* update the current label */ - ret = vfs_setxattr(dentry, cachefiles_xattr_cache, - &auxdata->type, auxdata->len, - XATTR_REPLACE); - if (ret < 0) { - cachefiles_io_error_obj(object, - "Can't update xattr on %lu" - " (error %d)", - dentry->d_inode->i_ino, -ret); - goto error; + ret = cachefiles_inject_write_error(); + if (ret == 0) { + ret = mnt_want_write(volume->cache->mnt); + if (ret == 0) { + ret = vfs_setxattr(&nop_mnt_idmap, dentry, + cachefiles_xattr_cache, + buf, len, 0); + mnt_drop_write(volume->cache->mnt); } } + if (ret < 0) { + trace_cachefiles_vfs_error(NULL, d_inode(dentry), ret, + cachefiles_trace_setxattr_error); + trace_cachefiles_vol_coherency(volume, d_inode(dentry)->i_ino, + cachefiles_coherency_vol_set_fail); + if (ret != -ENOMEM) + cachefiles_io_error( + volume->cache, "Failed to set xattr with error %d", ret); + } else { + trace_cachefiles_vol_coherency(volume, d_inode(dentry)->i_ino, + cachefiles_coherency_vol_set_ok); + } -okay: - ret = 0; - -error: - kfree(auxbuf); + kfree(buf); _leave(" = %d", ret); - return ret; - -bad_type_length: - kerror("Cache object %lu xattr length incorrect", - dentry->d_inode->i_ino); - ret = -EIO; - goto error; - -stale: - ret = -ESTALE; - goto error; + return ret == 0; } /* - * remove the object's xattr to mark it stale + * Check the consistency between the backing cache and the volume cookie. 
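+ * A mismatch yields -ESTALE, upon which cachefiles_acquire_volume()
+ * buries the stale directory and retries with a fresh one.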
*/ -int cachefiles_remove_object_xattr(struct cachefiles_cache *cache, - struct dentry *dentry) +int cachefiles_check_volume_xattr(struct cachefiles_volume *volume) { - int ret; + struct cachefiles_vol_xattr *buf; + struct dentry *dentry = volume->dentry; + unsigned int len = volume->vcookie->coherency_len; + const void *p = volume->vcookie->coherency; + enum cachefiles_coherency_trace why; + ssize_t xlen; + int ret = -ESTALE; + + _enter(""); + + len += sizeof(*buf); + buf = kmalloc(len, GFP_KERNEL); + if (!buf) + return -ENOMEM; - ret = vfs_removexattr(dentry, cachefiles_xattr_cache); - if (ret < 0) { - if (ret == -ENOENT || ret == -ENODATA) - ret = 0; - else if (ret != -ENOMEM) - cachefiles_io_error(cache, - "Can't remove xattr from %lu" - " (error %d)", - dentry->d_inode->i_ino, -ret); + xlen = cachefiles_inject_read_error(); + if (xlen == 0) + xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, len); + if (xlen != len) { + if (xlen < 0) { + ret = xlen; + trace_cachefiles_vfs_error(NULL, d_inode(dentry), xlen, + cachefiles_trace_getxattr_error); + if (xlen == -EIO) + cachefiles_io_error( + volume->cache, + "Failed to read xattr with error %zd", xlen); + } + why = cachefiles_coherency_vol_check_xattr; + } else if (buf->reserved != cpu_to_be32(0)) { + why = cachefiles_coherency_vol_check_resv; + } else if (memcmp(buf->data, p, len - sizeof(*buf)) != 0) { + why = cachefiles_coherency_vol_check_cmp; + } else { + why = cachefiles_coherency_vol_check_ok; + ret = 0; } + trace_cachefiles_vol_coherency(volume, d_inode(dentry)->i_ino, why); + kfree(buf); _leave(" = %d", ret); return ret; } |
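For reference, the coherency blob that cachefiles_set_object_xattr() stores under "user.CacheFiles.cache" can be inspected from userspace with getxattr(2). The sketch below is illustrative only and not part of the patch: the on-disk layout is copied from the struct cachefiles_xattr definition in the hunk above, and the packed, big-endian field handling mirrors it.

/* dump-xattr.c -- print the CacheFiles coherency xattr of a cache file.
 * Hypothetical helper; layout taken from struct cachefiles_xattr above.
 */
#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/xattr.h>

struct cachefiles_xattr {		/* layout from the patch above */
	uint64_t object_size;		/* __be64: actual size of the object */
	uint64_t zero_point;		/* __be64 */
	uint8_t  type;			/* CACHEFILES_COOKIE_TYPE_DATA == 1 */
	uint8_t  content;		/* enum cachefiles_content */
	uint8_t  data[];		/* netfs coherency data */
} __attribute__((packed));

int main(int argc, char **argv)
{
	unsigned char buf[512];
	ssize_t n;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <cachefile>\n", argv[0]);
		return 2;
	}
	n = getxattr(argv[1], "user.CacheFiles.cache", buf, sizeof(buf));
	if (n < 0) {
		perror("getxattr");
		return 1;
	}
	if ((size_t)n < sizeof(struct cachefiles_xattr)) {
		fprintf(stderr, "xattr too short (%zd bytes)\n", n);
		return 1;
	}
	const struct cachefiles_xattr *x = (const void *)buf;
	printf("object_size=%llu zero_point=%llu type=%u content=%u aux_len=%zu\n",
	       (unsigned long long)be64toh(x->object_size),
	       (unsigned long long)be64toh(x->zero_point),
	       x->type, x->content, (size_t)n - sizeof(*x));
	return 0;
}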

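To make the on-demand protocol added in ondemand.c above concrete, here is a minimal, hypothetical daemon-side skeleton: read one request from the cachefiles device, answer OPEN with a "copen" reply, satisfy READ by writing into the object's anonymous fd, and let the kernel complete CLOSE itself. The structure layouts, the "copen" reply format, and the CACHEFILES_IOC_READ_COMPLETE ioctl are assumptions about include/uapi/linux/cachefiles.h, which this diff does not include; zero-fill stands in for fetching real data, and a single static fd stands in for per-object bookkeeping.

/* ondemand-daemon.c -- hypothetical skeleton of an on-demand cache daemon. */
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>

/* Assumed to match include/uapi/linux/cachefiles.h (not shown in this diff). */
struct cachefiles_msg {
	__u32 msg_id;
	__u32 opcode;
	__u32 len;
	__u32 object_id;
	__u8  data[];
};
struct cachefiles_open {
	__u32 volume_key_size;
	__u32 cookie_key_size;
	__u32 fd;
	__u32 flags;
	__u8  data[];		/* volume key, then cookie key (unused here) */
};
struct cachefiles_read {
	__u64 off;
	__u64 len;
};
enum { CACHEFILES_OP_OPEN, CACHEFILES_OP_CLOSE, CACHEFILES_OP_READ };
#define CACHEFILES_IOC_READ_COMPLETE _IOW(0x98, 1, int)	/* assumed */

static int object_fd = -1;	/* anonymous fd from OPEN (single-object demo) */

static int handle_one_request(int devfd)
{
	char buf[16384], reply[64];
	struct cachefiles_msg *msg = (struct cachefiles_msg *)buf;
	/* One read() returns exactly one request, per
	 * cachefiles_ondemand_daemon_read() above. */
	ssize_t n = read(devfd, buf, sizeof(buf));

	if (n <= 0)
		return -1;

	switch (msg->opcode) {
	case CACHEFILES_OP_OPEN: {
		struct cachefiles_open *load = (struct cachefiles_open *)msg->data;

		object_fd = load->fd;
		/* Acknowledge with the object size (0 is a placeholder here);
		 * a negative size would report an error back to the kernel. */
		n = snprintf(reply, sizeof(reply), "copen %u,%d", msg->msg_id, 0);
		return write(devfd, reply, n) == n ? 0 : -1;
	}
	case CACHEFILES_OP_READ: {
		struct cachefiles_read *rd = (struct cachefiles_read *)msg->data;
		char zeros[4096] = {0};
		__u64 done = 0;

		/* A real daemon would fetch [off, off + len) from its data
		 * source; zero-fill stands in for that here. */
		while (done < rd->len) {
			size_t c = rd->len - done < sizeof(zeros)
				 ? rd->len - done : sizeof(zeros);
			ssize_t w = pwrite(object_fd, zeros, c, rd->off + done);
			if (w <= 0)
				return -1;
			done += w;
		}
		/* Wake the waiter in cachefiles_ondemand_read(). */
		return ioctl(object_fd, CACHEFILES_IOC_READ_COMPLETE, msg->msg_id);
	}
	case CACHEFILES_OP_CLOSE:	/* no reply: the kernel completes it */
		if (object_fd >= 0)
			close(object_fd);
		object_fd = -1;
		return 0;
	}
	return -1;
}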