diff options
Diffstat (limited to 'fs/fuse/inode.c')
| -rw-r--r-- | fs/fuse/inode.c | 525 |
1 files changed, 421 insertions, 104 deletions
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f19d748890f0..819e50d66622 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -7,7 +7,10 @@ */ #include "fuse_i.h" +#include "fuse_dev_i.h" +#include "dev_uring_i.h" +#include <linux/dax.h> #include <linux/pagemap.h> #include <linux/slab.h> #include <linux/file.h> @@ -32,10 +35,16 @@ MODULE_LICENSE("GPL"); static struct kmem_cache *fuse_inode_cachep; struct list_head fuse_conn_list; DEFINE_MUTEX(fuse_mutex); +DECLARE_WAIT_QUEUE_HEAD(fuse_dev_waitq); static int set_global_limit(const char *val, const struct kernel_param *kp); -unsigned max_user_bgreq; +unsigned int fuse_max_pages_limit = 256; +/* default is no timeout */ +unsigned int fuse_default_req_timeout; +unsigned int fuse_max_req_timeout; + +unsigned int max_user_bgreq; module_param_call(max_user_bgreq, set_global_limit, param_get_uint, &max_user_bgreq, 0644); __MODULE_PARM_TYPE(max_user_bgreq, "uint"); @@ -43,7 +52,7 @@ MODULE_PARM_DESC(max_user_bgreq, "Global limit for the maximum number of backgrounded requests an " "unprivileged user can set"); -unsigned max_user_congthresh; +unsigned int max_user_congthresh; module_param_call(max_user_congthresh, set_global_limit, param_get_uint, &max_user_congthresh, 0644); __MODULE_PARM_TYPE(max_user_congthresh, "uint"); @@ -68,6 +77,24 @@ struct fuse_forget_link *fuse_alloc_forget(void) return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT); } +static struct fuse_submount_lookup *fuse_alloc_submount_lookup(void) +{ + struct fuse_submount_lookup *sl; + + sl = kzalloc(sizeof(struct fuse_submount_lookup), GFP_KERNEL_ACCOUNT); + if (!sl) + return NULL; + sl->forget = fuse_alloc_forget(); + if (!sl->forget) + goto out_free; + + return sl; + +out_free: + kfree(sl); + return NULL; +} + static struct inode *fuse_alloc_inode(struct super_block *sb) { struct fuse_inode *fi; @@ -76,13 +103,11 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) if (!fi) return NULL; - fi->i_time = 0; - fi->inval_mask = 0; - fi->nodeid = 0; - fi->nlookup = 0; - fi->attr_version = 0; - fi->orig_ino = 0; - fi->state = 0; + /* Initialize private data (i.e. everything except fi->inode) */ + BUILD_BUG_ON(offsetof(struct fuse_inode, inode) != 0); + memset((void *) fi + sizeof(fi->inode), 0, sizeof(*fi) - sizeof(fi->inode)); + + fi->inval_mask = ~0; mutex_init(&fi->mutex); spin_lock_init(&fi->lock); fi->forget = fuse_alloc_forget(); @@ -92,6 +117,9 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi)) goto out_free_forget; + if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) + fuse_inode_backing_set(fi, NULL); + return &fi->inode; out_free_forget: @@ -110,15 +138,32 @@ static void fuse_free_inode(struct inode *inode) #ifdef CONFIG_FUSE_DAX kfree(fi->dax); #endif + if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) + fuse_backing_put(fuse_inode_backing(fi)); + kmem_cache_free(fuse_inode_cachep, fi); } +static void fuse_cleanup_submount_lookup(struct fuse_conn *fc, + struct fuse_submount_lookup *sl) +{ + if (!refcount_dec_and_test(&sl->count)) + return; + + fuse_queue_forget(fc, sl->forget, sl->nodeid, 1); + sl->forget = NULL; + kfree(sl); +} + static void fuse_evict_inode(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); /* Will write inode on close/munmap and in all other dirtiers */ - WARN_ON(inode->i_state & I_DIRTY_INODE); + WARN_ON(inode_state_read_once(inode) & I_DIRTY_INODE); + + if (FUSE_IS_DAX(inode)) + dax_break_layout_final(inode); truncate_inode_pages_final(&inode->i_data); clear_inode(inode); @@ -132,8 +177,22 @@ static void fuse_evict_inode(struct inode *inode) fi->nlookup); fi->forget = NULL; } + + if (fi->submount_lookup) { + fuse_cleanup_submount_lookup(fc, fi->submount_lookup); + fi->submount_lookup = NULL; + } + /* + * Evict of non-deleted inode may race with outstanding + * LOOKUP/READDIRPLUS requests and result in inconsistency when + * the request finishes. Deal with that here by bumping a + * counter that can be compared to the starting value. + */ + if (inode->i_nlink > 0) + atomic64_inc(&fc->evict_ctr); } if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) { + WARN_ON(fi->iocachectr != 0); WARN_ON(!list_empty(&fi->write_files)); WARN_ON(!list_empty(&fi->queued_writes)); } @@ -163,16 +222,31 @@ static ino_t fuse_squash_ino(u64 ino64) } void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, - u64 attr_valid, u32 cache_mask) + struct fuse_statx *sx, + u64 attr_valid, u32 cache_mask, + u64 evict_ctr) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); lockdep_assert_held(&fi->lock); + /* + * Clear basic stats from invalid mask. + * + * Don't do this if this is coming from a fuse_iget() call and there + * might have been a racing evict which would've invalidated the result + * if the attr_version would've been preserved. + * + * !evict_ctr -> this is create + * fi->attr_version != 0 -> this is not a new inode + * evict_ctr == fuse_get_evict_ctr() -> no evicts while during request + */ + if (!evict_ctr || fi->attr_version || evict_ctr == fuse_get_evict_ctr(fc)) + set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0); + fi->attr_version = atomic64_inc_return(&fc->attr_version); fi->i_time = attr_valid; - WRITE_ONCE(fi->inval_mask, 0); inode->i_ino = fuse_squash_ino(attr->ino); inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); @@ -186,22 +260,38 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, attr->mtimensec = min_t(u32, attr->mtimensec, NSEC_PER_SEC - 1); attr->ctimensec = min_t(u32, attr->ctimensec, NSEC_PER_SEC - 1); - inode->i_atime.tv_sec = attr->atime; - inode->i_atime.tv_nsec = attr->atimensec; + inode_set_atime(inode, attr->atime, attr->atimensec); /* mtime from server may be stale due to local buffered write */ if (!(cache_mask & STATX_MTIME)) { - inode->i_mtime.tv_sec = attr->mtime; - inode->i_mtime.tv_nsec = attr->mtimensec; + inode_set_mtime(inode, attr->mtime, attr->mtimensec); } if (!(cache_mask & STATX_CTIME)) { - inode->i_ctime.tv_sec = attr->ctime; - inode->i_ctime.tv_nsec = attr->ctimensec; + inode_set_ctime(inode, attr->ctime, attr->ctimensec); + } + if (sx) { + /* Sanitize nsecs */ + sx->btime.tv_nsec = + min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1); + + /* + * Btime has been queried, cache is valid (whether or not btime + * is available or not) so clear STATX_BTIME from inval_mask. + * + * Availability of the btime attribute is indicated in + * FUSE_I_BTIME + */ + set_mask_bits(&fi->inval_mask, STATX_BTIME, 0); + if (sx->mask & STATX_BTIME) { + set_bit(FUSE_I_BTIME, &fi->state); + fi->i_btime.tv_sec = sx->btime.tv_sec; + fi->i_btime.tv_nsec = sx->btime.tv_nsec; + } } - if (attr->blksize != 0) - inode->i_blkbits = ilog2(attr->blksize); + if (attr->blksize) + fi->cached_i_blkbits = ilog2(attr->blksize); else - inode->i_blkbits = inode->i_sb->s_blocksize_bits; + fi->cached_i_blkbits = inode->i_sb->s_blocksize_bits; /* * Don't set the sticky bit in i_mode, unless we want the VFS @@ -235,8 +325,9 @@ u32 fuse_get_cache_mask(struct inode *inode) return STATX_MTIME | STATX_CTIME | STATX_SIZE; } -void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, - u64 attr_valid, u64 attr_version) +static void fuse_change_attributes_i(struct inode *inode, struct fuse_attr *attr, + struct fuse_statx *sx, u64 attr_valid, + u64 attr_version, u64 evict_ctr) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); @@ -255,12 +346,12 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, attr->size = i_size_read(inode); if (cache_mask & STATX_MTIME) { - attr->mtime = inode->i_mtime.tv_sec; - attr->mtimensec = inode->i_mtime.tv_nsec; + attr->mtime = inode_get_mtime_sec(inode); + attr->mtimensec = inode_get_mtime_nsec(inode); } if (cache_mask & STATX_CTIME) { - attr->ctime = inode->i_ctime.tv_sec; - attr->ctimensec = inode->i_ctime.tv_nsec; + attr->ctime = inode_get_ctime_sec(inode); + attr->ctimensec = inode_get_ctime_nsec(inode); } if ((attr_version != 0 && fi->attr_version > attr_version) || @@ -269,8 +360,9 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, return; } - old_mtime = inode->i_mtime; - fuse_change_attributes_common(inode, attr, attr_valid, cache_mask); + old_mtime = inode_get_mtime(inode); + fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask, + evict_ctr); oldsize = inode->i_size; /* @@ -311,15 +403,27 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, fuse_dax_dontcache(inode, attr->flags); } +void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, + struct fuse_statx *sx, u64 attr_valid, + u64 attr_version) +{ + fuse_change_attributes_i(inode, attr, sx, attr_valid, attr_version, 0); +} + +static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl, + u64 nodeid) +{ + sl->nodeid = nodeid; + refcount_set(&sl->count, 1); +} + static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr, struct fuse_conn *fc) { inode->i_mode = attr->mode & S_IFMT; inode->i_size = attr->size; - inode->i_mtime.tv_sec = attr->mtime; - inode->i_mtime.tv_nsec = attr->mtimensec; - inode->i_ctime.tv_sec = attr->ctime; - inode->i_ctime.tv_nsec = attr->ctimensec; + inode_set_mtime(inode, attr->mtime, attr->mtimensec); + inode_set_ctime(inode, attr->ctime, attr->ctimensec); if (S_ISREG(inode->i_mode)) { fuse_init_common(inode); fuse_init_file_inode(inode, attr->flags); @@ -360,7 +464,8 @@ static int fuse_inode_set(struct inode *inode, void *_nodeidp) struct inode *fuse_iget(struct super_block *sb, u64 nodeid, int generation, struct fuse_attr *attr, - u64 attr_valid, u64 attr_version) + u64 attr_valid, u64 attr_version, + u64 evict_ctr) { struct inode *inode; struct fuse_inode *fi; @@ -375,12 +480,22 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, */ if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) && S_ISDIR(attr->mode)) { + struct fuse_inode *fi; + inode = new_inode(sb); if (!inode) return NULL; fuse_init_inode(inode, attr, fc); - get_fuse_inode(inode)->nodeid = nodeid; + fi = get_fuse_inode(inode); + fi->nodeid = nodeid; + fi->submount_lookup = fuse_alloc_submount_lookup(); + if (!fi->submount_lookup) { + iput(inode); + return NULL; + } + /* Sets nlookup = 1 on fi->submount_lookup->nlookup */ + fuse_init_submount_lookup(fi->submount_lookup, nodeid); inode->i_flags |= S_AUTOMOUNT; goto done; } @@ -390,7 +505,7 @@ retry: if (!inode) return NULL; - if ((inode->i_state & I_NEW)) { + if ((inode_state_read_once(inode) & I_NEW)) { inode->i_flags |= S_NOATIME; if (!fc->writeback_cache || !S_ISREG(attr->mode)) inode->i_flags |= S_NOCMTIME; @@ -400,16 +515,19 @@ retry: } else if (fuse_stale_inode(inode, generation, attr)) { /* nodeid was reused, any I/O on the old inode should fail */ fuse_make_bad(inode); - iput(inode); - goto retry; + if (inode != d_inode(sb->s_root)) { + remove_inode_hash(inode); + iput(inode); + goto retry; + } } -done: fi = get_fuse_inode(inode); spin_lock(&fi->lock); fi->nlookup++; spin_unlock(&fi->lock); - fuse_change_attributes(inode, attr, attr_valid, attr_version); - +done: + fuse_change_attributes_i(inode, attr, NULL, attr_valid, attr_version, + evict_ctr); return inode; } @@ -467,6 +585,17 @@ int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid, return 0; } +void fuse_try_prune_one_inode(struct fuse_conn *fc, u64 nodeid) +{ + struct inode *inode; + + inode = fuse_ilookup(fc, nodeid, NULL); + if (!inode) + return; + d_prune_aliases(inode); + iput(inode); +} + bool fuse_lock_inode(struct inode *inode) { bool locked = false; @@ -661,8 +790,8 @@ static const struct fs_parameter_spec fuse_fs_parameters[] = { fsparam_string ("source", OPT_SOURCE), fsparam_u32 ("fd", OPT_FD), fsparam_u32oct ("rootmode", OPT_ROOTMODE), - fsparam_u32 ("user_id", OPT_USER_ID), - fsparam_u32 ("group_id", OPT_GROUP_ID), + fsparam_uid ("user_id", OPT_USER_ID), + fsparam_gid ("group_id", OPT_GROUP_ID), fsparam_flag ("default_permissions", OPT_DEFAULT_PERMISSIONS), fsparam_flag ("allow_other", OPT_ALLOW_OTHER), fsparam_u32 ("max_read", OPT_MAX_READ), @@ -676,6 +805,8 @@ static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param) struct fs_parse_result result; struct fuse_fs_context *ctx = fsc->fs_private; int opt; + kuid_t kuid; + kgid_t kgid; if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { /* @@ -720,16 +851,26 @@ static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param) break; case OPT_USER_ID: - ctx->user_id = make_kuid(fsc->user_ns, result.uint_32); - if (!uid_valid(ctx->user_id)) + kuid = result.uid; + /* + * The requested uid must be representable in the + * filesystem's idmapping. + */ + if (!kuid_has_mapping(fsc->user_ns, kuid)) return invalfc(fsc, "Invalid user_id"); + ctx->user_id = kuid; ctx->user_id_present = true; break; case OPT_GROUP_ID: - ctx->group_id = make_kgid(fsc->user_ns, result.uint_32); - if (!gid_valid(ctx->group_id)) + kgid = result.gid; + /* + * The requested gid must be representable in the + * filesystem's idmapping. + */ + if (!kgid_has_mapping(fsc->user_ns, kgid)) return invalfc(fsc, "Invalid group_id"); + ctx->group_id = kgid; ctx->group_id_present = true; break; @@ -814,7 +955,7 @@ static void fuse_iqueue_init(struct fuse_iqueue *fiq, fiq->priv = priv; } -static void fuse_pqueue_init(struct fuse_pqueue *fpq) +void fuse_pqueue_init(struct fuse_pqueue *fpq) { unsigned int i; @@ -835,6 +976,8 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, init_rwsem(&fc->killsb); refcount_set(&fc->count, 1); atomic_set(&fc->dev_count, 1); + atomic_set(&fc->epoch, 1); + INIT_WORK(&fc->epoch_work, fuse_epoch_work); init_waitqueue_head(&fc->blocked_waitq); fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv); INIT_LIST_HEAD(&fc->bg_queue); @@ -849,11 +992,17 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, fc->initialized = 0; fc->connected = 1; atomic64_set(&fc->attr_version, 1); + atomic64_set(&fc->evict_ctr, 1); get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); fc->pid_ns = get_pid_ns(task_active_pid_ns(current)); fc->user_ns = get_user_ns(user_ns); fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ; - fc->max_pages_limit = FUSE_MAX_MAX_PAGES; + fc->max_pages_limit = fuse_max_pages_limit; + fc->name_max = FUSE_NAME_LOW_MAX; + fc->timeout.req_timeout = 0; + + if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) + fuse_backing_files_init(fc); INIT_LIST_HEAD(&fc->mounts); list_add(&fm->fc_entry, &fc->mounts); @@ -861,25 +1010,40 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, } EXPORT_SYMBOL_GPL(fuse_conn_init); +static void delayed_release(struct rcu_head *p) +{ + struct fuse_conn *fc = container_of(p, struct fuse_conn, rcu); + + fuse_uring_destruct(fc); + + put_user_ns(fc->user_ns); + fc->release(fc); +} + void fuse_conn_put(struct fuse_conn *fc) { - if (refcount_dec_and_test(&fc->count)) { - struct fuse_iqueue *fiq = &fc->iq; - struct fuse_sync_bucket *bucket; - - if (IS_ENABLED(CONFIG_FUSE_DAX)) - fuse_dax_conn_free(fc); - if (fiq->ops->release) - fiq->ops->release(fiq); - put_pid_ns(fc->pid_ns); - put_user_ns(fc->user_ns); - bucket = rcu_dereference_protected(fc->curr_bucket, 1); - if (bucket) { - WARN_ON(atomic_read(&bucket->count) != 1); - kfree(bucket); - } - fc->release(fc); + struct fuse_iqueue *fiq = &fc->iq; + struct fuse_sync_bucket *bucket; + + if (!refcount_dec_and_test(&fc->count)) + return; + + if (IS_ENABLED(CONFIG_FUSE_DAX)) + fuse_dax_conn_free(fc); + if (fc->timeout.req_timeout) + cancel_delayed_work_sync(&fc->timeout.work); + cancel_work_sync(&fc->epoch_work); + if (fiq->ops->release) + fiq->ops->release(fiq); + put_pid_ns(fc->pid_ns); + bucket = rcu_dereference_protected(fc->curr_bucket, 1); + if (bucket) { + WARN_ON(atomic_read(&bucket->count) != 1); + kfree(bucket); } + if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) + fuse_backing_files_free(fc); + call_rcu(&fc->rcu, delayed_release); } EXPORT_SYMBOL_GPL(fuse_conn_put); @@ -890,7 +1054,7 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc) } EXPORT_SYMBOL_GPL(fuse_conn_get); -static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode) +static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned int mode) { struct fuse_attr attr; memset(&attr, 0, sizeof(attr)); @@ -898,7 +1062,7 @@ static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode) attr.mode = mode; attr.ino = FUSE_ROOT_ID; attr.nlink = 1; - return fuse_iget(sb, 1, 0, &attr, 0, 0); + return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0, 0); } struct fuse_inode_handle { @@ -982,7 +1146,7 @@ static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len, } *max_len = len; - return parent ? 0x82 : 0x81; + return parent ? FILEID_INO64_GEN_PARENT : FILEID_INO64_GEN; } static struct dentry *fuse_fh_to_dentry(struct super_block *sb, @@ -990,7 +1154,8 @@ static struct dentry *fuse_fh_to_dentry(struct super_block *sb, { struct fuse_inode_handle handle; - if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3) + if ((fh_type != FILEID_INO64_GEN && + fh_type != FILEID_INO64_GEN_PARENT) || fh_len < 3) return NULL; handle.nodeid = (u64) fid->raw[0] << 32; @@ -1004,7 +1169,7 @@ static struct dentry *fuse_fh_to_parent(struct super_block *sb, { struct fuse_inode_handle parent; - if (fh_type != 0x82 || fh_len < 6) + if (fh_type != FILEID_INO64_GEN_PARENT || fh_len < 6) return NULL; parent.nodeid = (u64) fid->raw[3] << 32; @@ -1040,6 +1205,11 @@ static struct dentry *fuse_get_parent(struct dentry *child) return parent; } +/* only for fid encoding; no support for file handle */ +static const struct export_operations fuse_export_fid_operations = { + .encode_fh = fuse_encode_fh, +}; + static const struct export_operations fuse_export_operations = { .fh_to_dentry = fuse_fh_to_dentry, .fh_to_parent = fuse_fh_to_parent, @@ -1052,14 +1222,14 @@ static const struct super_operations fuse_super_operations = { .free_inode = fuse_free_inode, .evict_inode = fuse_evict_inode, .write_inode = fuse_write_inode, - .drop_inode = generic_delete_inode, + .drop_inode = inode_just_drop, .umount_begin = fuse_umount_begin, .statfs = fuse_statfs, .sync_fs = fuse_sync_fs, .show_options = fuse_show_options, }; -static void sanitize_global_limit(unsigned *limit) +static void sanitize_global_limit(unsigned int *limit) { /* * The default maximum number of async requests is calculated to consume @@ -1080,7 +1250,7 @@ static int set_global_limit(const char *val, const struct kernel_param *kp) if (rv) return rv; - sanitize_global_limit((unsigned *)kp->arg); + sanitize_global_limit((unsigned int *)kp->arg); return 0; } @@ -1112,6 +1282,34 @@ static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg) spin_unlock(&fc->bg_lock); } +static void set_request_timeout(struct fuse_conn *fc, unsigned int timeout) +{ + fc->timeout.req_timeout = secs_to_jiffies(timeout); + INIT_DELAYED_WORK(&fc->timeout.work, fuse_check_timeout); + queue_delayed_work(system_percpu_wq, &fc->timeout.work, + fuse_timeout_timer_freq); +} + +static void init_server_timeout(struct fuse_conn *fc, unsigned int timeout) +{ + if (!timeout && !fuse_max_req_timeout && !fuse_default_req_timeout) + return; + + if (!timeout) + timeout = fuse_default_req_timeout; + + if (fuse_max_req_timeout) { + if (timeout) + timeout = min(fuse_max_req_timeout, timeout); + else + timeout = fuse_max_req_timeout; + } + + timeout = max(FUSE_TIMEOUT_TIMER_FREQ, timeout); + + set_request_timeout(fc, timeout); +} + struct fuse_init_args { struct fuse_args args; struct fuse_init_in in; @@ -1130,6 +1328,7 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, ok = false; else { unsigned long ra_pages; + unsigned int timeout = 0; process_init_limits(fc, arg); @@ -1193,6 +1392,13 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, fc->max_pages = min_t(unsigned int, fc->max_pages_limit, max_t(unsigned int, arg->max_pages, 1)); + + /* + * PATH_MAX file names might need two pages for + * ops like rename + */ + if (fc->max_pages > 1) + fc->name_max = FUSE_NAME_MAX; } if (IS_ENABLED(CONFIG_FUSE_DAX)) { if (flags & FUSE_MAP_ALIGNMENT && @@ -1212,12 +1418,52 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, fc->init_security = 1; if (flags & FUSE_CREATE_SUPP_GROUP) fc->create_supp_group = 1; + if (flags & FUSE_DIRECT_IO_ALLOW_MMAP) + fc->direct_io_allow_mmap = 1; + /* + * max_stack_depth is the max stack depth of FUSE fs, + * so it has to be at least 1 to support passthrough + * to backing files. + * + * with max_stack_depth > 1, the backing files can be + * on a stacked fs (e.g. overlayfs) themselves and with + * max_stack_depth == 1, FUSE fs can be stacked as the + * underlying fs of a stacked fs (e.g. overlayfs). + * + * Also don't allow the combination of FUSE_PASSTHROUGH + * and FUSE_WRITEBACK_CACHE, current design doesn't handle + * them together. + */ + if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH) && + (flags & FUSE_PASSTHROUGH) && + arg->max_stack_depth > 0 && + arg->max_stack_depth <= FILESYSTEM_MAX_STACK_DEPTH && + !(flags & FUSE_WRITEBACK_CACHE)) { + fc->passthrough = 1; + fc->max_stack_depth = arg->max_stack_depth; + fm->sb->s_stack_depth = arg->max_stack_depth; + } + if (flags & FUSE_NO_EXPORT_SUPPORT) + fm->sb->s_export_op = &fuse_export_fid_operations; + if (flags & FUSE_ALLOW_IDMAP) { + if (fc->default_permissions) + fm->sb->s_iflags &= ~SB_I_NOIDMAP; + else + ok = false; + } + if (flags & FUSE_OVER_IO_URING && fuse_uring_enabled()) + fc->io_uring = 1; + + if (flags & FUSE_REQUEST_TIMEOUT) + timeout = arg->request_timeout; } else { ra_pages = fc->max_read / PAGE_SIZE; fc->no_lock = 1; fc->no_flock = 1; } + init_server_timeout(fc, timeout); + fm->sb->s_bdi->ra_pages = min(fm->sb->s_bdi->ra_pages, ra_pages); fc->minor = arg->minor; @@ -1236,7 +1482,7 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, wake_up_all(&fc->blocked_waitq); } -void fuse_send_init(struct fuse_mount *fm) +static struct fuse_init_args *fuse_new_init(struct fuse_mount *fm) { struct fuse_init_args *ia; u64 flags; @@ -1258,7 +1504,9 @@ void fuse_send_init(struct fuse_mount *fm) FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA | FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT | FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP | - FUSE_HAS_EXPIRE_ONLY; + FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP | + FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND | FUSE_ALLOW_IDMAP | + FUSE_REQUEST_TIMEOUT; #ifdef CONFIG_FUSE_DAX if (fm->fc->dax) flags |= FUSE_MAP_ALIGNMENT; @@ -1267,6 +1515,15 @@ void fuse_send_init(struct fuse_mount *fm) #endif if (fm->fc->auto_submounts) flags |= FUSE_SUBMOUNTS; + if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) + flags |= FUSE_PASSTHROUGH; + + /* + * This is just an information flag for fuse server. No need to check + * the reply - server is either sending IORING_OP_URING_CMD or not. + */ + if (fuse_uring_enabled()) + flags |= FUSE_OVER_IO_URING; ia->in.flags = flags; ia->in.flags2 = flags >> 32; @@ -1284,17 +1541,37 @@ void fuse_send_init(struct fuse_mount *fm) ia->args.out_args[0].value = &ia->out; ia->args.force = true; ia->args.nocreds = true; - ia->args.end = process_init_reply; - if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0) - process_init_reply(fm, &ia->args, -ENOTCONN); + return ia; +} + +int fuse_send_init(struct fuse_mount *fm) +{ + struct fuse_init_args *ia = fuse_new_init(fm); + int err; + + if (fm->fc->sync_init) { + err = fuse_simple_request(fm, &ia->args); + /* Ignore size of init reply */ + if (err > 0) + err = 0; + } else { + ia->args.end = process_init_reply; + err = fuse_simple_background(fm, &ia->args, GFP_KERNEL); + if (!err) + return 0; + } + process_init_reply(fm, &ia->args, err); + if (fm->fc->conn_error) + return -ENOTCONN; + return 0; } EXPORT_SYMBOL_GPL(fuse_send_init); void fuse_free_conn(struct fuse_conn *fc) { WARN_ON(!list_empty(&fc->devices)); - kfree_rcu(fc, rcu); + kfree(fc); } EXPORT_SYMBOL_GPL(fuse_free_conn); @@ -1317,8 +1594,6 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) if (err) return err; - /* fuse does it's own writeback accounting */ - sb->s_bdi->capabilities &= ~BDI_CAP_WRITEBACK_ACCT; sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT; /* @@ -1401,20 +1676,24 @@ EXPORT_SYMBOL_GPL(fuse_dev_free); static void fuse_fill_attr_from_inode(struct fuse_attr *attr, const struct fuse_inode *fi) { + struct timespec64 atime = inode_get_atime(&fi->inode); + struct timespec64 mtime = inode_get_mtime(&fi->inode); + struct timespec64 ctime = inode_get_ctime(&fi->inode); + *attr = (struct fuse_attr){ .ino = fi->inode.i_ino, .size = fi->inode.i_size, .blocks = fi->inode.i_blocks, - .atime = fi->inode.i_atime.tv_sec, - .mtime = fi->inode.i_mtime.tv_sec, - .ctime = fi->inode.i_ctime.tv_sec, - .atimensec = fi->inode.i_atime.tv_nsec, - .mtimensec = fi->inode.i_mtime.tv_nsec, - .ctimensec = fi->inode.i_ctime.tv_nsec, + .atime = atime.tv_sec, + .mtime = mtime.tv_sec, + .ctime = ctime.tv_sec, + .atimensec = atime.tv_nsec, + .mtimensec = mtime.tv_nsec, + .ctimensec = ctime.tv_nsec, .mode = fi->inode.i_mode, .nlink = fi->inode.i_nlink, - .uid = fi->inode.i_uid.val, - .gid = fi->inode.i_gid.val, + .uid = __kuid_val(fi->inode.i_uid), + .gid = __kgid_val(fi->inode.i_gid), .rdev = fi->inode.i_rdev, .blksize = 1u << fi->inode.i_blkbits, }; @@ -1429,6 +1708,7 @@ static void fuse_sb_defaults(struct super_block *sb) sb->s_time_gran = 1; sb->s_export_op = &fuse_export_operations; sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE; + sb->s_iflags |= SB_I_NOIDMAP; if (sb->s_user_ns != &init_user_ns) sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER; sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); @@ -1441,6 +1721,8 @@ static int fuse_fill_super_submount(struct super_block *sb, struct super_block *parent_sb = parent_fi->inode.i_sb; struct fuse_attr root_attr; struct inode *root; + struct fuse_submount_lookup *sl; + struct fuse_inode *fi; fuse_sb_defaults(sb); fm->sb = sb; @@ -1449,6 +1731,7 @@ static int fuse_fill_super_submount(struct super_block *sb, sb->s_bdi = bdi_get(parent_sb->s_bdi); sb->s_xattr = parent_sb->s_xattr; + sb->s_export_op = parent_sb->s_export_op; sb->s_time_gran = parent_sb->s_time_gran; sb->s_blocksize = parent_sb->s_blocksize; sb->s_blocksize_bits = parent_sb->s_blocksize_bits; @@ -1457,18 +1740,34 @@ static int fuse_fill_super_submount(struct super_block *sb, return -ENOMEM; fuse_fill_attr_from_inode(&root_attr, parent_fi); - root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0); + root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0, + fuse_get_evict_ctr(fm->fc)); /* * This inode is just a duplicate, so it is not looked up and * its nlookup should not be incremented. fuse_iget() does * that, though, so undo it here. */ - get_fuse_inode(root)->nlookup--; - sb->s_d_op = &fuse_dentry_operations; + fi = get_fuse_inode(root); + fi->nlookup--; + + set_default_d_op(sb, &fuse_dentry_operations); sb->s_root = d_make_root(root); if (!sb->s_root) return -ENOMEM; + /* + * Grab the parent's submount_lookup pointer and take a + * reference on the shared nlookup from the parent. This is to + * prevent the last forget for this nodeid from getting + * triggered until all users have finished with it. + */ + sl = parent_fi->submount_lookup; + WARN_ON(!sl); + if (sl) { + refcount_inc(&sl->count); + fi->submount_lookup = sl; + } + return 0; } @@ -1543,6 +1842,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) if (!sb_set_blocksize(sb, ctx->blksize)) goto err; #endif + fc->sync_fs = 1; } else { sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; @@ -1586,17 +1886,19 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) err = -ENOMEM; root = fuse_get_root_inode(sb, ctx->rootmode); - sb->s_d_op = &fuse_root_dentry_operations; + set_default_d_op(sb, &fuse_dentry_operations); root_dentry = d_make_root(root); if (!root_dentry) goto err_dev_free; - /* Root dentry doesn't have .d_revalidate */ - sb->s_d_op = &fuse_dentry_operations; mutex_lock(&fuse_mutex); err = -EINVAL; - if (ctx->fudptr && *ctx->fudptr) - goto err_unlock; + if (ctx->fudptr && *ctx->fudptr) { + if (*ctx->fudptr == FUSE_DEV_SYNC_INIT) + fc->sync_init = 1; + else + goto err_unlock; + } err = fuse_ctl_add_conn(fc); if (err) @@ -1604,8 +1906,10 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) list_add_tail(&fc->entry, &fuse_conn_list); sb->s_root = root_dentry; - if (ctx->fudptr) + if (ctx->fudptr) { *ctx->fudptr = fud; + wake_up_all(&fuse_dev_waitq); + } mutex_unlock(&fuse_mutex); return 0; @@ -1626,6 +1930,7 @@ EXPORT_SYMBOL_GPL(fuse_fill_super_common); static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) { struct fuse_fs_context *ctx = fsc->fs_private; + struct fuse_mount *fm; int err; if (!ctx->file || !ctx->rootmode_present || @@ -1646,8 +1951,10 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) return err; /* file->private_data shall be visible on all CPUs after this */ smp_mb(); - fuse_send_init(get_fuse_mount_super(sb)); - return 0; + + fm = get_fuse_mount_super(sb); + + return fuse_send_init(fm); } /* @@ -1708,7 +2015,7 @@ static int fuse_get_tree(struct fs_context *fsc) * Allow creating a fuse mount with an already initialized fuse * connection */ - fud = READ_ONCE(ctx->file->private_data); + fud = __fuse_get_dev(ctx->file); if (ctx->file->f_op == &fuse_dev_operations && fud) { fsc->sget_key = fud->fc; sb = sget_fc(fsc, fuse_test_super, fuse_set_no_super); @@ -1809,7 +2116,7 @@ static void fuse_sb_destroy(struct super_block *sb) void fuse_mount_destroy(struct fuse_mount *fm) { fuse_conn_put(fm->fc); - kfree(fm); + kfree_rcu(fm, rcu); } EXPORT_SYMBOL(fuse_mount_destroy); @@ -1823,7 +2130,7 @@ static void fuse_kill_sb_anon(struct super_block *sb) static struct file_system_type fuse_fs_type = { .owner = THIS_MODULE, .name = "fuse", - .fs_flags = FS_HAS_SUBTYPE | FS_USERNS_MOUNT, + .fs_flags = FS_HAS_SUBTYPE | FS_USERNS_MOUNT | FS_ALLOW_IDMAP, .init_fs_context = fuse_init_fs_context, .parameters = fuse_fs_parameters, .kill_sb = fuse_kill_sb_anon, @@ -1844,7 +2151,7 @@ static struct file_system_type fuseblk_fs_type = { .init_fs_context = fuse_init_fs_context, .parameters = fuse_fs_parameters, .kill_sb = fuse_kill_sb_blk, - .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, + .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE | FS_ALLOW_IDMAP, }; MODULE_ALIAS_FS("fuseblk"); @@ -1895,8 +2202,14 @@ static int __init fuse_fs_init(void) if (err) goto out3; + err = fuse_sysctl_register(); + if (err) + goto out4; + return 0; + out4: + unregister_filesystem(&fuse_fs_type); out3: unregister_fuseblk(); out2: @@ -1907,6 +2220,7 @@ static int __init fuse_fs_init(void) static void fuse_fs_cleanup(void) { + fuse_sysctl_unregister(); unregister_filesystem(&fuse_fs_type); unregister_fuseblk(); @@ -1972,6 +2286,8 @@ static int __init fuse_init(void) if (res) goto err_sysfs_cleanup; + fuse_dentry_tree_init(); + sanitize_global_limit(&max_user_bgreq); sanitize_global_limit(&max_user_congthresh); @@ -1991,6 +2307,7 @@ static void __exit fuse_exit(void) { pr_debug("exit\n"); + fuse_dentry_tree_cleanup(); fuse_ctl_cleanup(); fuse_sysfs_cleanup(); fuse_fs_cleanup(); |
