summaryrefslogtreecommitdiff
path: root/fs/fuse/dir.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/fuse/dir.c')
-rw-r--r--fs/fuse/dir.c1690
1 files changed, 1224 insertions, 466 deletions
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index e909678afa2d..4b6b3d2758ff 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -10,12 +10,83 @@
#include <linux/pagemap.h>
#include <linux/file.h>
+#include <linux/fs_context.h>
+#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/namei.h>
#include <linux/slab.h>
#include <linux/xattr.h>
#include <linux/iversion.h>
#include <linux/posix_acl.h>
+#include <linux/security.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+
+static bool __read_mostly allow_sys_admin_access;
+module_param(allow_sys_admin_access, bool, 0644);
+MODULE_PARM_DESC(allow_sys_admin_access,
+ "Allow users with CAP_SYS_ADMIN in initial userns to bypass allow_other access check");
+
+struct dentry_bucket {
+ struct rb_root tree;
+ spinlock_t lock;
+};
+
+#define HASH_BITS 5
+#define HASH_SIZE (1 << HASH_BITS)
+static struct dentry_bucket dentry_hash[HASH_SIZE];
+struct delayed_work dentry_tree_work;
+
+/* Minimum invalidation work queue frequency */
+#define FUSE_DENTRY_INVAL_FREQ_MIN 5
+
+unsigned __read_mostly inval_wq;
+static int inval_wq_set(const char *val, const struct kernel_param *kp)
+{
+ unsigned int num;
+ unsigned int old = inval_wq;
+ int ret;
+
+ if (!val)
+ return -EINVAL;
+
+ ret = kstrtouint(val, 0, &num);
+ if (ret)
+ return ret;
+
+ if ((num < FUSE_DENTRY_INVAL_FREQ_MIN) && (num != 0))
+ return -EINVAL;
+
+ /* This should prevent overflow in secs_to_jiffies() */
+ if (num > USHRT_MAX)
+ return -EINVAL;
+
+ *((unsigned int *)kp->arg) = num;
+
+ if (num && !old)
+ schedule_delayed_work(&dentry_tree_work,
+ secs_to_jiffies(num));
+ else if (!num && old)
+ cancel_delayed_work_sync(&dentry_tree_work);
+
+ return 0;
+}
+static const struct kernel_param_ops inval_wq_ops = {
+ .set = inval_wq_set,
+ .get = param_get_uint,
+};
+module_param_cb(inval_wq, &inval_wq_ops, &inval_wq, 0644);
+__MODULE_PARM_TYPE(inval_wq, "uint");
+MODULE_PARM_DESC(inval_wq,
+ "Dentries invalidation work queue period in secs (>= "
+ __stringify(FUSE_DENTRY_INVAL_FREQ_MIN) ").");
+
+static inline struct dentry_bucket *get_dentry_bucket(struct dentry *dentry)
+{
+ int i = hash_ptr(dentry, HASH_BITS);
+
+ return &dentry_hash[i];
+}
static void fuse_advise_use_readdirplus(struct inode *dir)
{
@@ -24,19 +95,172 @@ static void fuse_advise_use_readdirplus(struct inode *dir)
set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
}
-union fuse_dentry {
+struct fuse_dentry {
u64 time;
- struct rcu_head rcu;
+ union {
+ struct rcu_head rcu;
+ struct rb_node node;
+ };
+ struct dentry *dentry;
};
-static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
+static void __fuse_dentry_tree_del_node(struct fuse_dentry *fd,
+ struct dentry_bucket *bucket)
{
- ((union fuse_dentry *) entry->d_fsdata)->time = time;
+ if (!RB_EMPTY_NODE(&fd->node)) {
+ rb_erase(&fd->node, &bucket->tree);
+ RB_CLEAR_NODE(&fd->node);
+ }
}
-static inline u64 fuse_dentry_time(struct dentry *entry)
+static void fuse_dentry_tree_del_node(struct dentry *dentry)
{
- return ((union fuse_dentry *) entry->d_fsdata)->time;
+ struct fuse_dentry *fd = dentry->d_fsdata;
+ struct dentry_bucket *bucket = get_dentry_bucket(dentry);
+
+ spin_lock(&bucket->lock);
+ __fuse_dentry_tree_del_node(fd, bucket);
+ spin_unlock(&bucket->lock);
+}
+
+static void fuse_dentry_tree_add_node(struct dentry *dentry)
+{
+ struct fuse_dentry *fd = dentry->d_fsdata;
+ struct dentry_bucket *bucket;
+ struct fuse_dentry *cur;
+ struct rb_node **p, *parent = NULL;
+
+ if (!inval_wq)
+ return;
+
+ bucket = get_dentry_bucket(dentry);
+
+ spin_lock(&bucket->lock);
+
+ __fuse_dentry_tree_del_node(fd, bucket);
+
+ p = &bucket->tree.rb_node;
+ while (*p) {
+ parent = *p;
+ cur = rb_entry(*p, struct fuse_dentry, node);
+ if (fd->time < cur->time)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+ rb_link_node(&fd->node, parent, p);
+ rb_insert_color(&fd->node, &bucket->tree);
+ spin_unlock(&bucket->lock);
+}
+
+/*
+ * work queue which, when enabled, will periodically check for expired dentries
+ * in the dentries tree.
+ */
+static void fuse_dentry_tree_work(struct work_struct *work)
+{
+ LIST_HEAD(dispose);
+ struct fuse_dentry *fd;
+ struct rb_node *node;
+ int i;
+
+ for (i = 0; i < HASH_SIZE; i++) {
+ spin_lock(&dentry_hash[i].lock);
+ node = rb_first(&dentry_hash[i].tree);
+ while (node) {
+ fd = rb_entry(node, struct fuse_dentry, node);
+ if (time_after64(get_jiffies_64(), fd->time)) {
+ rb_erase(&fd->node, &dentry_hash[i].tree);
+ RB_CLEAR_NODE(&fd->node);
+ spin_unlock(&dentry_hash[i].lock);
+ d_dispose_if_unused(fd->dentry, &dispose);
+ cond_resched();
+ spin_lock(&dentry_hash[i].lock);
+ } else
+ break;
+ node = rb_first(&dentry_hash[i].tree);
+ }
+ spin_unlock(&dentry_hash[i].lock);
+ shrink_dentry_list(&dispose);
+ }
+
+ if (inval_wq)
+ schedule_delayed_work(&dentry_tree_work,
+ secs_to_jiffies(inval_wq));
+}
+
+void fuse_epoch_work(struct work_struct *work)
+{
+ struct fuse_conn *fc = container_of(work, struct fuse_conn,
+ epoch_work);
+ struct fuse_mount *fm;
+ struct inode *inode;
+
+ down_read(&fc->killsb);
+
+ inode = fuse_ilookup(fc, FUSE_ROOT_ID, &fm);
+ if (inode) {
+ iput(inode);
+ /* Remove all possible active references to cached inodes */
+ shrink_dcache_sb(fm->sb);
+ } else
+ pr_warn("Failed to get root inode");
+
+ up_read(&fc->killsb);
+}
+
+void fuse_dentry_tree_init(void)
+{
+ int i;
+
+ for (i = 0; i < HASH_SIZE; i++) {
+ spin_lock_init(&dentry_hash[i].lock);
+ dentry_hash[i].tree = RB_ROOT;
+ }
+ INIT_DELAYED_WORK(&dentry_tree_work, fuse_dentry_tree_work);
+}
+
+void fuse_dentry_tree_cleanup(void)
+{
+ int i;
+
+ inval_wq = 0;
+ cancel_delayed_work_sync(&dentry_tree_work);
+
+ for (i = 0; i < HASH_SIZE; i++)
+ WARN_ON_ONCE(!RB_EMPTY_ROOT(&dentry_hash[i].tree));
+}
+
+static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
+{
+ ((struct fuse_dentry *) dentry->d_fsdata)->time = time;
+}
+
+static inline u64 fuse_dentry_time(const struct dentry *entry)
+{
+ return ((struct fuse_dentry *) entry->d_fsdata)->time;
+}
+
+static void fuse_dentry_settime(struct dentry *dentry, u64 time)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
+ bool delete = !time && fc->delete_stale;
+ /*
+ * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
+ * Don't care about races, either way it's just an optimization
+ */
+ if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) ||
+ (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) {
+ spin_lock(&dentry->d_lock);
+ if (!delete)
+ dentry->d_flags &= ~DCACHE_OP_DELETE;
+ else
+ dentry->d_flags |= DCACHE_OP_DELETE;
+ spin_unlock(&dentry->d_lock);
+ }
+
+ __fuse_dentry_settime(dentry, time);
+ fuse_dentry_tree_add_node(dentry);
}
/*
@@ -48,7 +272,7 @@ static inline u64 fuse_dentry_time(struct dentry *entry)
/*
* Calculate the time in jiffies until a dentry/attributes are valid
*/
-static u64 time_to_jiffies(u64 sec, u32 nsec)
+u64 fuse_time_to_jiffies(u64 sec, u32 nsec)
{
if (sec || nsec) {
struct timespec64 ts = {
@@ -68,20 +292,10 @@ static u64 time_to_jiffies(u64 sec, u32 nsec)
void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
{
fuse_dentry_settime(entry,
- time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
+ fuse_time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
}
-static u64 attr_timeout(struct fuse_attr_out *o)
-{
- return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
-}
-
-u64 entry_attr_timeout(struct fuse_entry_out *o)
-{
- return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
-}
-
-static void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
+void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
{
set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask);
}
@@ -101,7 +315,7 @@ static void fuse_dir_changed(struct inode *dir)
inode_maybe_inc_iversion(dir, false);
}
-/**
+/*
* Mark the attributes as stale due to an atime change. Avoid the invalidate if
* atime is not used.
*/
@@ -139,29 +353,17 @@ static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
struct fuse_entry_out *outarg)
{
memset(outarg, 0, sizeof(struct fuse_entry_out));
- args->in.h.opcode = FUSE_LOOKUP;
- args->in.h.nodeid = nodeid;
- args->in.numargs = 1;
- args->in.args[0].size = name->len + 1;
- args->in.args[0].value = name->name;
- args->out.numargs = 1;
- args->out.args[0].size = sizeof(struct fuse_entry_out);
- args->out.args[0].value = outarg;
-}
-
-u64 fuse_get_attr_version(struct fuse_conn *fc)
-{
- u64 curr_version;
-
- /*
- * The spin lock isn't actually needed on 64bit archs, but we
- * don't yet care too much about such optimizations.
- */
- spin_lock(&fc->lock);
- curr_version = fc->attr_version;
- spin_unlock(&fc->lock);
-
- return curr_version;
+ args->opcode = FUSE_LOOKUP;
+ args->nodeid = nodeid;
+ args->in_numargs = 3;
+ fuse_set_zero_arg0(args);
+ args->in_args[1].size = name->len;
+ args->in_args[1].value = name->name;
+ args->in_args[2].size = 1;
+ args->in_args[2].value = "";
+ args->out_numargs = 1;
+ args->out_args[0].size = sizeof(struct fuse_entry_out);
+ args->out_args[0].value = outarg;
}
/*
@@ -173,19 +375,24 @@ u64 fuse_get_attr_version(struct fuse_conn *fc)
* the lookup once more. If the lookup results in the same inode,
* then refresh the attributes, timeouts and mark the dentry valid.
*/
-static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
+static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *entry, unsigned int flags)
{
struct inode *inode;
- struct dentry *parent;
+ struct fuse_mount *fm;
struct fuse_conn *fc;
struct fuse_inode *fi;
int ret;
+ fc = get_fuse_conn_super(dir->i_sb);
+ if (entry->d_time < atomic_read(&fc->epoch))
+ goto invalid;
+
inode = d_inode_rcu(entry);
- if (inode && is_bad_inode(inode))
+ if (inode && fuse_is_bad(inode))
goto invalid;
else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
- (flags & LOOKUP_REVAL)) {
+ (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) {
struct fuse_entry_out outarg;
FUSE_ARGS(args);
struct fuse_forget_link *forget;
@@ -199,42 +406,43 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
if (flags & LOOKUP_RCU)
goto out;
- fc = get_fuse_conn(inode);
+ fm = get_fuse_mount(inode);
forget = fuse_alloc_forget();
ret = -ENOMEM;
if (!forget)
goto out;
- attr_version = fuse_get_attr_version(fc);
+ attr_version = fuse_get_attr_version(fm->fc);
- parent = dget_parent(entry);
- fuse_lookup_init(fc, &args, get_node_id(d_inode(parent)),
- &entry->d_name, &outarg);
- ret = fuse_simple_request(fc, &args);
- dput(parent);
+ fuse_lookup_init(fm->fc, &args, get_node_id(dir),
+ name, &outarg);
+ ret = fuse_simple_request(fm, &args);
/* Zero nodeid is same as -ENOENT */
if (!ret && !outarg.nodeid)
ret = -ENOENT;
if (!ret) {
fi = get_fuse_inode(inode);
- if (outarg.nodeid != get_node_id(inode)) {
- fuse_queue_forget(fc, forget, outarg.nodeid, 1);
+ if (outarg.nodeid != get_node_id(inode) ||
+ (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
+ fuse_queue_forget(fm->fc, forget,
+ outarg.nodeid, 1);
goto invalid;
}
- spin_lock(&fc->lock);
+ spin_lock(&fi->lock);
fi->nlookup++;
- spin_unlock(&fc->lock);
+ spin_unlock(&fi->lock);
}
kfree(forget);
- if (ret == -ENOMEM)
+ if (ret == -ENOMEM || ret == -EINTR)
goto out;
- if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
+ if (ret || fuse_invalid_attr(&outarg.attr) ||
+ fuse_stale_inode(inode, outarg.generation, &outarg.attr))
goto invalid;
forget_all_cached_acls(inode);
- fuse_change_attributes(inode, &outarg.attr,
- entry_attr_timeout(&outarg),
+ fuse_change_attributes(inode, &outarg.attr, NULL,
+ ATTR_TIMEOUT(&outarg),
attr_version);
fuse_change_entry_timeout(entry, &outarg);
} else if (inode) {
@@ -243,9 +451,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
return -ECHILD;
} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
- parent = dget_parent(entry);
- fuse_advise_use_readdirplus(d_inode(parent));
- dput(parent);
+ fuse_advise_use_readdirplus(dir);
}
}
ret = 1;
@@ -259,26 +465,71 @@ invalid:
static int fuse_dentry_init(struct dentry *dentry)
{
- dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), GFP_KERNEL);
+ struct fuse_dentry *fd;
+
+ fd = kzalloc(sizeof(struct fuse_dentry),
+ GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
+ if (!fd)
+ return -ENOMEM;
+
+ fd->dentry = dentry;
+ RB_CLEAR_NODE(&fd->node);
+ dentry->d_fsdata = fd;
+
+ return 0;
+}
+
+static void fuse_dentry_prune(struct dentry *dentry)
+{
+ struct fuse_dentry *fd = dentry->d_fsdata;
- return dentry->d_fsdata ? 0 : -ENOMEM;
+ if (!RB_EMPTY_NODE(&fd->node))
+ fuse_dentry_tree_del_node(dentry);
}
+
static void fuse_dentry_release(struct dentry *dentry)
{
- union fuse_dentry *fd = dentry->d_fsdata;
+ struct fuse_dentry *fd = dentry->d_fsdata;
kfree_rcu(fd, rcu);
}
+static int fuse_dentry_delete(const struct dentry *dentry)
+{
+ return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
+}
+
+/*
+ * Create a fuse_mount object with a new superblock (with path->dentry
+ * as the root), and return that mount so it can be auto-mounted on
+ * @path.
+ */
+static struct vfsmount *fuse_dentry_automount(struct path *path)
+{
+ struct fs_context *fsc;
+ struct vfsmount *mnt;
+ struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry));
+
+ fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
+ if (IS_ERR(fsc))
+ return ERR_CAST(fsc);
+
+ /* Pass the FUSE inode of the mount for fuse_get_tree_submount() */
+ fsc->fs_private = mp_fi;
+
+ /* Create the submount */
+ mnt = fc_mount(fsc);
+ put_fs_context(fsc);
+ return mnt;
+}
+
const struct dentry_operations fuse_dentry_operations = {
.d_revalidate = fuse_dentry_revalidate,
+ .d_delete = fuse_dentry_delete,
.d_init = fuse_dentry_init,
+ .d_prune = fuse_dentry_prune,
.d_release = fuse_dentry_release,
-};
-
-const struct dentry_operations fuse_root_dentry_operations = {
- .d_init = fuse_dentry_init,
- .d_release = fuse_dentry_release,
+ .d_automount = fuse_dentry_automount,
};
int fuse_valid_type(int m)
@@ -287,18 +538,28 @@ int fuse_valid_type(int m)
S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
}
+static bool fuse_valid_size(u64 size)
+{
+ return size <= LLONG_MAX;
+}
+
+bool fuse_invalid_attr(struct fuse_attr *attr)
+{
+ return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
+}
+
int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
struct fuse_entry_out *outarg, struct inode **inode)
{
- struct fuse_conn *fc = get_fuse_conn_super(sb);
+ struct fuse_mount *fm = get_fuse_mount_super(sb);
FUSE_ARGS(args);
struct fuse_forget_link *forget;
- u64 attr_version;
+ u64 attr_version, evict_ctr;
int err;
*inode = NULL;
err = -ENAMETOOLONG;
- if (name->len > FUSE_NAME_MAX)
+ if (name->len > fm->fc->name_max)
goto out;
@@ -307,26 +568,29 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
if (!forget)
goto out;
- attr_version = fuse_get_attr_version(fc);
+ attr_version = fuse_get_attr_version(fm->fc);
+ evict_ctr = fuse_get_evict_ctr(fm->fc);
- fuse_lookup_init(fc, &args, nodeid, name, outarg);
- err = fuse_simple_request(fc, &args);
+ fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
+ err = fuse_simple_request(fm, &args);
/* Zero nodeid is same as -ENOENT, but with valid timeout */
if (err || !outarg->nodeid)
goto out_put_forget;
err = -EIO;
- if (!outarg->nodeid)
- goto out_put_forget;
- if (!fuse_valid_type(outarg->attr.mode))
+ if (fuse_invalid_attr(&outarg->attr))
goto out_put_forget;
+ if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) {
+ pr_warn_once("root generation should be zero\n");
+ outarg->generation = 0;
+ }
*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
- &outarg->attr, entry_attr_timeout(outarg),
- attr_version);
+ &outarg->attr, ATTR_TIMEOUT(outarg),
+ attr_version, evict_ctr);
err = -ENOMEM;
if (!*inode) {
- fuse_queue_forget(fc, forget, outarg->nodeid, 1);
+ fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
goto out;
}
err = 0;
@@ -340,13 +604,20 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
unsigned int flags)
{
- int err;
struct fuse_entry_out outarg;
+ struct fuse_conn *fc;
struct inode *inode;
struct dentry *newent;
+ int err, epoch;
bool outarg_valid = true;
bool locked;
+ if (fuse_is_bad(dir))
+ return ERR_PTR(-EIO);
+
+ fc = get_fuse_conn_super(dir->i_sb);
+ epoch = atomic_read(&fc->epoch);
+
locked = fuse_lock_inode(dir);
err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
&outarg, &inode);
@@ -368,12 +639,14 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
goto out_err;
entry = newent ? newent : entry;
+ entry->d_time = epoch;
if (outarg_valid)
fuse_change_entry_timeout(entry, &outarg);
else
fuse_invalidate_entry_cache(entry);
- fuse_advise_use_readdirplus(dir);
+ if (inode)
+ fuse_advise_use_readdirplus(dir);
return newent;
out_iput:
@@ -382,40 +655,191 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
return ERR_PTR(err);
}
+static int get_security_context(struct dentry *entry, umode_t mode,
+ struct fuse_in_arg *ext)
+{
+ struct fuse_secctx *fctx;
+ struct fuse_secctx_header *header;
+ struct lsm_context lsmctx = { };
+ void *ptr;
+ u32 total_len = sizeof(*header);
+ int err, nr_ctx = 0;
+ const char *name = NULL;
+ size_t namesize;
+
+ err = security_dentry_init_security(entry, mode, &entry->d_name,
+ &name, &lsmctx);
+
+ /* If no LSM is supporting this security hook ignore error */
+ if (err && err != -EOPNOTSUPP)
+ goto out_err;
+
+ if (lsmctx.len) {
+ nr_ctx = 1;
+ namesize = strlen(name) + 1;
+ err = -EIO;
+ if (WARN_ON(namesize > XATTR_NAME_MAX + 1 ||
+ lsmctx.len > S32_MAX))
+ goto out_err;
+ total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namesize +
+ lsmctx.len);
+ }
+
+ err = -ENOMEM;
+ header = ptr = kzalloc(total_len, GFP_KERNEL);
+ if (!ptr)
+ goto out_err;
+
+ header->nr_secctx = nr_ctx;
+ header->size = total_len;
+ ptr += sizeof(*header);
+ if (nr_ctx) {
+ fctx = ptr;
+ fctx->size = lsmctx.len;
+ ptr += sizeof(*fctx);
+
+ strscpy(ptr, name, namesize);
+ ptr += namesize;
+
+ memcpy(ptr, lsmctx.context, lsmctx.len);
+ }
+ ext->size = total_len;
+ ext->value = header;
+ err = 0;
+out_err:
+ if (nr_ctx)
+ security_release_secctx(&lsmctx);
+ return err;
+}
+
+static void *extend_arg(struct fuse_in_arg *buf, u32 bytes)
+{
+ void *p;
+ u32 newlen = buf->size + bytes;
+
+ p = krealloc(buf->value, newlen, GFP_KERNEL);
+ if (!p) {
+ kfree(buf->value);
+ buf->size = 0;
+ buf->value = NULL;
+ return NULL;
+ }
+
+ memset(p + buf->size, 0, bytes);
+ buf->value = p;
+ buf->size = newlen;
+
+ return p + newlen - bytes;
+}
+
+static u32 fuse_ext_size(size_t size)
+{
+ return FUSE_REC_ALIGN(sizeof(struct fuse_ext_header) + size);
+}
+
+/*
+ * This adds just a single supplementary group that matches the parent's group.
+ */
+static int get_create_supp_group(struct mnt_idmap *idmap,
+ struct inode *dir,
+ struct fuse_in_arg *ext)
+{
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_ext_header *xh;
+ struct fuse_supp_groups *sg;
+ kgid_t kgid = dir->i_gid;
+ vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns, kgid);
+ gid_t parent_gid = from_kgid(fc->user_ns, kgid);
+
+ u32 sg_len = fuse_ext_size(sizeof(*sg) + sizeof(sg->groups[0]));
+
+ if (parent_gid == (gid_t) -1 || vfsgid_eq_kgid(vfsgid, current_fsgid()) ||
+ !vfsgid_in_group_p(vfsgid))
+ return 0;
+
+ xh = extend_arg(ext, sg_len);
+ if (!xh)
+ return -ENOMEM;
+
+ xh->size = sg_len;
+ xh->type = FUSE_EXT_GROUPS;
+
+ sg = (struct fuse_supp_groups *) &xh[1];
+ sg->nr_groups = 1;
+ sg->groups[0] = parent_gid;
+
+ return 0;
+}
+
+static int get_create_ext(struct mnt_idmap *idmap,
+ struct fuse_args *args,
+ struct inode *dir, struct dentry *dentry,
+ umode_t mode)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
+ struct fuse_in_arg ext = { .size = 0, .value = NULL };
+ int err = 0;
+
+ if (fc->init_security)
+ err = get_security_context(dentry, mode, &ext);
+ if (!err && fc->create_supp_group)
+ err = get_create_supp_group(idmap, dir, &ext);
+
+ if (!err && ext.size) {
+ WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args));
+ args->is_ext = true;
+ args->ext_idx = args->in_numargs++;
+ args->in_args[args->ext_idx] = ext;
+ } else {
+ kfree(ext.value);
+ }
+
+ return err;
+}
+
+static void free_ext_value(struct fuse_args *args)
+{
+ if (args->is_ext)
+ kfree(args->in_args[args->ext_idx].value);
+}
+
/*
* Atomic create+open operation
*
* If the filesystem doesn't support this, then fall back to separate
* 'mknod' + 'open' requests.
*/
-static int fuse_create_open(struct inode *dir, struct dentry *entry,
- struct file *file, unsigned flags,
- umode_t mode)
+static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *entry, struct file *file,
+ unsigned int flags, umode_t mode, u32 opcode)
{
- int err;
struct inode *inode;
- struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_mount *fm = get_fuse_mount(dir);
FUSE_ARGS(args);
struct fuse_forget_link *forget;
struct fuse_create_in inarg;
- struct fuse_open_out outopen;
+ struct fuse_open_out *outopenp;
struct fuse_entry_out outentry;
+ struct fuse_inode *fi;
struct fuse_file *ff;
+ int epoch, err;
+ bool trunc = flags & O_TRUNC;
/* Userspace expects S_IFREG in create mode */
BUG_ON((mode & S_IFMT) != S_IFREG);
+ epoch = atomic_read(&fm->fc->epoch);
forget = fuse_alloc_forget();
err = -ENOMEM;
if (!forget)
goto out_err;
err = -ENOMEM;
- ff = fuse_file_alloc(fc);
+ ff = fuse_file_alloc(fm, true);
if (!ff)
goto out_put_forget_req;
- if (!fc->dont_mask)
+ if (!fm->fc->dont_mask)
mode &= ~current_umask();
flags &= ~O_NOCTTY;
@@ -424,48 +848,71 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
inarg.flags = flags;
inarg.mode = mode;
inarg.umask = current_umask();
- args.in.h.opcode = FUSE_CREATE;
- args.in.h.nodeid = get_node_id(dir);
- args.in.numargs = 2;
- args.in.args[0].size = sizeof(inarg);
- args.in.args[0].value = &inarg;
- args.in.args[1].size = entry->d_name.len + 1;
- args.in.args[1].value = entry->d_name.name;
- args.out.numargs = 2;
- args.out.args[0].size = sizeof(outentry);
- args.out.args[0].value = &outentry;
- args.out.args[1].size = sizeof(outopen);
- args.out.args[1].value = &outopen;
- err = fuse_simple_request(fc, &args);
+
+ if (fm->fc->handle_killpriv_v2 && trunc &&
+ !(flags & O_EXCL) && !capable(CAP_FSETID)) {
+ inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
+ }
+
+ args.opcode = opcode;
+ args.nodeid = get_node_id(dir);
+ args.in_numargs = 2;
+ args.in_args[0].size = sizeof(inarg);
+ args.in_args[0].value = &inarg;
+ args.in_args[1].size = entry->d_name.len + 1;
+ args.in_args[1].value = entry->d_name.name;
+ args.out_numargs = 2;
+ args.out_args[0].size = sizeof(outentry);
+ args.out_args[0].value = &outentry;
+ /* Store outarg for fuse_finish_open() */
+ outopenp = &ff->args->open_outarg;
+ args.out_args[1].size = sizeof(*outopenp);
+ args.out_args[1].value = outopenp;
+
+ err = get_create_ext(idmap, &args, dir, entry, mode);
+ if (err)
+ goto out_free_ff;
+
+ err = fuse_simple_idmap_request(idmap, fm, &args);
+ free_ext_value(&args);
if (err)
goto out_free_ff;
err = -EIO;
- if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
+ if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
+ fuse_invalid_attr(&outentry.attr))
goto out_free_ff;
- ff->fh = outopen.fh;
+ ff->fh = outopenp->fh;
ff->nodeid = outentry.nodeid;
- ff->open_flags = outopen.open_flags;
+ ff->open_flags = outopenp->open_flags;
inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
- &outentry.attr, entry_attr_timeout(&outentry), 0);
+ &outentry.attr, ATTR_TIMEOUT(&outentry), 0, 0);
if (!inode) {
flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
- fuse_sync_release(ff, flags);
- fuse_queue_forget(fc, forget, outentry.nodeid, 1);
+ fuse_sync_release(NULL, ff, flags);
+ fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
err = -ENOMEM;
goto out_err;
}
kfree(forget);
d_instantiate(entry, inode);
+ entry->d_time = epoch;
fuse_change_entry_timeout(entry, &outentry);
fuse_dir_changed(dir);
- err = finish_open(file, entry, generic_file_open);
+ err = generic_file_open(inode, file);
+ if (!err) {
+ file->private_data = ff;
+ err = finish_open(file, entry, fuse_finish_open);
+ }
if (err) {
- fuse_sync_release(ff, flags);
+ fi = get_fuse_inode(inode);
+ fuse_sync_release(fi, ff, flags);
} else {
- file->private_data = ff;
- fuse_finish_open(inode, file);
+ if (fm->fc->atomic_o_trunc && trunc)
+ truncate_pagecache(inode, 0);
+ else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+ invalidate_inode_pages2(inode->i_mapping);
}
return err;
@@ -477,26 +924,27 @@ out_err:
return err;
}
-static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
+static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
+ umode_t, dev_t);
static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
struct file *file, unsigned flags,
umode_t mode)
{
int err;
+ struct mnt_idmap *idmap = file_mnt_idmap(file);
struct fuse_conn *fc = get_fuse_conn(dir);
- struct dentry *res = NULL;
- if (d_in_lookup(entry)) {
- res = fuse_lookup(dir, entry, 0);
- if (IS_ERR(res))
- return PTR_ERR(res);
+ if (fuse_is_bad(dir))
+ return -EIO;
- if (res)
- entry = res;
+ if (d_in_lookup(entry)) {
+ struct dentry *res = fuse_lookup(dir, entry, 0);
+ if (res || d_really_is_positive(entry))
+ return finish_no_open(file, res);
}
- if (!(flags & O_CREAT) || d_really_is_positive(entry))
- goto no_open;
+ if (!(flags & O_CREAT))
+ return finish_no_open(file, NULL);
/* Only creates */
file->f_mode |= FMODE_CREATED;
@@ -504,189 +952,264 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
if (fc->no_create)
goto mknod;
- err = fuse_create_open(dir, entry, file, flags, mode);
+ err = fuse_create_open(idmap, dir, entry, file, flags, mode, FUSE_CREATE);
if (err == -ENOSYS) {
fc->no_create = 1;
goto mknod;
- }
-out_dput:
- dput(res);
+ } else if (err == -EEXIST)
+ fuse_invalidate_entry(entry);
return err;
mknod:
- err = fuse_mknod(dir, entry, mode, 0);
+ err = fuse_mknod(idmap, dir, entry, mode, 0);
if (err)
- goto out_dput;
-no_open:
- return finish_no_open(file, res);
+ return err;
+ return finish_no_open(file, NULL);
}
/*
* Code shared between mknod, mkdir, symlink and link
*/
-static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
- struct inode *dir, struct dentry *entry,
- umode_t mode)
+static struct dentry *create_new_entry(struct mnt_idmap *idmap, struct fuse_mount *fm,
+ struct fuse_args *args, struct inode *dir,
+ struct dentry *entry, umode_t mode)
{
struct fuse_entry_out outarg;
struct inode *inode;
struct dentry *d;
- int err;
struct fuse_forget_link *forget;
+ int epoch, err;
+
+ if (fuse_is_bad(dir))
+ return ERR_PTR(-EIO);
+
+ epoch = atomic_read(&fm->fc->epoch);
forget = fuse_alloc_forget();
if (!forget)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
memset(&outarg, 0, sizeof(outarg));
- args->in.h.nodeid = get_node_id(dir);
- args->out.numargs = 1;
- args->out.args[0].size = sizeof(outarg);
- args->out.args[0].value = &outarg;
- err = fuse_simple_request(fc, args);
+ args->nodeid = get_node_id(dir);
+ args->out_numargs = 1;
+ args->out_args[0].size = sizeof(outarg);
+ args->out_args[0].value = &outarg;
+
+ if (args->opcode != FUSE_LINK) {
+ err = get_create_ext(idmap, args, dir, entry, mode);
+ if (err)
+ goto out_put_forget_req;
+ }
+
+ err = fuse_simple_idmap_request(idmap, fm, args);
+ free_ext_value(args);
if (err)
goto out_put_forget_req;
err = -EIO;
- if (invalid_nodeid(outarg.nodeid))
+ if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
goto out_put_forget_req;
if ((outarg.attr.mode ^ mode) & S_IFMT)
goto out_put_forget_req;
inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
- &outarg.attr, entry_attr_timeout(&outarg), 0);
+ &outarg.attr, ATTR_TIMEOUT(&outarg), 0, 0);
if (!inode) {
- fuse_queue_forget(fc, forget, outarg.nodeid, 1);
- return -ENOMEM;
+ fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
+ return ERR_PTR(-ENOMEM);
}
kfree(forget);
d_drop(entry);
d = d_splice_alias(inode, entry);
if (IS_ERR(d))
- return PTR_ERR(d);
+ return d;
if (d) {
+ d->d_time = epoch;
fuse_change_entry_timeout(d, &outarg);
- dput(d);
} else {
+ entry->d_time = epoch;
fuse_change_entry_timeout(entry, &outarg);
}
fuse_dir_changed(dir);
- return 0;
+ return d;
out_put_forget_req:
+ if (err == -EEXIST)
+ fuse_invalidate_entry(entry);
kfree(forget);
- return err;
+ return ERR_PTR(err);
}
-static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
- dev_t rdev)
+static int create_new_nondir(struct mnt_idmap *idmap, struct fuse_mount *fm,
+ struct fuse_args *args, struct inode *dir,
+ struct dentry *entry, umode_t mode)
+{
+ /*
+ * Note that when creating anything other than a directory we
+ * can be sure create_new_entry() will NOT return an alternate
+ * dentry as d_splice_alias() only returns an alternate dentry
+ * for directories. So we don't need to check for that case
+ * when passing back the result.
+ */
+ WARN_ON_ONCE(S_ISDIR(mode));
+
+ return PTR_ERR(create_new_entry(idmap, fm, args, dir, entry, mode));
+}
+
+static int fuse_mknod(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *entry, umode_t mode, dev_t rdev)
{
struct fuse_mknod_in inarg;
- struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_mount *fm = get_fuse_mount(dir);
FUSE_ARGS(args);
- if (!fc->dont_mask)
+ if (!fm->fc->dont_mask)
mode &= ~current_umask();
memset(&inarg, 0, sizeof(inarg));
inarg.mode = mode;
inarg.rdev = new_encode_dev(rdev);
inarg.umask = current_umask();
- args.in.h.opcode = FUSE_MKNOD;
- args.in.numargs = 2;
- args.in.args[0].size = sizeof(inarg);
- args.in.args[0].value = &inarg;
- args.in.args[1].size = entry->d_name.len + 1;
- args.in.args[1].value = entry->d_name.name;
- return create_new_entry(fc, &args, dir, entry, mode);
+ args.opcode = FUSE_MKNOD;
+ args.in_numargs = 2;
+ args.in_args[0].size = sizeof(inarg);
+ args.in_args[0].value = &inarg;
+ args.in_args[1].size = entry->d_name.len + 1;
+ args.in_args[1].value = entry->d_name.name;
+ return create_new_nondir(idmap, fm, &args, dir, entry, mode);
}
-static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
- bool excl)
+static int fuse_create(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *entry, umode_t mode, bool excl)
{
- return fuse_mknod(dir, entry, mode, 0);
+ return fuse_mknod(idmap, dir, entry, mode, 0);
}
-static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
+static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
+ struct file *file, umode_t mode)
{
- struct fuse_mkdir_in inarg;
struct fuse_conn *fc = get_fuse_conn(dir);
+ int err;
+
+ if (fc->no_tmpfile)
+ return -EOPNOTSUPP;
+
+ err = fuse_create_open(idmap, dir, file->f_path.dentry, file,
+ file->f_flags, mode, FUSE_TMPFILE);
+ if (err == -ENOSYS) {
+ fc->no_tmpfile = 1;
+ err = -EOPNOTSUPP;
+ }
+ return err;
+}
+
+static struct dentry *fuse_mkdir(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *entry, umode_t mode)
+{
+ struct fuse_mkdir_in inarg;
+ struct fuse_mount *fm = get_fuse_mount(dir);
FUSE_ARGS(args);
- if (!fc->dont_mask)
+ if (!fm->fc->dont_mask)
mode &= ~current_umask();
memset(&inarg, 0, sizeof(inarg));
inarg.mode = mode;
inarg.umask = current_umask();
- args.in.h.opcode = FUSE_MKDIR;
- args.in.numargs = 2;
- args.in.args[0].size = sizeof(inarg);
- args.in.args[0].value = &inarg;
- args.in.args[1].size = entry->d_name.len + 1;
- args.in.args[1].value = entry->d_name.name;
- return create_new_entry(fc, &args, dir, entry, S_IFDIR);
+ args.opcode = FUSE_MKDIR;
+ args.in_numargs = 2;
+ args.in_args[0].size = sizeof(inarg);
+ args.in_args[0].value = &inarg;
+ args.in_args[1].size = entry->d_name.len + 1;
+ args.in_args[1].value = entry->d_name.name;
+ return create_new_entry(idmap, fm, &args, dir, entry, S_IFDIR);
}
-static int fuse_symlink(struct inode *dir, struct dentry *entry,
- const char *link)
+static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *entry, const char *link)
{
- struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_mount *fm = get_fuse_mount(dir);
unsigned len = strlen(link) + 1;
FUSE_ARGS(args);
- args.in.h.opcode = FUSE_SYMLINK;
- args.in.numargs = 2;
- args.in.args[0].size = entry->d_name.len + 1;
- args.in.args[0].value = entry->d_name.name;
- args.in.args[1].size = len;
- args.in.args[1].value = link;
- return create_new_entry(fc, &args, dir, entry, S_IFLNK);
+ args.opcode = FUSE_SYMLINK;
+ args.in_numargs = 3;
+ fuse_set_zero_arg0(&args);
+ args.in_args[1].size = entry->d_name.len + 1;
+ args.in_args[1].value = entry->d_name.name;
+ args.in_args[2].size = len;
+ args.in_args[2].value = link;
+ return create_new_nondir(idmap, fm, &args, dir, entry, S_IFLNK);
}
-void fuse_update_ctime(struct inode *inode)
+void fuse_flush_time_update(struct inode *inode)
+{
+ int err = sync_inode_metadata(inode, 1);
+
+ mapping_set_error(inode->i_mapping, err);
+}
+
+static void fuse_update_ctime_in_cache(struct inode *inode)
{
if (!IS_NOCMTIME(inode)) {
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
mark_inode_dirty_sync(inode);
+ fuse_flush_time_update(inode);
}
}
+void fuse_update_ctime(struct inode *inode)
+{
+ fuse_invalidate_attr_mask(inode, STATX_CTIME);
+ fuse_update_ctime_in_cache(inode);
+}
+
+static void fuse_entry_unlinked(struct dentry *entry)
+{
+ struct inode *inode = d_inode(entry);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ spin_lock(&fi->lock);
+ fi->attr_version = atomic64_inc_return(&fc->attr_version);
+ /*
+ * If i_nlink == 0 then unlink doesn't make sense, yet this can
+ * happen if userspace filesystem is careless. It would be
+ * difficult to enforce correct nlink usage so just ignore this
+ * condition here
+ */
+ if (S_ISDIR(inode->i_mode))
+ clear_nlink(inode);
+ else if (inode->i_nlink > 0)
+ drop_nlink(inode);
+ spin_unlock(&fi->lock);
+ fuse_invalidate_entry_cache(entry);
+ fuse_update_ctime(inode);
+}
+
static int fuse_unlink(struct inode *dir, struct dentry *entry)
{
int err;
- struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_mount *fm = get_fuse_mount(dir);
FUSE_ARGS(args);
- args.in.h.opcode = FUSE_UNLINK;
- args.in.h.nodeid = get_node_id(dir);
- args.in.numargs = 1;
- args.in.args[0].size = entry->d_name.len + 1;
- args.in.args[0].value = entry->d_name.name;
- err = fuse_simple_request(fc, &args);
- if (!err) {
- struct inode *inode = d_inode(entry);
- struct fuse_inode *fi = get_fuse_inode(inode);
+ if (fuse_is_bad(dir))
+ return -EIO;
- spin_lock(&fc->lock);
- fi->attr_version = ++fc->attr_version;
- /*
- * If i_nlink == 0 then unlink doesn't make sense, yet this can
- * happen if userspace filesystem is careless. It would be
- * difficult to enforce correct nlink usage so just ignore this
- * condition here
- */
- if (inode->i_nlink > 0)
- drop_nlink(inode);
- spin_unlock(&fc->lock);
- fuse_invalidate_attr(inode);
+ args.opcode = FUSE_UNLINK;
+ args.nodeid = get_node_id(dir);
+ args.in_numargs = 2;
+ fuse_set_zero_arg0(&args);
+ args.in_args[1].size = entry->d_name.len + 1;
+ args.in_args[1].value = entry->d_name.name;
+ err = fuse_simple_request(fm, &args);
+ if (!err) {
fuse_dir_changed(dir);
- fuse_invalidate_entry_cache(entry);
- fuse_update_ctime(inode);
- } else if (err == -EINTR)
+ fuse_entry_unlinked(entry);
+ } else if (err == -EINTR || err == -ENOENT)
fuse_invalidate_entry(entry);
return err;
}
@@ -694,67 +1217,64 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
static int fuse_rmdir(struct inode *dir, struct dentry *entry)
{
int err;
- struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_mount *fm = get_fuse_mount(dir);
FUSE_ARGS(args);
- args.in.h.opcode = FUSE_RMDIR;
- args.in.h.nodeid = get_node_id(dir);
- args.in.numargs = 1;
- args.in.args[0].size = entry->d_name.len + 1;
- args.in.args[0].value = entry->d_name.name;
- err = fuse_simple_request(fc, &args);
+ if (fuse_is_bad(dir))
+ return -EIO;
+
+ args.opcode = FUSE_RMDIR;
+ args.nodeid = get_node_id(dir);
+ args.in_numargs = 2;
+ fuse_set_zero_arg0(&args);
+ args.in_args[1].size = entry->d_name.len + 1;
+ args.in_args[1].value = entry->d_name.name;
+ err = fuse_simple_request(fm, &args);
if (!err) {
- clear_nlink(d_inode(entry));
fuse_dir_changed(dir);
- fuse_invalidate_entry_cache(entry);
- } else if (err == -EINTR)
+ fuse_entry_unlinked(entry);
+ } else if (err == -EINTR || err == -ENOENT)
fuse_invalidate_entry(entry);
return err;
}
-static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
+static int fuse_rename_common(struct mnt_idmap *idmap, struct inode *olddir, struct dentry *oldent,
struct inode *newdir, struct dentry *newent,
unsigned int flags, int opcode, size_t argsize)
{
int err;
struct fuse_rename2_in inarg;
- struct fuse_conn *fc = get_fuse_conn(olddir);
+ struct fuse_mount *fm = get_fuse_mount(olddir);
FUSE_ARGS(args);
memset(&inarg, 0, argsize);
inarg.newdir = get_node_id(newdir);
inarg.flags = flags;
- args.in.h.opcode = opcode;
- args.in.h.nodeid = get_node_id(olddir);
- args.in.numargs = 3;
- args.in.args[0].size = argsize;
- args.in.args[0].value = &inarg;
- args.in.args[1].size = oldent->d_name.len + 1;
- args.in.args[1].value = oldent->d_name.name;
- args.in.args[2].size = newent->d_name.len + 1;
- args.in.args[2].value = newent->d_name.name;
- err = fuse_simple_request(fc, &args);
+ args.opcode = opcode;
+ args.nodeid = get_node_id(olddir);
+ args.in_numargs = 3;
+ args.in_args[0].size = argsize;
+ args.in_args[0].value = &inarg;
+ args.in_args[1].size = oldent->d_name.len + 1;
+ args.in_args[1].value = oldent->d_name.name;
+ args.in_args[2].size = newent->d_name.len + 1;
+ args.in_args[2].value = newent->d_name.name;
+ err = fuse_simple_idmap_request(idmap, fm, &args);
if (!err) {
/* ctime changes */
- fuse_invalidate_attr(d_inode(oldent));
fuse_update_ctime(d_inode(oldent));
- if (flags & RENAME_EXCHANGE) {
- fuse_invalidate_attr(d_inode(newent));
+ if (flags & RENAME_EXCHANGE)
fuse_update_ctime(d_inode(newent));
- }
fuse_dir_changed(olddir);
if (olddir != newdir)
fuse_dir_changed(newdir);
/* newent will end up negative */
- if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
- fuse_invalidate_attr(d_inode(newent));
- fuse_invalidate_entry_cache(newent);
- fuse_update_ctime(d_inode(newent));
- }
- } else if (err == -EINTR) {
+ if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent))
+ fuse_entry_unlinked(newent);
+ } else if (err == -EINTR || err == -ENOENT) {
/* If request was interrupted, DEITY only knows if the
rename actually took place. If the invalidation
fails (e.g. some process has CWD under the renamed
@@ -768,21 +1288,25 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
return err;
}
-static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
- struct inode *newdir, struct dentry *newent,
- unsigned int flags)
+static int fuse_rename2(struct mnt_idmap *idmap, struct inode *olddir,
+ struct dentry *oldent, struct inode *newdir,
+ struct dentry *newent, unsigned int flags)
{
struct fuse_conn *fc = get_fuse_conn(olddir);
int err;
- if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+ if (fuse_is_bad(olddir))
+ return -EIO;
+
+ if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
return -EINVAL;
if (flags) {
if (fc->no_rename2 || fc->minor < 23)
return -EINVAL;
- err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
+ err = fuse_rename_common((flags & RENAME_WHITEOUT) ? idmap : &invalid_mnt_idmap,
+ olddir, oldent, newdir, newent, flags,
FUSE_RENAME2,
sizeof(struct fuse_rename2_in));
if (err == -ENOSYS) {
@@ -790,7 +1314,7 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
err = -EINVAL;
}
} else {
- err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
+ err = fuse_rename_common(&invalid_mnt_idmap, olddir, oldent, newdir, newent, 0,
FUSE_RENAME,
sizeof(struct fuse_rename_in));
}
@@ -804,60 +1328,51 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
int err;
struct fuse_link_in inarg;
struct inode *inode = d_inode(entry);
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
+ if (fm->fc->no_link)
+ goto out;
+
memset(&inarg, 0, sizeof(inarg));
inarg.oldnodeid = get_node_id(inode);
- args.in.h.opcode = FUSE_LINK;
- args.in.numargs = 2;
- args.in.args[0].size = sizeof(inarg);
- args.in.args[0].value = &inarg;
- args.in.args[1].size = newent->d_name.len + 1;
- args.in.args[1].value = newent->d_name.name;
- err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
- /* Contrary to "normal" filesystems it can happen that link
- makes two "logical" inodes point to the same "physical"
- inode. We invalidate the attributes of the old one, so it
- will reflect changes in the backing inode (link count,
- etc.)
- */
- if (!err) {
- struct fuse_inode *fi = get_fuse_inode(inode);
-
- spin_lock(&fc->lock);
- fi->attr_version = ++fc->attr_version;
- inc_nlink(inode);
- spin_unlock(&fc->lock);
- fuse_invalidate_attr(inode);
- fuse_update_ctime(inode);
- } else if (err == -EINTR) {
+ args.opcode = FUSE_LINK;
+ args.in_numargs = 2;
+ args.in_args[0].size = sizeof(inarg);
+ args.in_args[0].value = &inarg;
+ args.in_args[1].size = newent->d_name.len + 1;
+ args.in_args[1].value = newent->d_name.name;
+ err = create_new_nondir(&invalid_mnt_idmap, fm, &args, newdir, newent, inode->i_mode);
+ if (!err)
+ fuse_update_ctime_in_cache(inode);
+ else if (err == -EINTR)
fuse_invalidate_attr(inode);
- }
+
+ if (err == -ENOSYS)
+ fm->fc->no_link = 1;
+out:
+ if (fm->fc->no_link)
+ return -EPERM;
+
return err;
}
-static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
- struct kstat *stat)
+static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode,
+ struct fuse_attr *attr, struct kstat *stat)
{
unsigned int blkbits;
struct fuse_conn *fc = get_fuse_conn(inode);
-
- /* see the comment in fuse_change_attributes() */
- if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
- attr->size = i_size_read(inode);
- attr->mtime = inode->i_mtime.tv_sec;
- attr->mtimensec = inode->i_mtime.tv_nsec;
- attr->ctime = inode->i_ctime.tv_sec;
- attr->ctimensec = inode->i_ctime.tv_nsec;
- }
+ vfsuid_t vfsuid = make_vfsuid(idmap, fc->user_ns,
+ make_kuid(fc->user_ns, attr->uid));
+ vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns,
+ make_kgid(fc->user_ns, attr->gid));
stat->dev = inode->i_sb->s_dev;
stat->ino = attr->ino;
stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
stat->nlink = attr->nlink;
- stat->uid = make_kuid(fc->user_ns, attr->uid);
- stat->gid = make_kgid(fc->user_ns, attr->gid);
+ stat->uid = vfsuid_into_kuid(vfsuid);
+ stat->gid = vfsgid_into_kgid(vfsgid);
stat->rdev = inode->i_rdev;
stat->atime.tv_sec = attr->atime;
stat->atime.tv_nsec = attr->atimensec;
@@ -876,17 +1391,98 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
stat->blksize = 1 << blkbits;
}
-static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
- struct file *file)
+static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr)
+{
+ memset(attr, 0, sizeof(*attr));
+ attr->ino = sx->ino;
+ attr->size = sx->size;
+ attr->blocks = sx->blocks;
+ attr->atime = sx->atime.tv_sec;
+ attr->mtime = sx->mtime.tv_sec;
+ attr->ctime = sx->ctime.tv_sec;
+ attr->atimensec = sx->atime.tv_nsec;
+ attr->mtimensec = sx->mtime.tv_nsec;
+ attr->ctimensec = sx->ctime.tv_nsec;
+ attr->mode = sx->mode;
+ attr->nlink = sx->nlink;
+ attr->uid = sx->uid;
+ attr->gid = sx->gid;
+ attr->rdev = new_encode_dev(MKDEV(sx->rdev_major, sx->rdev_minor));
+ attr->blksize = sx->blksize;
+}
+
+static int fuse_do_statx(struct mnt_idmap *idmap, struct inode *inode,
+ struct file *file, struct kstat *stat)
+{
+ int err;
+ struct fuse_attr attr;
+ struct fuse_statx *sx;
+ struct fuse_statx_in inarg;
+ struct fuse_statx_out outarg;
+ struct fuse_mount *fm = get_fuse_mount(inode);
+ u64 attr_version = fuse_get_attr_version(fm->fc);
+ FUSE_ARGS(args);
+
+ memset(&inarg, 0, sizeof(inarg));
+ memset(&outarg, 0, sizeof(outarg));
+ /* Directories have separate file-handle space */
+ if (file && S_ISREG(inode->i_mode)) {
+ struct fuse_file *ff = file->private_data;
+
+ inarg.getattr_flags |= FUSE_GETATTR_FH;
+ inarg.fh = ff->fh;
+ }
+ /* For now leave sync hints as the default, request all stats. */
+ inarg.sx_flags = 0;
+ inarg.sx_mask = STATX_BASIC_STATS | STATX_BTIME;
+ args.opcode = FUSE_STATX;
+ args.nodeid = get_node_id(inode);
+ args.in_numargs = 1;
+ args.in_args[0].size = sizeof(inarg);
+ args.in_args[0].value = &inarg;
+ args.out_numargs = 1;
+ args.out_args[0].size = sizeof(outarg);
+ args.out_args[0].value = &outarg;
+ err = fuse_simple_request(fm, &args);
+ if (err)
+ return err;
+
+ sx = &outarg.stat;
+ if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) ||
+ ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) ||
+ inode_wrong_type(inode, sx->mode)))) {
+ fuse_make_bad(inode);
+ return -EIO;
+ }
+
+ fuse_statx_to_attr(&outarg.stat, &attr);
+ if ((sx->mask & STATX_BASIC_STATS) == STATX_BASIC_STATS) {
+ fuse_change_attributes(inode, &attr, &outarg.stat,
+ ATTR_TIMEOUT(&outarg), attr_version);
+ }
+
+ if (stat) {
+ stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME);
+ stat->btime.tv_sec = sx->btime.tv_sec;
+ stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
+ fuse_fillattr(idmap, inode, &attr, stat);
+ stat->result_mask |= STATX_TYPE;
+ }
+
+ return 0;
+}
+
+static int fuse_do_getattr(struct mnt_idmap *idmap, struct inode *inode,
+ struct kstat *stat, struct file *file)
{
int err;
struct fuse_getattr_in inarg;
struct fuse_attr_out outarg;
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
u64 attr_version;
- attr_version = fuse_get_attr_version(fc);
+ attr_version = fuse_get_attr_version(fm->fc);
memset(&inarg, 0, sizeof(inarg));
memset(&outarg, 0, sizeof(outarg));
@@ -897,97 +1493,123 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
inarg.getattr_flags |= FUSE_GETATTR_FH;
inarg.fh = ff->fh;
}
- args.in.h.opcode = FUSE_GETATTR;
- args.in.h.nodeid = get_node_id(inode);
- args.in.numargs = 1;
- args.in.args[0].size = sizeof(inarg);
- args.in.args[0].value = &inarg;
- args.out.numargs = 1;
- args.out.args[0].size = sizeof(outarg);
- args.out.args[0].value = &outarg;
- err = fuse_simple_request(fc, &args);
+ args.opcode = FUSE_GETATTR;
+ args.nodeid = get_node_id(inode);
+ args.in_numargs = 1;
+ args.in_args[0].size = sizeof(inarg);
+ args.in_args[0].value = &inarg;
+ args.out_numargs = 1;
+ args.out_args[0].size = sizeof(outarg);
+ args.out_args[0].value = &outarg;
+ err = fuse_simple_request(fm, &args);
if (!err) {
- if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
- make_bad_inode(inode);
+ if (fuse_invalid_attr(&outarg.attr) ||
+ inode_wrong_type(inode, outarg.attr.mode)) {
+ fuse_make_bad(inode);
err = -EIO;
} else {
- fuse_change_attributes(inode, &outarg.attr,
- attr_timeout(&outarg),
+ fuse_change_attributes(inode, &outarg.attr, NULL,
+ ATTR_TIMEOUT(&outarg),
attr_version);
if (stat)
- fuse_fillattr(inode, &outarg.attr, stat);
+ fuse_fillattr(idmap, inode, &outarg.attr, stat);
}
}
return err;
}
-static int fuse_update_get_attr(struct inode *inode, struct file *file,
- struct kstat *stat, u32 request_mask,
- unsigned int flags)
+static int fuse_update_get_attr(struct mnt_idmap *idmap, struct inode *inode,
+ struct file *file, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_conn *fc = get_fuse_conn(inode);
int err = 0;
bool sync;
+ u32 inval_mask = READ_ONCE(fi->inval_mask);
+ u32 cache_mask = fuse_get_cache_mask(inode);
+
- if (flags & AT_STATX_FORCE_SYNC)
+ /* FUSE only supports basic stats and possibly btime */
+ request_mask &= STATX_BASIC_STATS | STATX_BTIME;
+retry:
+ if (fc->no_statx)
+ request_mask &= STATX_BASIC_STATS;
+
+ if (!request_mask)
+ sync = false;
+ else if (flags & AT_STATX_FORCE_SYNC)
sync = true;
else if (flags & AT_STATX_DONT_SYNC)
sync = false;
- else if (request_mask & READ_ONCE(fi->inval_mask))
+ else if (request_mask & inval_mask & ~cache_mask)
sync = true;
else
sync = time_before64(fi->i_time, get_jiffies_64());
if (sync) {
forget_all_cached_acls(inode);
- err = fuse_do_getattr(inode, stat, file);
+ /* Try statx if BTIME is requested */
+ if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) {
+ err = fuse_do_statx(idmap, inode, file, stat);
+ if (err == -ENOSYS) {
+ fc->no_statx = 1;
+ err = 0;
+ goto retry;
+ }
+ } else {
+ err = fuse_do_getattr(idmap, inode, stat, file);
+ }
} else if (stat) {
- generic_fillattr(inode, stat);
+ generic_fillattr(idmap, request_mask, inode, stat);
stat->mode = fi->orig_i_mode;
stat->ino = fi->orig_ino;
+ stat->blksize = 1 << fi->cached_i_blkbits;
+ if (test_bit(FUSE_I_BTIME, &fi->state)) {
+ stat->btime = fi->i_btime;
+ stat->result_mask |= STATX_BTIME;
+ }
}
return err;
}
-int fuse_update_attributes(struct inode *inode, struct file *file)
+int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask)
{
- /* Do *not* need to get atime for internal purposes */
- return fuse_update_get_attr(inode, file, NULL,
- STATX_BASIC_STATS & ~STATX_ATIME, 0);
+ return fuse_update_get_attr(&nop_mnt_idmap, inode, file, NULL, mask, 0);
}
-int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
- u64 child_nodeid, struct qstr *name)
+int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
+ u64 child_nodeid, struct qstr *name, u32 flags)
{
int err = -ENOTDIR;
struct inode *parent;
struct dentry *dir;
struct dentry *entry;
- parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
+ parent = fuse_ilookup(fc, parent_nodeid, NULL);
if (!parent)
return -ENOENT;
- inode_lock(parent);
if (!S_ISDIR(parent->i_mode))
- goto unlock;
+ goto put_parent;
err = -ENOENT;
dir = d_find_alias(parent);
if (!dir)
- goto unlock;
+ goto put_parent;
- name->hash = full_name_hash(dir, name->name, name->len);
- entry = d_lookup(dir, name);
+ entry = start_removing_noperm(dir, name);
dput(dir);
- if (!entry)
- goto unlock;
+ if (IS_ERR(entry))
+ goto put_parent;
fuse_dir_changed(parent);
- fuse_invalidate_entry(entry);
+ if (!(flags & FUSE_EXPIRE_ONLY))
+ d_invalidate(entry);
+ fuse_invalidate_entry_cache(entry);
- if (child_nodeid != 0 && d_really_is_positive(entry)) {
+ if (child_nodeid != 0) {
inode_lock(d_inode(entry));
if (get_node_id(d_inode(entry)) != child_nodeid) {
err = -ENOENT;
@@ -1015,14 +1637,25 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
} else {
err = 0;
}
- dput(entry);
- unlock:
- inode_unlock(parent);
+ end_removing(entry);
+ put_parent:
iput(parent);
return err;
}
+static inline bool fuse_permissible_uidgid(struct fuse_conn *fc)
+{
+ const struct cred *cred = current_cred();
+
+ return (uid_eq(cred->euid, fc->user_id) &&
+ uid_eq(cred->suid, fc->user_id) &&
+ uid_eq(cred->uid, fc->user_id) &&
+ gid_eq(cred->egid, fc->group_id) &&
+ gid_eq(cred->sgid, fc->group_id) &&
+ gid_eq(cred->gid, fc->group_id));
+}
+
/*
* Calling into a user-controlled filesystem gives the filesystem
* daemon ptrace-like capabilities over the current process. This
@@ -1036,47 +1669,51 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
* for which the owner of the mount has ptrace privilege. This
* excludes processes started by other users, suid or sgid processes.
*/
-int fuse_allow_current_process(struct fuse_conn *fc)
+bool fuse_allow_current_process(struct fuse_conn *fc)
{
- const struct cred *cred;
+ bool allow;
if (fc->allow_other)
- return current_in_userns(fc->user_ns);
+ allow = current_in_userns(fc->user_ns);
+ else
+ allow = fuse_permissible_uidgid(fc);
- cred = current_cred();
- if (uid_eq(cred->euid, fc->user_id) &&
- uid_eq(cred->suid, fc->user_id) &&
- uid_eq(cred->uid, fc->user_id) &&
- gid_eq(cred->egid, fc->group_id) &&
- gid_eq(cred->sgid, fc->group_id) &&
- gid_eq(cred->gid, fc->group_id))
- return 1;
+ if (!allow && allow_sys_admin_access && capable(CAP_SYS_ADMIN))
+ allow = true;
- return 0;
+ return allow;
}
static int fuse_access(struct inode *inode, int mask)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
struct fuse_access_in inarg;
int err;
BUG_ON(mask & MAY_NOT_BLOCK);
- if (fc->no_access)
+ /*
+ * We should not send FUSE_ACCESS to the userspace
+ * when idmapped mounts are enabled as for this case
+ * we have fc->default_permissions = 1 and access
+ * permission checks are done on the kernel side.
+ */
+ WARN_ON_ONCE(!(fm->sb->s_iflags & SB_I_NOIDMAP));
+
+ if (fm->fc->no_access)
return 0;
memset(&inarg, 0, sizeof(inarg));
inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
- args.in.h.opcode = FUSE_ACCESS;
- args.in.h.nodeid = get_node_id(inode);
- args.in.numargs = 1;
- args.in.args[0].size = sizeof(inarg);
- args.in.args[0].value = &inarg;
- err = fuse_simple_request(fc, &args);
+ args.opcode = FUSE_ACCESS;
+ args.nodeid = get_node_id(inode);
+ args.in_numargs = 1;
+ args.in_args[0].size = sizeof(inarg);
+ args.in_args[0].value = &inarg;
+ err = fuse_simple_request(fm, &args);
if (err == -ENOSYS) {
- fc->no_access = 1;
+ fm->fc->no_access = 1;
err = 0;
}
return err;
@@ -1088,7 +1725,7 @@ static int fuse_perm_getattr(struct inode *inode, int mask)
return -ECHILD;
forget_all_cached_acls(inode);
- return fuse_do_getattr(inode, NULL, NULL);
+ return fuse_do_getattr(&nop_mnt_idmap, inode, NULL, NULL);
}
/*
@@ -1096,7 +1733,7 @@ static int fuse_perm_getattr(struct inode *inode, int mask)
*
* 1) Local access checking ('default_permissions' mount option) based
* on file mode. This is the plain old disk filesystem permission
- * modell.
+ * model.
*
* 2) "Remote" access checking, where server is responsible for
* checking permission in each inode operation. An exception to this
@@ -1104,12 +1741,16 @@ static int fuse_perm_getattr(struct inode *inode, int mask)
* access request is sent. Execute permission is still checked
* locally based on file mode.
*/
-static int fuse_permission(struct inode *inode, int mask)
+static int fuse_permission(struct mnt_idmap *idmap,
+ struct inode *inode, int mask)
{
struct fuse_conn *fc = get_fuse_conn(inode);
bool refreshed = false;
int err = 0;
+ if (fuse_is_bad(inode))
+ return -EIO;
+
if (!fuse_allow_current_process(fc))
return -EACCES;
@@ -1132,7 +1773,7 @@ static int fuse_permission(struct inode *inode, int mask)
}
if (fc->default_permissions) {
- err = generic_permission(inode, mask);
+ err = generic_permission(idmap, inode, mask);
/* If permission is denied, try to refresh file
attributes. This is also needed, because the root
@@ -1140,7 +1781,8 @@ static int fuse_permission(struct inode *inode, int mask)
if (err == -EACCES && !refreshed) {
err = fuse_perm_getattr(inode, mask);
if (!err)
- err = generic_permission(inode, mask);
+ err = generic_permission(idmap,
+ inode, mask);
}
/* Note: the opposite of the above test does not
@@ -1162,75 +1804,73 @@ static int fuse_permission(struct inode *inode, int mask)
return err;
}
-static int fuse_readlink_page(struct inode *inode, struct page *page)
+static int fuse_readlink_folio(struct inode *inode, struct folio *folio)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
- int err;
+ struct fuse_mount *fm = get_fuse_mount(inode);
+ struct fuse_folio_desc desc = { .length = folio_size(folio) - 1 };
+ struct fuse_args_pages ap = {
+ .num_folios = 1,
+ .folios = &folio,
+ .descs = &desc,
+ };
+ char *link;
+ ssize_t res;
- req = fuse_get_req(fc, 1);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- req->out.page_zeroing = 1;
- req->out.argpages = 1;
- req->num_pages = 1;
- req->pages[0] = page;
- req->page_descs[0].length = PAGE_SIZE - 1;
- req->in.h.opcode = FUSE_READLINK;
- req->in.h.nodeid = get_node_id(inode);
- req->out.argvar = 1;
- req->out.numargs = 1;
- req->out.args[0].size = PAGE_SIZE - 1;
- fuse_request_send(fc, req);
- err = req->out.h.error;
+ ap.args.opcode = FUSE_READLINK;
+ ap.args.nodeid = get_node_id(inode);
+ ap.args.out_pages = true;
+ ap.args.out_argvar = true;
+ ap.args.page_zeroing = true;
+ ap.args.out_numargs = 1;
+ ap.args.out_args[0].size = desc.length;
+ res = fuse_simple_request(fm, &ap.args);
- if (!err) {
- char *link = page_address(page);
- size_t len = req->out.args[0].size;
+ fuse_invalidate_atime(inode);
- BUG_ON(len >= PAGE_SIZE);
- link[len] = '\0';
- }
+ if (res < 0)
+ return res;
- fuse_put_request(fc, req);
- fuse_invalidate_atime(inode);
+ if (WARN_ON(res >= PAGE_SIZE))
+ return -EIO;
- return err;
+ link = folio_address(folio);
+ link[res] = '\0';
+
+ return 0;
}
static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
struct delayed_call *callback)
{
struct fuse_conn *fc = get_fuse_conn(inode);
- struct page *page;
+ struct folio *folio;
int err;
err = -EIO;
- if (is_bad_inode(inode))
+ if (fuse_is_bad(inode))
goto out_err;
if (fc->cache_symlinks)
- return page_get_link(dentry, inode, callback);
+ return page_get_link_raw(dentry, inode, callback);
err = -ECHILD;
if (!dentry)
goto out_err;
- page = alloc_page(GFP_KERNEL);
+ folio = folio_alloc(GFP_KERNEL, 0);
err = -ENOMEM;
- if (!page)
+ if (!folio)
goto out_err;
- err = fuse_readlink_page(inode, page);
+ err = fuse_readlink_folio(inode, folio);
if (err) {
- __free_page(page);
+ folio_put(folio);
goto out_err;
}
- set_delayed_call(callback, page_put_link, page);
+ set_delayed_call(callback, page_put_link, folio);
- return page_address(page);
+ return folio_address(folio);
out_err:
return ERR_PTR(err);
@@ -1238,7 +1878,32 @@ out_err:
static int fuse_dir_open(struct inode *inode, struct file *file)
{
- return fuse_open_common(inode, file, true);
+ struct fuse_mount *fm = get_fuse_mount(inode);
+ int err;
+
+ if (fuse_is_bad(inode))
+ return -EIO;
+
+ err = generic_file_open(inode, file);
+ if (err)
+ return err;
+
+ err = fuse_do_open(fm, get_node_id(inode), file, true);
+ if (!err) {
+ struct fuse_file *ff = file->private_data;
+
+ /*
+ * Keep handling FOPEN_STREAM and FOPEN_NONSEEKABLE for
+ * directories for backward compatibility, though it's unlikely
+ * to be useful.
+ */
+ if (ff->open_flags & (FOPEN_STREAM | FOPEN_NONSEEKABLE))
+ nonseekable_open(inode, file);
+ if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+ invalidate_inode_pages2(inode->i_mapping);
+ }
+
+ return err;
}
static int fuse_dir_release(struct inode *inode, struct file *file)
@@ -1255,7 +1920,7 @@ static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
struct fuse_conn *fc = get_fuse_conn(inode);
int err;
- if (is_bad_inode(inode))
+ if (fuse_is_bad(inode))
return -EIO;
if (fc->no_fsyncdir)
@@ -1314,17 +1979,29 @@ static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
return true;
}
-static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
- struct fuse_setattr_in *arg, bool trust_local_cmtime)
+static void iattr_to_fattr(struct mnt_idmap *idmap, struct fuse_conn *fc,
+ struct iattr *iattr, struct fuse_setattr_in *arg,
+ bool trust_local_cmtime)
{
unsigned ivalid = iattr->ia_valid;
if (ivalid & ATTR_MODE)
arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode;
- if (ivalid & ATTR_UID)
- arg->valid |= FATTR_UID, arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
- if (ivalid & ATTR_GID)
- arg->valid |= FATTR_GID, arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
+
+ if (ivalid & ATTR_UID) {
+ kuid_t fsuid = from_vfsuid(idmap, fc->user_ns, iattr->ia_vfsuid);
+
+ arg->valid |= FATTR_UID;
+ arg->uid = from_kuid(fc->user_ns, fsuid);
+ }
+
+ if (ivalid & ATTR_GID) {
+ kgid_t fsgid = from_vfsgid(idmap, fc->user_ns, iattr->ia_vfsgid);
+
+ arg->valid |= FATTR_GID;
+ arg->gid = from_kgid(fc->user_ns, fsgid);
+ }
+
if (ivalid & ATTR_SIZE)
arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size;
if (ivalid & ATTR_ATIME) {
@@ -1356,15 +2033,14 @@ static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
*/
void fuse_set_nowrite(struct inode *inode)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
BUG_ON(!inode_is_locked(inode));
- spin_lock(&fc->lock);
+ spin_lock(&fi->lock);
BUG_ON(fi->writectr < 0);
fi->writectr += FUSE_NOWRITE;
- spin_unlock(&fc->lock);
+ spin_unlock(&fi->lock);
wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
}
@@ -1385,11 +2061,11 @@ static void __fuse_release_nowrite(struct inode *inode)
void fuse_release_nowrite(struct inode *inode)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
- spin_lock(&fc->lock);
+ spin_lock(&fi->lock);
__fuse_release_nowrite(inode);
- spin_unlock(&fc->lock);
+ spin_unlock(&fi->lock);
}
static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
@@ -1397,14 +2073,14 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
struct fuse_setattr_in *inarg_p,
struct fuse_attr_out *outarg_p)
{
- args->in.h.opcode = FUSE_SETATTR;
- args->in.h.nodeid = get_node_id(inode);
- args->in.numargs = 1;
- args->in.args[0].size = sizeof(*inarg_p);
- args->in.args[0].value = inarg_p;
- args->out.numargs = 1;
- args->out.args[0].size = sizeof(*outarg_p);
- args->out.args[0].value = outarg_p;
+ args->opcode = FUSE_SETATTR;
+ args->nodeid = get_node_id(inode);
+ args->in_numargs = 1;
+ args->in_args[0].size = sizeof(*inarg_p);
+ args->in_args[0].value = inarg_p;
+ args->out_numargs = 1;
+ args->out_args[0].size = sizeof(*outarg_p);
+ args->out_args[0].value = outarg_p;
}
/*
@@ -1412,7 +2088,7 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
*/
int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
struct fuse_setattr_in inarg;
struct fuse_attr_out outarg;
@@ -1421,20 +2097,20 @@ int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
memset(&outarg, 0, sizeof(outarg));
inarg.valid = FATTR_MTIME;
- inarg.mtime = inode->i_mtime.tv_sec;
- inarg.mtimensec = inode->i_mtime.tv_nsec;
- if (fc->minor >= 23) {
+ inarg.mtime = inode_get_mtime_sec(inode);
+ inarg.mtimensec = inode_get_mtime_nsec(inode);
+ if (fm->fc->minor >= 23) {
inarg.valid |= FATTR_CTIME;
- inarg.ctime = inode->i_ctime.tv_sec;
- inarg.ctimensec = inode->i_ctime.tv_nsec;
+ inarg.ctime = inode_get_ctime_sec(inode);
+ inarg.ctimensec = inode_get_ctime_nsec(inode);
}
if (ff) {
inarg.valid |= FATTR_FH;
inarg.fh = ff->fh;
}
- fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
+ fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg);
- return fuse_simple_request(fc, &args);
+ return fuse_simple_request(fm, &args);
}
/*
@@ -1445,28 +2121,48 @@ int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
* vmtruncate() doesn't allow for this case, so do the rlimit checking
* and the actual truncation by hand.
*/
-int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
- struct file *file)
+int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct iattr *attr, struct file *file)
{
struct inode *inode = d_inode(dentry);
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_mount *fm = get_fuse_mount(inode);
+ struct fuse_conn *fc = fm->fc;
struct fuse_inode *fi = get_fuse_inode(inode);
+ struct address_space *mapping = inode->i_mapping;
FUSE_ARGS(args);
struct fuse_setattr_in inarg;
struct fuse_attr_out outarg;
bool is_truncate = false;
- bool is_wb = fc->writeback_cache;
+ bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode);
loff_t oldsize;
int err;
- bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
+ bool trust_local_cmtime = is_wb;
+ bool fault_blocked = false;
+ u64 attr_version;
if (!fc->default_permissions)
attr->ia_valid |= ATTR_FORCE;
- err = setattr_prepare(dentry, attr);
+ err = setattr_prepare(idmap, dentry, attr);
if (err)
return err;
+ if (attr->ia_valid & ATTR_SIZE) {
+ if (WARN_ON(!S_ISREG(inode->i_mode)))
+ return -EIO;
+ is_truncate = true;
+ }
+
+ if (FUSE_IS_DAX(inode) && is_truncate) {
+ filemap_invalidate_lock(mapping);
+ fault_blocked = true;
+ err = fuse_dax_break_layouts(inode, 0, -1);
+ if (err) {
+ filemap_invalidate_unlock(mapping);
+ return err;
+ }
+ }
+
if (attr->ia_valid & ATTR_OPEN) {
/* This is coming from open(..., ... | O_TRUNC); */
WARN_ON(!(attr->ia_valid & ATTR_SIZE));
@@ -1479,15 +2175,22 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
*/
i_size_write(inode, 0);
truncate_pagecache(inode, 0);
- return 0;
+ goto out;
}
file = NULL;
}
- if (attr->ia_valid & ATTR_SIZE) {
- if (WARN_ON(!S_ISREG(inode->i_mode)))
- return -EIO;
- is_truncate = true;
+ /* Flush dirty data/metadata before non-truncate SETATTR */
+ if (is_wb &&
+ attr->ia_valid &
+ (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
+ ATTR_TIMES_SET)) {
+ err = write_inode_now(inode, true);
+ if (err)
+ return err;
+
+ fuse_set_nowrite(inode);
+ fuse_release_nowrite(inode);
}
if (is_truncate) {
@@ -1499,65 +2202,91 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
memset(&inarg, 0, sizeof(inarg));
memset(&outarg, 0, sizeof(outarg));
- iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
+ iattr_to_fattr(idmap, fc, attr, &inarg, trust_local_cmtime);
if (file) {
struct fuse_file *ff = file->private_data;
inarg.valid |= FATTR_FH;
inarg.fh = ff->fh;
}
+
+ /* Kill suid/sgid for non-directory chown unconditionally */
+ if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) &&
+ attr->ia_valid & (ATTR_UID | ATTR_GID))
+ inarg.valid |= FATTR_KILL_SUIDGID;
+
if (attr->ia_valid & ATTR_SIZE) {
/* For mandatory locking in truncate */
inarg.valid |= FATTR_LOCKOWNER;
inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
+
+ /* Kill suid/sgid for truncate only if no CAP_FSETID */
+ if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
+ inarg.valid |= FATTR_KILL_SUIDGID;
}
+
+ attr_version = fuse_get_attr_version(fm->fc);
fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
- err = fuse_simple_request(fc, &args);
+ err = fuse_simple_request(fm, &args);
if (err) {
if (err == -EINTR)
fuse_invalidate_attr(inode);
goto error;
}
- if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
- make_bad_inode(inode);
+ if (fuse_invalid_attr(&outarg.attr) ||
+ inode_wrong_type(inode, outarg.attr.mode)) {
+ fuse_make_bad(inode);
err = -EIO;
goto error;
}
- spin_lock(&fc->lock);
+ spin_lock(&fi->lock);
/* the kernel maintains i_mtime locally */
if (trust_local_cmtime) {
if (attr->ia_valid & ATTR_MTIME)
- inode->i_mtime = attr->ia_mtime;
+ inode_set_mtime_to_ts(inode, attr->ia_mtime);
if (attr->ia_valid & ATTR_CTIME)
- inode->i_ctime = attr->ia_ctime;
+ inode_set_ctime_to_ts(inode, attr->ia_ctime);
/* FIXME: clear I_DIRTY_SYNC? */
}
- fuse_change_attributes_common(inode, &outarg.attr,
- attr_timeout(&outarg));
+ if (fi->attr_version > attr_version) {
+ /*
+ * Apply attributes, for example for fsnotify_change(), but set
+ * attribute timeout to zero.
+ */
+ outarg.attr_valid = outarg.attr_valid_nsec = 0;
+ }
+
+ fuse_change_attributes_common(inode, &outarg.attr, NULL,
+ ATTR_TIMEOUT(&outarg),
+ fuse_get_cache_mask(inode), 0);
oldsize = inode->i_size;
/* see the comment in fuse_change_attributes() */
- if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
+ if (!is_wb || is_truncate)
i_size_write(inode, outarg.attr.size);
if (is_truncate) {
- /* NOTE: this may release/reacquire fc->lock */
+ /* NOTE: this may release/reacquire fi->lock */
__fuse_release_nowrite(inode);
}
- spin_unlock(&fc->lock);
+ spin_unlock(&fi->lock);
/*
* Only call invalidate_inode_pages2() after removing
- * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
+ * FUSE_NOWRITE, otherwise fuse_launder_folio() would deadlock.
*/
if ((is_truncate || !is_wb) &&
S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
truncate_pagecache(inode, outarg.attr.size);
- invalidate_inode_pages2(inode->i_mapping);
+ invalidate_inode_pages2(mapping);
}
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+out:
+ if (fault_blocked)
+ filemap_invalidate_unlock(mapping);
+
return 0;
error:
@@ -1565,16 +2294,23 @@ error:
fuse_release_nowrite(inode);
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+
+ if (fault_blocked)
+ filemap_invalidate_unlock(mapping);
return err;
}
-static int fuse_setattr(struct dentry *entry, struct iattr *attr)
+static int fuse_setattr(struct mnt_idmap *idmap, struct dentry *entry,
+ struct iattr *attr)
{
struct inode *inode = d_inode(entry);
struct fuse_conn *fc = get_fuse_conn(inode);
struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
int ret;
+ if (fuse_is_bad(inode))
+ return -EIO;
+
if (!fuse_allow_current_process(get_fuse_conn(inode)))
return -EACCES;
@@ -1588,12 +2324,12 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
*
* This should be done on write(), truncate() and chown().
*/
- if (!fc->handle_killpriv) {
+ if (!fc->handle_killpriv && !fc->handle_killpriv_v2) {
/*
* ia_mode calculation may have used stale i_mode.
* Refresh and recalculate.
*/
- ret = fuse_do_getattr(inode, NULL, file);
+ ret = fuse_do_getattr(idmap, inode, NULL, file);
if (ret)
return ret;
@@ -1611,7 +2347,7 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
if (!attr->ia_valid)
return 0;
- ret = fuse_do_setattr(entry, attr, file);
+ ret = fuse_do_setattr(idmap, entry, attr, file);
if (!ret) {
/*
* If filesystem supports acls it may have updated acl xattrs in
@@ -1627,16 +2363,30 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
return ret;
}
-static int fuse_getattr(const struct path *path, struct kstat *stat,
+static int fuse_getattr(struct mnt_idmap *idmap,
+ const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
struct inode *inode = d_inode(path->dentry);
struct fuse_conn *fc = get_fuse_conn(inode);
- if (!fuse_allow_current_process(fc))
+ if (fuse_is_bad(inode))
+ return -EIO;
+
+ if (!fuse_allow_current_process(fc)) {
+ if (!request_mask) {
+ /*
+ * If user explicitly requested *nothing* then don't
+ * error out, but return st_dev only.
+ */
+ stat->result_mask = 0;
+ stat->dev = inode->i_sb->s_dev;
+ return 0;
+ }
return -EACCES;
+ }
- return fuse_update_get_attr(inode, NULL, stat, request_mask, flags);
+ return fuse_update_get_attr(idmap, inode, NULL, stat, request_mask, flags);
}
static const struct inode_operations fuse_dir_inode_operations = {
@@ -1650,12 +2400,16 @@ static const struct inode_operations fuse_dir_inode_operations = {
.setattr = fuse_setattr,
.create = fuse_create,
.atomic_open = fuse_atomic_open,
+ .tmpfile = fuse_tmpfile,
.mknod = fuse_mknod,
.permission = fuse_permission,
.getattr = fuse_getattr,
.listxattr = fuse_listxattr,
+ .get_inode_acl = fuse_get_inode_acl,
.get_acl = fuse_get_acl,
.set_acl = fuse_set_acl,
+ .fileattr_get = fuse_fileattr_get,
+ .fileattr_set = fuse_fileattr_set,
};
static const struct file_operations fuse_dir_operations = {
@@ -1667,6 +2421,7 @@ static const struct file_operations fuse_dir_operations = {
.fsync = fuse_dir_fsync,
.unlocked_ioctl = fuse_dir_ioctl,
.compat_ioctl = fuse_dir_compat_ioctl,
+ .setlease = simple_nosetlease,
};
static const struct inode_operations fuse_common_inode_operations = {
@@ -1674,8 +2429,11 @@ static const struct inode_operations fuse_common_inode_operations = {
.permission = fuse_permission,
.getattr = fuse_getattr,
.listxattr = fuse_listxattr,
+ .get_inode_acl = fuse_get_inode_acl,
.get_acl = fuse_get_acl,
.set_acl = fuse_set_acl,
+ .fileattr_get = fuse_fileattr_get,
+ .fileattr_set = fuse_fileattr_set,
};
static const struct inode_operations fuse_symlink_inode_operations = {
@@ -1704,20 +2462,20 @@ void fuse_init_dir(struct inode *inode)
fi->rdc.version = 0;
}
-static int fuse_symlink_readpage(struct file *null, struct page *page)
+static int fuse_symlink_read_folio(struct file *null, struct folio *folio)
{
- int err = fuse_readlink_page(page->mapping->host, page);
+ int err = fuse_readlink_folio(folio->mapping->host, folio);
if (!err)
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
- unlock_page(page);
+ folio_unlock(folio);
return err;
}
static const struct address_space_operations fuse_symlink_aops = {
- .readpage = fuse_symlink_readpage,
+ .read_folio = fuse_symlink_read_folio,
};
void fuse_init_symlink(struct inode *inode)