Diffstat (limited to 'fs')
-rw-r--r--  fs/autofs/dev-ioctl.c | 22
-rw-r--r--  fs/autofs/inode.c | 2
-rw-r--r--  fs/autofs/root.c | 11
-rw-r--r--  fs/binfmt_misc.c | 69
-rw-r--r--  fs/btrfs/compression.c | 4
-rw-r--r--  fs/btrfs/verity.c | 2
-rw-r--r--  fs/configfs/dir.c | 12
-rw-r--r--  fs/configfs/file.c | 2
-rw-r--r--  fs/configfs/inode.c | 3
-rw-r--r--  fs/configfs/mount.c | 2
-rw-r--r--  fs/dax.c | 2
-rw-r--r--  fs/dcache.c | 129
-rw-r--r--  fs/debugfs/inode.c | 53
-rw-r--r--  fs/debugfs/internal.h | 13
-rw-r--r--  fs/devpts/inode.c | 57
-rw-r--r--  fs/efivarfs/inode.c | 7
-rw-r--r--  fs/efivarfs/super.c | 5
-rw-r--r--  fs/erofs/zdata.c | 2
-rw-r--r--  fs/exfat/balloc.c | 30
-rw-r--r--  fs/exfat/dir.c | 5
-rw-r--r--  fs/exfat/exfat_fs.h | 5
-rw-r--r--  fs/exfat/fatent.c | 6
-rw-r--r--  fs/exfat/file.c | 5
-rw-r--r--  fs/exfat/namei.c | 20
-rw-r--r--  fs/exfat/super.c | 30
-rw-r--r--  fs/f2fs/compress.c | 2
-rw-r--r--  fs/fuse/control.c | 38
-rw-r--r--  fs/fuse/dev.c | 9
-rw-r--r--  fs/fuse/dev_uring.c | 12
-rw-r--r--  fs/fuse/dir.c | 248
-rw-r--r--  fs/fuse/file.c | 37
-rw-r--r--  fs/fuse/fuse_dev_i.h | 1
-rw-r--r--  fs/fuse/fuse_i.h | 28
-rw-r--r--  fs/fuse/inode.c | 44
-rw-r--r--  fs/hostfs/hostfs.h | 34
-rw-r--r--  fs/hugetlbfs/inode.c | 61
-rw-r--r--  fs/inode.c | 3
-rw-r--r--  fs/internal.h | 1
-rw-r--r--  fs/kernfs/dir.c | 5
-rw-r--r--  fs/kernfs/mount.c | 1
-rw-r--r--  fs/libfs.c | 52
-rw-r--r--  fs/nfs/localio.c | 48
-rw-r--r--  fs/nfsd/nfsctl.c | 18
-rw-r--r--  fs/ntfs3/file.c | 2
-rw-r--r--  fs/ocfs2/dlmfs/dlmfs.c | 8
-rw-r--r--  fs/overlayfs/dir.c | 20
-rw-r--r--  fs/overlayfs/super.c | 12
-rw-r--r--  fs/pipe.c | 28
-rw-r--r--  fs/proc/base.c | 6
-rw-r--r--  fs/proc/inode.c | 2
-rw-r--r--  fs/proc/internal.h | 1
-rw-r--r--  fs/proc/root.c | 14
-rw-r--r--  fs/proc/self.c | 10
-rw-r--r--  fs/proc/task_mmu.c | 314
-rw-r--r--  fs/proc/thread_self.c | 11
-rw-r--r--  fs/pstore/inode.c | 7
-rw-r--r--  fs/pstore/ram.c | 2
-rw-r--r--  fs/ramfs/file-mmu.c | 2
-rw-r--r--  fs/ramfs/inode.c | 8
-rw-r--r--  fs/resctrl/pseudo_lock.c | 20
-rw-r--r--  fs/super.c | 10
-rw-r--r--  fs/sysfs/group.c | 10
-rw-r--r--  fs/tracefs/event_inode.c | 7
-rw-r--r--  fs/tracefs/inode.c | 13
-rw-r--r--  fs/userfaultfd.c | 117
65 files changed, 1054 insertions, 710 deletions
diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c
index a58f9248b0f5..6743b3b64217 100644
--- a/fs/autofs/dev-ioctl.c
+++ b/fs/autofs/dev-ioctl.c
@@ -432,16 +432,6 @@ static int autofs_dev_ioctl_timeout(struct file *fp,
if (!autofs_type_indirect(sbi->type))
return -EINVAL;
- /* An expire timeout greater than the superblock timeout
- * could be a problem at shutdown but the super block
- * timeout itself can change so all we can really do is
- * warn the user.
- */
- if (timeout >= sbi->exp_timeout)
- pr_warn("per-mount expire timeout is greater than "
- "the parent autofs mount timeout which could "
- "prevent shutdown\n");
-
dentry = try_lookup_noperm(&QSTR_LEN(param->path, path_len),
base);
if (IS_ERR_OR_NULL(dentry))
@@ -470,6 +460,18 @@ static int autofs_dev_ioctl_timeout(struct file *fp,
ino->flags |= AUTOFS_INF_EXPIRE_SET;
ino->exp_timeout = timeout * HZ;
}
+
+ /* An expire timeout greater than the superblock timeout
+ * could be a problem at shutdown but the super block
+ * timeout itself can change so all we can really do is
+ * warn the user.
+ */
+ if (ino->flags & AUTOFS_INF_EXPIRE_SET &&
+ ino->exp_timeout > sbi->exp_timeout)
+ pr_warn("per-mount expire timeout is greater than "
+ "the parent autofs mount timeout which could "
+ "prevent shutdown\n");
+
dput(dentry);
}
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 732aee76a24c..b932b1719dfc 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -55,7 +55,7 @@ void autofs_kill_sb(struct super_block *sb)
}
pr_debug("shutting down\n");
- kill_litter_super(sb);
+ kill_anon_super(sb);
if (sbi)
kfree_rcu(sbi, rcu);
}
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index d10df9d89d1c..2c31002b314a 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -602,9 +602,8 @@ static int autofs_dir_symlink(struct mnt_idmap *idmap,
}
inode->i_private = cp;
inode->i_size = size;
- d_add(dentry, inode);
- dget(dentry);
+ d_make_persistent(dentry, inode);
p_ino = autofs_dentry_ino(dentry->d_parent);
p_ino->count++;
@@ -631,12 +630,11 @@ static int autofs_dir_symlink(struct mnt_idmap *idmap,
static int autofs_dir_unlink(struct inode *dir, struct dentry *dentry)
{
struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
- struct autofs_info *ino = autofs_dentry_ino(dentry);
struct autofs_info *p_ino;
p_ino = autofs_dentry_ino(dentry->d_parent);
p_ino->count--;
- dput(ino->dentry);
+ d_make_discardable(dentry);
d_inode(dentry)->i_size = 0;
clear_nlink(d_inode(dentry));
@@ -718,7 +716,7 @@ static int autofs_dir_rmdir(struct inode *dir, struct dentry *dentry)
p_ino = autofs_dentry_ino(dentry->d_parent);
p_ino->count--;
- dput(ino->dentry);
+ d_make_discardable(dentry);
d_inode(dentry)->i_size = 0;
clear_nlink(d_inode(dentry));
@@ -748,12 +746,11 @@ static struct dentry *autofs_dir_mkdir(struct mnt_idmap *idmap,
inode = autofs_get_inode(dir->i_sb, S_IFDIR | mode);
if (!inode)
return ERR_PTR(-ENOMEM);
- d_add(dentry, inode);
if (sbi->version < 5)
autofs_set_leaf_automount_flags(dentry);
- dget(dentry);
+ d_make_persistent(dentry, inode);
p_ino = autofs_dentry_ino(dentry->d_parent);
p_ino->count++;
inc_nlink(dir);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index d7aec5b87c2b..8cb1a94339b8 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -765,14 +765,41 @@ static const struct file_operations bm_entry_operations = {
/* /register */
+/* add to filesystem */
+static int add_entry(Node *e, struct super_block *sb)
+{
+ struct dentry *dentry = simple_start_creating(sb->s_root, e->name);
+ struct inode *inode;
+ struct binfmt_misc *misc;
+
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+
+ inode = bm_get_inode(sb, S_IFREG | 0644);
+ if (unlikely(!inode)) {
+ simple_done_creating(dentry);
+ return -ENOMEM;
+ }
+
+ refcount_set(&e->users, 1);
+ e->dentry = dentry;
+ inode->i_private = e;
+ inode->i_fop = &bm_entry_operations;
+
+ d_make_persistent(dentry, inode);
+ misc = i_binfmt_misc(inode);
+ write_lock(&misc->entries_lock);
+ list_add(&e->list, &misc->entries);
+ write_unlock(&misc->entries_lock);
+ simple_done_creating(dentry);
+ return 0;
+}
+
static ssize_t bm_register_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
Node *e;
- struct inode *inode;
struct super_block *sb = file_inode(file)->i_sb;
- struct dentry *root = sb->s_root, *dentry;
- struct binfmt_misc *misc;
int err = 0;
struct file *f = NULL;
@@ -800,39 +827,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
e->interp_file = f;
}
- inode_lock(d_inode(root));
- dentry = lookup_noperm(&QSTR(e->name), root);
- err = PTR_ERR(dentry);
- if (IS_ERR(dentry))
- goto out;
-
- err = -EEXIST;
- if (d_really_is_positive(dentry))
- goto out2;
-
- inode = bm_get_inode(sb, S_IFREG | 0644);
-
- err = -ENOMEM;
- if (!inode)
- goto out2;
-
- refcount_set(&e->users, 1);
- e->dentry = dget(dentry);
- inode->i_private = e;
- inode->i_fop = &bm_entry_operations;
-
- d_instantiate(dentry, inode);
- misc = i_binfmt_misc(inode);
- write_lock(&misc->entries_lock);
- list_add(&e->list, &misc->entries);
- write_unlock(&misc->entries_lock);
-
- err = 0;
-out2:
- dput(dentry);
-out:
- inode_unlock(d_inode(root));
-
+ err = add_entry(e, sb);
if (err) {
if (f) {
exe_file_allow_write_access(f);
@@ -1027,7 +1022,7 @@ static struct file_system_type bm_fs_type = {
.name = "binfmt_misc",
.init_fs_context = bm_init_fs_context,
.fs_flags = FS_USERNS_MOUNT,
- .kill_sb = kill_litter_super,
+ .kill_sb = kill_anon_super,
};
MODULE_ALIAS_FS("binfmt_misc");
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 7dda6cc68379..6b3357287b42 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -475,8 +475,8 @@ static noinline int add_ra_bio_pages(struct inode *inode,
continue;
}
- folio = filemap_alloc_folio(mapping_gfp_constraint(mapping,
- ~__GFP_FS), 0);
+ folio = filemap_alloc_folio(mapping_gfp_constraint(mapping, ~__GFP_FS),
+ 0, NULL);
if (!folio)
break;
diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c
index 06dfcb461f53..a2ac3fb68bc8 100644
--- a/fs/btrfs/verity.c
+++ b/fs/btrfs/verity.c
@@ -736,7 +736,7 @@ again:
}
folio = filemap_alloc_folio(mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS),
- 0);
+ 0, NULL);
if (!folio)
return ERR_PTR(-ENOMEM);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 81f4f06bc87e..ba95f636a5ab 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -400,8 +400,14 @@ static void remove_dir(struct dentry * d)
configfs_remove_dirent(d);
- if (d_really_is_positive(d))
- simple_rmdir(d_inode(parent),d);
+ if (d_really_is_positive(d)) {
+ if (likely(simple_empty(d))) {
+ __simple_rmdir(d_inode(parent),d);
+ dput(d);
+ } else {
+ pr_warn("remove_dir (%pd): attributes remain", d);
+ }
+ }
pr_debug(" o %pd removing done (%d)\n", d, d_count(d));
@@ -598,7 +604,7 @@ static void detach_attrs(struct config_item * item)
static int populate_attrs(struct config_item *item)
{
const struct config_item_type *t = item->ci_type;
- struct configfs_group_operations *ops;
+ const struct configfs_group_operations *ops;
struct configfs_attribute *attr;
struct configfs_bin_attribute *bin_attr;
int error = 0;
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 0ad32150611e..affe4742bbb5 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -30,7 +30,7 @@ struct configfs_buffer {
size_t count;
loff_t pos;
char * page;
- struct configfs_item_operations * ops;
+ const struct configfs_item_operations *ops;
struct mutex mutex;
int needs_read_fill;
bool read_in_progress;
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 1d2e3a5738d1..bcda3372e141 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -211,7 +211,8 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent)
dget_dlock(dentry);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
- simple_unlink(d_inode(parent), dentry);
+ __simple_unlink(d_inode(parent), dentry);
+ dput(dentry);
} else
spin_unlock(&dentry->d_lock);
}
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index 456c4a2efb53..4929f3431189 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -116,7 +116,7 @@ static struct file_system_type configfs_fs_type = {
.owner = THIS_MODULE,
.name = "configfs",
.init_fs_context = configfs_init_fs_context,
- .kill_sb = kill_litter_super,
+ .kill_sb = kill_anon_super,
};
MODULE_ALIAS_FS("configfs");
diff --git a/fs/dax.c b/fs/dax.c
index 38fae11ee419..289e6254aa30 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -24,7 +24,7 @@
#include <linux/mmu_notifier.h>
#include <linux/iomap.h>
#include <linux/rmap.h>
-#include <asm/pgalloc.h>
+#include <linux/pgalloc.h>
#define CREATE_TRACE_POINTS
#include <trace/events/fs_dax.h>
diff --git a/fs/dcache.c b/fs/dcache.c
index 9143fd502def..dc2fff4811d1 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -870,6 +870,24 @@ locked:
return false;
}
+static void finish_dput(struct dentry *dentry)
+ __releases(dentry->d_lock)
+ __releases(RCU)
+{
+ while (lock_for_kill(dentry)) {
+ rcu_read_unlock();
+ dentry = __dentry_kill(dentry);
+ if (!dentry)
+ return;
+ if (retain_dentry(dentry, true)) {
+ spin_unlock(&dentry->d_lock);
+ return;
+ }
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+ spin_unlock(&dentry->d_lock);
+}
/*
* This is dput
@@ -907,22 +925,21 @@ void dput(struct dentry *dentry)
rcu_read_unlock();
return;
}
- while (lock_for_kill(dentry)) {
- rcu_read_unlock();
- dentry = __dentry_kill(dentry);
- if (!dentry)
- return;
- if (retain_dentry(dentry, true)) {
- spin_unlock(&dentry->d_lock);
- return;
- }
- rcu_read_lock();
- }
- rcu_read_unlock();
- spin_unlock(&dentry->d_lock);
+ finish_dput(dentry);
}
EXPORT_SYMBOL(dput);
+void d_make_discardable(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ WARN_ON(!(dentry->d_flags & DCACHE_PERSISTENT));
+ dentry->d_flags &= ~DCACHE_PERSISTENT;
+ dentry->d_lockref.count--;
+ rcu_read_lock();
+ finish_dput(dentry);
+}
+EXPORT_SYMBOL(d_make_discardable);
+
static void to_shrink_list(struct dentry *dentry, struct list_head *list)
__must_hold(&dentry->d_lock)
{
@@ -1087,6 +1104,15 @@ struct dentry *d_find_alias_rcu(struct inode *inode)
return de;
}
+void d_dispose_if_unused(struct dentry *dentry, struct list_head *dispose)
+{
+ spin_lock(&dentry->d_lock);
+ if (!dentry->d_lockref.count)
+ to_shrink_list(dentry, dispose);
+ spin_unlock(&dentry->d_lock);
+}
+EXPORT_SYMBOL(d_dispose_if_unused);
+
/*
* Try to kill dentries associated with this inode.
* WARNING: you must own a reference to inode.
@@ -1097,12 +1123,8 @@ void d_prune_aliases(struct inode *inode)
struct dentry *dentry;
spin_lock(&inode->i_lock);
- hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
- spin_lock(&dentry->d_lock);
- if (!dentry->d_lockref.count)
- to_shrink_list(dentry, &dispose);
- spin_unlock(&dentry->d_lock);
- }
+ hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias)
+ d_dispose_if_unused(dentry, &dispose);
spin_unlock(&inode->i_lock);
shrink_dentry_list(&dispose);
}
@@ -1142,6 +1164,7 @@ void shrink_dentry_list(struct list_head *list)
shrink_kill(dentry);
}
}
+EXPORT_SYMBOL(shrink_dentry_list);
static enum lru_status dentry_lru_isolate(struct list_head *item,
struct list_lru_one *lru, void *arg)
@@ -1512,6 +1535,15 @@ out:
return ret;
}
+static enum d_walk_ret select_collect_umount(void *_data, struct dentry *dentry)
+{
+ if (dentry->d_flags & DCACHE_PERSISTENT) {
+ dentry->d_flags &= ~DCACHE_PERSISTENT;
+ dentry->d_lockref.count--;
+ }
+ return select_collect(_data, dentry);
+}
+
static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry)
{
struct select_data *data = _data;
@@ -1540,18 +1572,20 @@ out:
}
/**
- * shrink_dcache_parent - prune dcache
+ * shrink_dcache_tree - prune dcache
* @parent: parent of entries to prune
+ * @for_umount: true if we want to unpin the persistent ones
*
* Prune the dcache to remove unused children of the parent dentry.
*/
-void shrink_dcache_parent(struct dentry *parent)
+static void shrink_dcache_tree(struct dentry *parent, bool for_umount)
{
for (;;) {
struct select_data data = {.start = parent};
INIT_LIST_HEAD(&data.dispose);
- d_walk(parent, &data, select_collect);
+ d_walk(parent, &data,
+ for_umount ? select_collect_umount : select_collect);
if (!list_empty(&data.dispose)) {
shrink_dentry_list(&data.dispose);
@@ -1576,6 +1610,11 @@ void shrink_dcache_parent(struct dentry *parent)
shrink_dentry_list(&data.dispose);
}
}
+
+void shrink_dcache_parent(struct dentry *parent)
+{
+ shrink_dcache_tree(parent, false);
+}
EXPORT_SYMBOL(shrink_dcache_parent);
static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
@@ -1602,7 +1641,7 @@ static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
static void do_one_tree(struct dentry *dentry)
{
- shrink_dcache_parent(dentry);
+ shrink_dcache_tree(dentry, true);
d_walk(dentry, dentry, umount_check);
d_drop(dentry);
dput(dentry);
@@ -1924,7 +1963,6 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
unsigned add_flags = d_flags_for_inode(inode);
WARN_ON(d_in_lookup(dentry));
- spin_lock(&dentry->d_lock);
/*
* The negative counter only tracks dentries on the LRU. Don't dec if
* d_lru is on another list.
@@ -1937,7 +1975,6 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
__d_set_inode_and_type(dentry, inode, add_flags);
raw_write_seqcount_end(&dentry->d_seq);
fsnotify_update_flags(dentry);
- spin_unlock(&dentry->d_lock);
}
/**
@@ -1961,7 +1998,9 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
if (inode) {
security_d_instantiate(entry, inode);
spin_lock(&inode->i_lock);
+ spin_lock(&entry->d_lock);
__d_instantiate(entry, inode);
+ spin_unlock(&entry->d_lock);
spin_unlock(&inode->i_lock);
}
}
@@ -1980,7 +2019,9 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
lockdep_annotate_inode_mutex_key(inode);
security_d_instantiate(entry, inode);
spin_lock(&inode->i_lock);
+ spin_lock(&entry->d_lock);
__d_instantiate(entry, inode);
+ spin_unlock(&entry->d_lock);
WARN_ON(!(inode_state_read(inode) & I_NEW));
inode_state_clear(inode, I_NEW | I_CREATING);
inode_wake_up_bit(inode, __I_NEW);
@@ -2742,6 +2783,24 @@ void d_add(struct dentry *entry, struct inode *inode)
}
EXPORT_SYMBOL(d_add);
+struct dentry *d_make_persistent(struct dentry *dentry, struct inode *inode)
+{
+ WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias));
+ WARN_ON(!inode);
+ security_d_instantiate(dentry, inode);
+ spin_lock(&inode->i_lock);
+ spin_lock(&dentry->d_lock);
+ __d_instantiate(dentry, inode);
+ dentry->d_flags |= DCACHE_PERSISTENT;
+ dget_dlock(dentry);
+ if (d_unhashed(dentry))
+ __d_rehash(dentry);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&inode->i_lock);
+ return dentry;
+}
+EXPORT_SYMBOL(d_make_persistent);
+
static void swap_names(struct dentry *dentry, struct dentry *target)
{
if (unlikely(dname_external(target))) {
@@ -3111,26 +3170,6 @@ bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
}
EXPORT_SYMBOL(is_subdir);
-static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry)
-{
- struct dentry *root = data;
- if (dentry != root) {
- if (d_unhashed(dentry) || !dentry->d_inode)
- return D_WALK_SKIP;
-
- if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
- dentry->d_flags |= DCACHE_GENOCIDE;
- dentry->d_lockref.count--;
- }
- }
- return D_WALK_CONTINUE;
-}
-
-void d_genocide(struct dentry *parent)
-{
- d_walk(parent, parent, d_genocide_kill);
-}
-
void d_mark_tmpfile(struct file *file, struct inode *inode)
{
struct dentry *dentry = file->f_path.dentry;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 532bd7c46baf..4b263c328ed2 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -35,7 +35,7 @@
static struct vfsmount *debugfs_mount;
static int debugfs_mount_count;
static bool debugfs_registered;
-static unsigned int debugfs_allow __ro_after_init = DEFAULT_DEBUGFS_ALLOW_BITS;
+static bool debugfs_enabled __ro_after_init = IS_ENABLED(CONFIG_DEBUG_FS_ALLOW_ALL);
/*
* Don't allow access attributes to be changed whilst the kernel is locked down
@@ -287,9 +287,6 @@ static int debugfs_get_tree(struct fs_context *fc)
{
int err;
- if (!(debugfs_allow & DEBUGFS_ALLOW_API))
- return -EPERM;
-
err = get_tree_single(fc, debugfs_fill_super);
if (err)
return err;
@@ -329,7 +326,7 @@ static struct file_system_type debug_fs_type = {
.name = "debugfs",
.init_fs_context = debugfs_init_fs_context,
.parameters = debugfs_param_specs,
- .kill_sb = kill_litter_super,
+ .kill_sb = kill_anon_super,
};
MODULE_ALIAS_FS("debugfs");
@@ -368,7 +365,7 @@ static struct dentry *debugfs_start_creating(const char *name,
struct dentry *dentry;
int error;
- if (!(debugfs_allow & DEBUGFS_ALLOW_API))
+ if (!debugfs_enabled)
return ERR_PTR(-EPERM);
if (!debugfs_initialized())
@@ -405,16 +402,15 @@ static struct dentry *debugfs_start_creating(const char *name,
static struct dentry *debugfs_failed_creating(struct dentry *dentry)
{
- inode_unlock(d_inode(dentry->d_parent));
- dput(dentry);
+ simple_done_creating(dentry);
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
return ERR_PTR(-ENOMEM);
}
static struct dentry *debugfs_end_creating(struct dentry *dentry)
{
- inode_unlock(d_inode(dentry->d_parent));
- return dentry;
+ simple_done_creating(dentry);
+ return dentry; // borrowed
}
static struct dentry *__debugfs_create_file(const char *name, umode_t mode,
@@ -434,11 +430,6 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode,
if (IS_ERR(dentry))
return dentry;
- if (!(debugfs_allow & DEBUGFS_ALLOW_API)) {
- debugfs_failed_creating(dentry);
- return ERR_PTR(-EPERM);
- }
-
inode = debugfs_get_inode(dentry->d_sb);
if (unlikely(!inode)) {
pr_err("out of free dentries, can not create file '%s'\n",
@@ -456,7 +447,7 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode,
DEBUGFS_I(inode)->raw = real_fops;
DEBUGFS_I(inode)->aux = (void *)aux;
- d_instantiate(dentry, inode);
+ d_make_persistent(dentry, inode);
fsnotify_create(d_inode(dentry->d_parent), dentry);
return debugfs_end_creating(dentry);
}
@@ -584,11 +575,6 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
if (IS_ERR(dentry))
return dentry;
- if (!(debugfs_allow & DEBUGFS_ALLOW_API)) {
- debugfs_failed_creating(dentry);
- return ERR_PTR(-EPERM);
- }
-
inode = debugfs_get_inode(dentry->d_sb);
if (unlikely(!inode)) {
pr_err("out of free dentries, can not create directory '%s'\n",
@@ -602,7 +588,7 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
/* directory inodes start off with i_nlink == 2 (for "." entry) */
inc_nlink(inode);
- d_instantiate(dentry, inode);
+ d_make_persistent(dentry, inode);
inc_nlink(d_inode(dentry->d_parent));
fsnotify_mkdir(d_inode(dentry->d_parent), dentry);
return debugfs_end_creating(dentry);
@@ -631,11 +617,6 @@ struct dentry *debugfs_create_automount(const char *name,
if (IS_ERR(dentry))
return dentry;
- if (!(debugfs_allow & DEBUGFS_ALLOW_API)) {
- debugfs_failed_creating(dentry);
- return ERR_PTR(-EPERM);
- }
-
inode = debugfs_get_inode(dentry->d_sb);
if (unlikely(!inode)) {
pr_err("out of free dentries, can not create automount '%s'\n",
@@ -649,7 +630,7 @@ struct dentry *debugfs_create_automount(const char *name,
DEBUGFS_I(inode)->automount = f;
/* directory inodes start off with i_nlink == 2 (for "." entry) */
inc_nlink(inode);
- d_instantiate(dentry, inode);
+ d_make_persistent(dentry, inode);
inc_nlink(d_inode(dentry->d_parent));
fsnotify_mkdir(d_inode(dentry->d_parent), dentry);
return debugfs_end_creating(dentry);
@@ -704,7 +685,7 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
inode->i_mode = S_IFLNK | S_IRWXUGO;
inode->i_op = &debugfs_symlink_inode_operations;
inode->i_link = link;
- d_instantiate(dentry, inode);
+ d_make_persistent(dentry, inode);
return debugfs_end_creating(dentry);
}
EXPORT_SYMBOL_GPL(debugfs_create_symlink);
@@ -899,21 +880,25 @@ static int __init debugfs_kernel(char *str)
{
if (str) {
if (!strcmp(str, "on"))
- debugfs_allow = DEBUGFS_ALLOW_API | DEBUGFS_ALLOW_MOUNT;
- else if (!strcmp(str, "no-mount"))
- debugfs_allow = DEBUGFS_ALLOW_API;
+ debugfs_enabled = true;
else if (!strcmp(str, "off"))
- debugfs_allow = 0;
+ debugfs_enabled = false;
+ else if (!strcmp(str, "no-mount")) {
+ pr_notice("debugfs=no-mount is a deprecated alias "
+ "for debugfs=off\n");
+ debugfs_enabled = false;
+ }
}
return 0;
}
early_param("debugfs", debugfs_kernel);
+
static int __init debugfs_init(void)
{
int retval;
- if (!(debugfs_allow & DEBUGFS_ALLOW_MOUNT))
+ if (!debugfs_enabled)
return -EPERM;
retval = sysfs_create_mount_point(kernel_kobj, "debug");
diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h
index 427987f81571..c95699b27a56 100644
--- a/fs/debugfs/internal.h
+++ b/fs/debugfs/internal.h
@@ -55,17 +55,4 @@ enum {
HAS_IOCTL = 16
};
-#define DEBUGFS_ALLOW_API BIT(0)
-#define DEBUGFS_ALLOW_MOUNT BIT(1)
-
-#ifdef CONFIG_DEBUG_FS_ALLOW_ALL
-#define DEFAULT_DEBUGFS_ALLOW_BITS (DEBUGFS_ALLOW_MOUNT | DEBUGFS_ALLOW_API)
-#endif
-#ifdef CONFIG_DEBUG_FS_DISALLOW_MOUNT
-#define DEFAULT_DEBUGFS_ALLOW_BITS (DEBUGFS_ALLOW_API)
-#endif
-#ifdef CONFIG_DEBUG_FS_ALLOW_NONE
-#define DEFAULT_DEBUGFS_ALLOW_BITS (0)
-#endif
-
#endif /* _DEBUGFS_INTERNAL_H_ */
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index fdf22264a8e9..9f3de528c358 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -102,7 +102,7 @@ struct pts_fs_info {
struct ida allocated_ptys;
struct pts_mount_opts mount_opts;
struct super_block *sb;
- struct dentry *ptmx_dentry;
+ struct inode *ptmx_inode; // borrowed
};
static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb)
@@ -259,7 +259,6 @@ static int devpts_parse_param(struct fs_context *fc, struct fs_parameter *param)
static int mknod_ptmx(struct super_block *sb, struct fs_context *fc)
{
int mode;
- int rc = -ENOMEM;
struct dentry *dentry;
struct inode *inode;
struct dentry *root = sb->s_root;
@@ -268,18 +267,10 @@ static int mknod_ptmx(struct super_block *sb, struct fs_context *fc)
kuid_t ptmx_uid = current_fsuid();
kgid_t ptmx_gid = current_fsgid();
- inode_lock(d_inode(root));
-
- /* If we have already created ptmx node, return */
- if (fsi->ptmx_dentry) {
- rc = 0;
- goto out;
- }
-
- dentry = d_alloc_name(root, "ptmx");
- if (!dentry) {
+ dentry = simple_start_creating(root, "ptmx");
+ if (IS_ERR(dentry)) {
pr_err("Unable to alloc dentry for ptmx node\n");
- goto out;
+ return PTR_ERR(dentry);
}
/*
@@ -287,9 +278,9 @@ static int mknod_ptmx(struct super_block *sb, struct fs_context *fc)
*/
inode = new_inode(sb);
if (!inode) {
+ simple_done_creating(dentry);
pr_err("Unable to alloc inode for ptmx node\n");
- dput(dentry);
- goto out;
+ return -ENOMEM;
}
inode->i_ino = 2;
@@ -299,23 +290,18 @@ static int mknod_ptmx(struct super_block *sb, struct fs_context *fc)
init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2));
inode->i_uid = ptmx_uid;
inode->i_gid = ptmx_gid;
+ fsi->ptmx_inode = inode;
- d_add(dentry, inode);
+ d_make_persistent(dentry, inode);
- fsi->ptmx_dentry = dentry;
- rc = 0;
-out:
- inode_unlock(d_inode(root));
- return rc;
+ simple_done_creating(dentry);
+
+ return 0;
}
static void update_ptmx_mode(struct pts_fs_info *fsi)
{
- struct inode *inode;
- if (fsi->ptmx_dentry) {
- inode = d_inode(fsi->ptmx_dentry);
- inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode;
- }
+ fsi->ptmx_inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode;
}
static int devpts_reconfigure(struct fs_context *fc)
@@ -461,7 +447,7 @@ static void devpts_kill_sb(struct super_block *sb)
if (fsi)
ida_destroy(&fsi->allocated_ptys);
kfree(fsi);
- kill_litter_super(sb);
+ kill_anon_super(sb);
}
static struct file_system_type devpts_fs_type = {
@@ -534,16 +520,15 @@ struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv)
sprintf(s, "%d", index);
dentry = d_alloc_name(root, s);
- if (dentry) {
- dentry->d_fsdata = priv;
- d_add(dentry, inode);
- fsnotify_create(d_inode(root), dentry);
- } else {
+ if (!dentry) {
iput(inode);
- dentry = ERR_PTR(-ENOMEM);
+ return ERR_PTR(-ENOMEM);
}
-
- return dentry;
+ dentry->d_fsdata = priv;
+ d_make_persistent(dentry, inode);
+ fsnotify_create(d_inode(root), dentry);
+ dput(dentry);
+ return dentry; // borrowed
}
/**
@@ -573,7 +558,7 @@ void devpts_pty_kill(struct dentry *dentry)
drop_nlink(dentry->d_inode);
d_drop(dentry);
fsnotify_unlink(d_inode(dentry->d_parent), dentry);
- dput(dentry); /* d_alloc_name() in devpts_pty_new() */
+ d_make_discardable(dentry);
}
static int __init init_devpts_fs(void)
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c
index 2891614abf8d..95dcad83da11 100644
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -113,8 +113,7 @@ static int efivarfs_create(struct mnt_idmap *idmap, struct inode *dir,
inode->i_private = var;
- d_instantiate(dentry, inode);
- dget(dentry);
+ d_make_persistent(dentry, inode);
return 0;
}
@@ -126,9 +125,7 @@ static int efivarfs_unlink(struct inode *dir, struct dentry *dentry)
if (efivar_entry_delete(var))
return -EINVAL;
- drop_nlink(d_inode(dentry));
- dput(dentry);
- return 0;
+ return simple_unlink(dir, dentry);
};
const struct inode_operations efivarfs_dir_inode_operations = {
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 6de97565d5f7..9da992925920 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -278,7 +278,8 @@ static int efivarfs_create_dentry(struct super_block *sb, efi_char16_t *name16,
inode->i_private = entry;
i_size_write(inode, size + sizeof(__u32)); /* attributes + data */
inode_unlock(inode);
- d_add(dentry, inode);
+ d_make_persistent(dentry, inode);
+ dput(dentry);
return 0;
@@ -522,7 +523,7 @@ static void efivarfs_kill_sb(struct super_block *sb)
struct efivarfs_fs_info *sfi = sb->s_fs_info;
blocking_notifier_chain_unregister(&efivar_ops_nh, &sfi->nb);
- kill_litter_super(sb);
+ kill_anon_super(sb);
kfree(sfi);
}
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 461a929e0825..65da21504632 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -562,7 +562,7 @@ static void z_erofs_bind_cache(struct z_erofs_frontend *fe)
* Allocate a managed folio for cached I/O, or it may be
* then filled with a file-backed folio for in-place I/O
*/
- newfolio = filemap_alloc_folio(gfp, 0);
+ newfolio = filemap_alloc_folio(gfp, 0, NULL);
if (!newfolio)
continue;
newfolio->private = Z_EROFS_PREALLOCATED_FOLIO;
diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c
index 2d2d510f2372..5429041c7eaf 100644
--- a/fs/exfat/balloc.c
+++ b/fs/exfat/balloc.c
@@ -106,7 +106,7 @@ static int exfat_allocate_bitmap(struct super_block *sb,
(PAGE_SHIFT - sb->s_blocksize_bits);
for (i = 0; i < sbi->map_sectors; i++) {
/* Trigger the next readahead in advance. */
- if (0 == (i % max_ra_count)) {
+ if (max_ra_count && 0 == (i % max_ra_count)) {
blk_start_plug(&plug);
for (j = i; j < min(max_ra_count, sbi->map_sectors - i) + i; j++)
sb_breadahead(sb, sector + j);
@@ -183,11 +183,10 @@ void exfat_free_bitmap(struct exfat_sb_info *sbi)
kvfree(sbi->vol_amap);
}
-int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync)
+int exfat_set_bitmap(struct super_block *sb, unsigned int clu, bool sync)
{
int i, b;
unsigned int ent_idx;
- struct super_block *sb = inode->i_sb;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
if (!is_valid_cluster(sbi, clu))
@@ -202,11 +201,10 @@ int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync)
return 0;
}
-int exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync)
+int exfat_clear_bitmap(struct super_block *sb, unsigned int clu, bool sync)
{
int i, b;
unsigned int ent_idx;
- struct super_block *sb = inode->i_sb;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
if (!is_valid_cluster(sbi, clu))
@@ -226,6 +224,28 @@ int exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync)
return 0;
}
+bool exfat_test_bitmap(struct super_block *sb, unsigned int clu)
+{
+ int i, b;
+ unsigned int ent_idx;
+ struct exfat_sb_info *sbi = EXFAT_SB(sb);
+
+ if (!sbi->vol_amap)
+ return true;
+
+ if (!is_valid_cluster(sbi, clu))
+ return false;
+
+ ent_idx = CLUSTER_TO_BITMAP_ENT(clu);
+ i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx);
+ b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx);
+
+ if (!test_bit_le(b, sbi->vol_amap[i]->b_data))
+ return false;
+
+ return true;
+}
+
/*
* If the value of "clu" is 0, it means cluster 2 which is the first cluster of
* the cluster heap.
diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index 7229146fe2bf..3045a58e124a 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -604,6 +604,11 @@ static int exfat_find_location(struct super_block *sb, struct exfat_chain *p_dir
if (ret)
return ret;
+ if (!exfat_test_bitmap(sb, clu)) {
+ exfat_err(sb, "failed to test cluster bit(%u)", clu);
+ return -EIO;
+ }
+
/* byte offset in cluster */
off = EXFAT_CLU_OFFSET(off, sbi);
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 38210fb6901c..176fef62574c 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -452,8 +452,9 @@ int exfat_count_num_clusters(struct super_block *sb,
/* balloc.c */
int exfat_load_bitmap(struct super_block *sb);
void exfat_free_bitmap(struct exfat_sb_info *sbi);
-int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync);
-int exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync);
+int exfat_set_bitmap(struct super_block *sb, unsigned int clu, bool sync);
+int exfat_clear_bitmap(struct super_block *sb, unsigned int clu, bool sync);
+bool exfat_test_bitmap(struct super_block *sb, unsigned int clu);
unsigned int exfat_find_free_bitmap(struct super_block *sb, unsigned int clu);
int exfat_count_used_clusters(struct super_block *sb, unsigned int *ret_count);
int exfat_trim_fs(struct inode *inode, struct fstrim_range *range);
diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c
index 825083634ba2..c9c5f2e3a05e 100644
--- a/fs/exfat/fatent.c
+++ b/fs/exfat/fatent.c
@@ -205,7 +205,7 @@ static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain
cur_cmap_i = next_cmap_i;
}
- err = exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode)));
+ err = exfat_clear_bitmap(sb, clu, (sync && IS_DIRSYNC(inode)));
if (err)
break;
clu++;
@@ -233,7 +233,7 @@ static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain
cur_cmap_i = next_cmap_i;
}
- if (exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode))))
+ if (exfat_clear_bitmap(sb, clu, (sync && IS_DIRSYNC(inode))))
break;
if (sbi->options.discard) {
@@ -409,7 +409,7 @@ int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc,
}
/* update allocation bitmap */
- if (exfat_set_bitmap(inode, new_clu, sync_bmap)) {
+ if (exfat_set_bitmap(sb, new_clu, sync_bmap)) {
ret = -EIO;
goto free_cluster;
}
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index adc37b4d7fc2..536c8078f0c1 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -25,6 +25,8 @@ static int exfat_cont_expand(struct inode *inode, loff_t size)
struct exfat_sb_info *sbi = EXFAT_SB(sb);
struct exfat_chain clu;
+ truncate_pagecache(inode, i_size_read(inode));
+
ret = inode_newsize_ok(inode, size);
if (ret)
return ret;
@@ -639,6 +641,9 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
inode_lock(inode);
+ if (pos > i_size_read(inode))
+ truncate_pagecache(inode, i_size_read(inode));
+
valid_size = ei->valid_size;
ret = generic_write_checks(iocb, iter);
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 745dce29ddb5..dfe957493d49 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -645,16 +645,6 @@ static int exfat_find(struct inode *dir, const struct qstr *qname,
info->valid_size = le64_to_cpu(ep2->dentry.stream.valid_size);
info->size = le64_to_cpu(ep2->dentry.stream.size);
- if (info->valid_size < 0) {
- exfat_fs_error(sb, "data valid size is invalid(%lld)", info->valid_size);
- return -EIO;
- }
-
- if (unlikely(EXFAT_B_TO_CLU_ROUND_UP(info->size, sbi) > sbi->used_clusters)) {
- exfat_fs_error(sb, "data size is invalid(%lld)", info->size);
- return -EIO;
- }
-
info->start_clu = le32_to_cpu(ep2->dentry.stream.start_clu);
if (!is_valid_cluster(sbi, info->start_clu) && info->size) {
exfat_warn(sb, "start_clu is invalid cluster(0x%x)",
@@ -692,6 +682,16 @@ static int exfat_find(struct inode *dir, const struct qstr *qname,
0);
exfat_put_dentry_set(&es, false);
+ if (info->valid_size < 0) {
+ exfat_fs_error(sb, "data valid size is invalid(%lld)", info->valid_size);
+ return -EIO;
+ }
+
+ if (unlikely(EXFAT_B_TO_CLU_ROUND_UP(info->size, sbi) > sbi->used_clusters)) {
+ exfat_fs_error(sb, "data size is invalid(%lld)", info->size);
+ return -EIO;
+ }
+
if (ei->start_clu == EXFAT_FREE_CLUSTER) {
exfat_fs_error(sb,
"non-zero size file starts with zero cluster (size : %llu, p_dir : %u, entry : 0x%08x)",
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 74d451f732c7..10e872a99663 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -629,6 +629,17 @@ static int __exfat_fill_super(struct super_block *sb,
goto free_bh;
}
+ if (!exfat_test_bitmap(sb, sbi->root_dir)) {
+ exfat_warn(sb, "failed to test first cluster bit of root dir(%u)",
+ sbi->root_dir);
+ /*
+ * The first cluster bit of the root directory should never
+ * be unset except when storage is corrupted. This bit is
+ * set to allow operations after mount.
+ */
+ exfat_set_bitmap(sb, sbi->root_dir, false);
+ }
+
ret = exfat_count_used_clusters(sb, &sbi->used_clusters);
if (ret) {
exfat_err(sb, "failed to scan clusters");
@@ -813,10 +824,21 @@ static int exfat_init_fs_context(struct fs_context *fc)
ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
- sbi->options.fs_uid = current_uid();
- sbi->options.fs_gid = current_gid();
- sbi->options.fs_fmask = current->fs->umask;
- sbi->options.fs_dmask = current->fs->umask;
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE && fc->root) {
+ struct super_block *sb = fc->root->d_sb;
+ struct exfat_mount_options *cur_opts = &EXFAT_SB(sb)->options;
+
+ sbi->options.fs_uid = cur_opts->fs_uid;
+ sbi->options.fs_gid = cur_opts->fs_gid;
+ sbi->options.fs_fmask = cur_opts->fs_fmask;
+ sbi->options.fs_dmask = cur_opts->fs_dmask;
+ } else {
+ sbi->options.fs_uid = current_uid();
+ sbi->options.fs_gid = current_gid();
+ sbi->options.fs_fmask = current->fs->umask;
+ sbi->options.fs_dmask = current->fs->umask;
+ }
+
sbi->options.allow_utime = -1;
sbi->options.errors = EXFAT_ERRORS_RO;
exfat_set_iocharset(&sbi->options, exfat_default_iocharset);
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index be53e06caf3d..d7e6f563b3e4 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1947,7 +1947,7 @@ static void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi,
return;
}
- cfolio = filemap_alloc_folio(__GFP_NOWARN | __GFP_IO, 0);
+ cfolio = filemap_alloc_folio(__GFP_NOWARN | __GFP_IO, 0, NULL);
if (!cfolio)
return;
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index bb407705603c..140bd5730d99 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -205,8 +205,7 @@ static const struct file_operations fuse_conn_congestion_threshold_ops = {
static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
struct fuse_conn *fc,
- const char *name,
- int mode, int nlink,
+ const char *name, int mode,
const struct inode_operations *iop,
const struct file_operations *fop)
{
@@ -232,10 +231,19 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
if (iop)
inode->i_op = iop;
inode->i_fop = fop;
- set_nlink(inode, nlink);
+ if (S_ISDIR(mode)) {
+ inc_nlink(d_inode(parent));
+ inc_nlink(inode);
+ }
inode->i_private = fc;
- d_add(dentry, inode);
-
+ d_make_persistent(dentry, inode);
+ dput(dentry);
+
+ /*
+ * We are returning a borrowed reference here - it's only good while
+ * fuse_mutex is held. Actually it's d_make_persistent() return
+ * value...
+ */
return dentry;
}
@@ -252,22 +260,21 @@ int fuse_ctl_add_conn(struct fuse_conn *fc)
return 0;
parent = fuse_control_sb->s_root;
- inc_nlink(d_inode(parent));
sprintf(name, "%u", fc->dev);
- parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2,
+ parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500,
&simple_dir_inode_operations,
&simple_dir_operations);
if (!parent)
goto err;
- if (!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400, 1,
+ if (!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400,
NULL, &fuse_ctl_waiting_ops) ||
- !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200, 1,
+ !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200,
NULL, &fuse_ctl_abort_ops) ||
!fuse_ctl_add_dentry(parent, fc, "max_background", S_IFREG | 0600,
- 1, NULL, &fuse_conn_max_background_ops) ||
+ NULL, &fuse_conn_max_background_ops) ||
!fuse_ctl_add_dentry(parent, fc, "congestion_threshold",
- S_IFREG | 0600, 1, NULL,
+ S_IFREG | 0600, NULL,
&fuse_conn_congestion_threshold_ops))
goto err;
@@ -289,18 +296,13 @@ static void remove_one(struct dentry *dentry)
*/
void fuse_ctl_remove_conn(struct fuse_conn *fc)
{
- struct dentry *dentry;
char name[32];
if (!fuse_control_sb || fc->no_control)
return;
sprintf(name, "%u", fc->dev);
- dentry = lookup_noperm_positive_unlocked(&QSTR(name), fuse_control_sb->s_root);
- if (!IS_ERR(dentry)) {
- simple_recursive_removal(dentry, remove_one);
- dput(dentry); // paired with lookup_noperm_positive_unlocked()
- }
+ simple_remove_by_name(fuse_control_sb->s_root, name, remove_one);
}
static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fsc)
@@ -350,7 +352,7 @@ static void fuse_ctl_kill_sb(struct super_block *sb)
fuse_control_sb = NULL;
mutex_unlock(&fuse_mutex);
- kill_litter_super(sb);
+ kill_anon_super(sb);
}
static struct file_system_type fuse_ctl_fs_type = {
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 132f38619d70..6d59cbc877c6 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -846,7 +846,7 @@ void fuse_copy_init(struct fuse_copy_state *cs, bool write,
}
/* Unmap and put previous page of userspace buffer */
-static void fuse_copy_finish(struct fuse_copy_state *cs)
+void fuse_copy_finish(struct fuse_copy_state *cs)
{
if (cs->currbuf) {
struct pipe_buffer *buf = cs->currbuf;
@@ -2041,13 +2041,14 @@ static int fuse_notify_resend(struct fuse_conn *fc)
/*
* Increments the fuse connection epoch. This will result of dentries from
- * previous epochs to be invalidated.
- *
- * XXX optimization: add call to shrink_dcache_sb()?
+ * previous epochs to be invalidated. Additionally, if inval_wq is set, a work
+ * queue is scheduled to trigger the invalidation.
*/
static int fuse_notify_inc_epoch(struct fuse_conn *fc)
{
atomic_inc(&fc->epoch);
+ if (inval_wq)
+ schedule_work(&fc->epoch_work);
return 0;
}
diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index f8c93dc45768..5ceb217ced1b 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
@@ -86,6 +86,7 @@ static void fuse_uring_req_end(struct fuse_ring_ent *ent, struct fuse_req *req,
lockdep_assert_not_held(&queue->lock);
spin_lock(&queue->lock);
ent->fuse_req = NULL;
+ list_del_init(&req->list);
if (test_bit(FR_BACKGROUND, &req->flags)) {
queue->active_background--;
spin_lock(&fc->bg_lock);
@@ -598,12 +599,14 @@ static int fuse_uring_copy_from_ring(struct fuse_ring *ring,
cs.is_uring = true;
cs.req = req;
- return fuse_copy_out_args(&cs, args, ring_in_out.payload_sz);
+ err = fuse_copy_out_args(&cs, args, ring_in_out.payload_sz);
+ fuse_copy_finish(&cs);
+ return err;
}
- /*
- * Copy data from the req to the ring buffer
- */
+/*
+ * Copy data from the req to the ring buffer
+ */
static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req,
struct fuse_ring_ent *ent)
{
@@ -649,6 +652,7 @@ static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req,
/* copy the payload */
err = fuse_copy_args(&cs, num_args, args->in_pages,
(struct fuse_arg *)in_args, 0);
+ fuse_copy_finish(&cs);
if (err) {
pr_info_ratelimited("%s fuse_copy_args failed\n", __func__);
return err;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 87a63ae93a45..4b6b3d2758ff 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -27,6 +27,67 @@ module_param(allow_sys_admin_access, bool, 0644);
MODULE_PARM_DESC(allow_sys_admin_access,
"Allow users with CAP_SYS_ADMIN in initial userns to bypass allow_other access check");
+struct dentry_bucket {
+ struct rb_root tree;
+ spinlock_t lock;
+};
+
+#define HASH_BITS 5
+#define HASH_SIZE (1 << HASH_BITS)
+static struct dentry_bucket dentry_hash[HASH_SIZE];
+struct delayed_work dentry_tree_work;
+
+/* Minimum invalidation work queue frequency */
+#define FUSE_DENTRY_INVAL_FREQ_MIN 5
+
+unsigned __read_mostly inval_wq;
+static int inval_wq_set(const char *val, const struct kernel_param *kp)
+{
+ unsigned int num;
+ unsigned int old = inval_wq;
+ int ret;
+
+ if (!val)
+ return -EINVAL;
+
+ ret = kstrtouint(val, 0, &num);
+ if (ret)
+ return ret;
+
+ if ((num < FUSE_DENTRY_INVAL_FREQ_MIN) && (num != 0))
+ return -EINVAL;
+
+ /* This should prevent overflow in secs_to_jiffies() */
+ if (num > USHRT_MAX)
+ return -EINVAL;
+
+ *((unsigned int *)kp->arg) = num;
+
+ if (num && !old)
+ schedule_delayed_work(&dentry_tree_work,
+ secs_to_jiffies(num));
+ else if (!num && old)
+ cancel_delayed_work_sync(&dentry_tree_work);
+
+ return 0;
+}
+static const struct kernel_param_ops inval_wq_ops = {
+ .set = inval_wq_set,
+ .get = param_get_uint,
+};
+module_param_cb(inval_wq, &inval_wq_ops, &inval_wq, 0644);
+__MODULE_PARM_TYPE(inval_wq, "uint");
+MODULE_PARM_DESC(inval_wq,
+ "Dentries invalidation work queue period in secs (>= "
+ __stringify(FUSE_DENTRY_INVAL_FREQ_MIN) ").");
+
+static inline struct dentry_bucket *get_dentry_bucket(struct dentry *dentry)
+{
+ int i = hash_ptr(dentry, HASH_BITS);
+
+ return &dentry_hash[i];
+}
+
static void fuse_advise_use_readdirplus(struct inode *dir)
{
struct fuse_inode *fi = get_fuse_inode(dir);
@@ -34,33 +95,151 @@ static void fuse_advise_use_readdirplus(struct inode *dir)
set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
}
-#if BITS_PER_LONG >= 64
-static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
+struct fuse_dentry {
+ u64 time;
+ union {
+ struct rcu_head rcu;
+ struct rb_node node;
+ };
+ struct dentry *dentry;
+};
+
+static void __fuse_dentry_tree_del_node(struct fuse_dentry *fd,
+ struct dentry_bucket *bucket)
{
- entry->d_fsdata = (void *) time;
+ if (!RB_EMPTY_NODE(&fd->node)) {
+ rb_erase(&fd->node, &bucket->tree);
+ RB_CLEAR_NODE(&fd->node);
+ }
}
-static inline u64 fuse_dentry_time(const struct dentry *entry)
+static void fuse_dentry_tree_del_node(struct dentry *dentry)
{
- return (u64)entry->d_fsdata;
+ struct fuse_dentry *fd = dentry->d_fsdata;
+ struct dentry_bucket *bucket = get_dentry_bucket(dentry);
+
+ spin_lock(&bucket->lock);
+ __fuse_dentry_tree_del_node(fd, bucket);
+ spin_unlock(&bucket->lock);
}
-#else
-union fuse_dentry {
- u64 time;
- struct rcu_head rcu;
-};
+static void fuse_dentry_tree_add_node(struct dentry *dentry)
+{
+ struct fuse_dentry *fd = dentry->d_fsdata;
+ struct dentry_bucket *bucket;
+ struct fuse_dentry *cur;
+ struct rb_node **p, *parent = NULL;
+
+ if (!inval_wq)
+ return;
+
+ bucket = get_dentry_bucket(dentry);
+
+ spin_lock(&bucket->lock);
+
+ __fuse_dentry_tree_del_node(fd, bucket);
+
+ p = &bucket->tree.rb_node;
+ while (*p) {
+ parent = *p;
+ cur = rb_entry(*p, struct fuse_dentry, node);
+ if (fd->time < cur->time)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+ rb_link_node(&fd->node, parent, p);
+ rb_insert_color(&fd->node, &bucket->tree);
+ spin_unlock(&bucket->lock);
+}
+
+/*
+ * work queue which, when enabled, will periodically check for expired dentries
+ * in the dentries tree.
+ */
+static void fuse_dentry_tree_work(struct work_struct *work)
+{
+ LIST_HEAD(dispose);
+ struct fuse_dentry *fd;
+ struct rb_node *node;
+ int i;
+
+ for (i = 0; i < HASH_SIZE; i++) {
+ spin_lock(&dentry_hash[i].lock);
+ node = rb_first(&dentry_hash[i].tree);
+ while (node) {
+ fd = rb_entry(node, struct fuse_dentry, node);
+ if (time_after64(get_jiffies_64(), fd->time)) {
+ rb_erase(&fd->node, &dentry_hash[i].tree);
+ RB_CLEAR_NODE(&fd->node);
+ spin_unlock(&dentry_hash[i].lock);
+ d_dispose_if_unused(fd->dentry, &dispose);
+ cond_resched();
+ spin_lock(&dentry_hash[i].lock);
+ } else
+ break;
+ node = rb_first(&dentry_hash[i].tree);
+ }
+ spin_unlock(&dentry_hash[i].lock);
+ shrink_dentry_list(&dispose);
+ }
+
+ if (inval_wq)
+ schedule_delayed_work(&dentry_tree_work,
+ secs_to_jiffies(inval_wq));
+}
+
+void fuse_epoch_work(struct work_struct *work)
+{
+ struct fuse_conn *fc = container_of(work, struct fuse_conn,
+ epoch_work);
+ struct fuse_mount *fm;
+ struct inode *inode;
+
+ down_read(&fc->killsb);
+
+ inode = fuse_ilookup(fc, FUSE_ROOT_ID, &fm);
+ if (inode) {
+ iput(inode);
+ /* Remove all possible active references to cached inodes */
+ shrink_dcache_sb(fm->sb);
+ } else
+ pr_warn("Failed to get root inode");
+
+ up_read(&fc->killsb);
+}
+
+void fuse_dentry_tree_init(void)
+{
+ int i;
+
+ for (i = 0; i < HASH_SIZE; i++) {
+ spin_lock_init(&dentry_hash[i].lock);
+ dentry_hash[i].tree = RB_ROOT;
+ }
+ INIT_DELAYED_WORK(&dentry_tree_work, fuse_dentry_tree_work);
+}
+
+void fuse_dentry_tree_cleanup(void)
+{
+ int i;
+
+ inval_wq = 0;
+ cancel_delayed_work_sync(&dentry_tree_work);
+
+ for (i = 0; i < HASH_SIZE; i++)
+ WARN_ON_ONCE(!RB_EMPTY_ROOT(&dentry_hash[i].tree));
+}
static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
{
- ((union fuse_dentry *) dentry->d_fsdata)->time = time;
+ ((struct fuse_dentry *) dentry->d_fsdata)->time = time;
}
static inline u64 fuse_dentry_time(const struct dentry *entry)
{
- return ((union fuse_dentry *) entry->d_fsdata)->time;
+ return ((struct fuse_dentry *) entry->d_fsdata)->time;
}
-#endif
static void fuse_dentry_settime(struct dentry *dentry, u64 time)
{
@@ -81,6 +260,7 @@ static void fuse_dentry_settime(struct dentry *dentry, u64 time)
}
__fuse_dentry_settime(dentry, time);
+ fuse_dentry_tree_add_node(dentry);
}
/*
@@ -283,21 +463,36 @@ invalid:
goto out;
}
-#if BITS_PER_LONG < 64
static int fuse_dentry_init(struct dentry *dentry)
{
- dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
- GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
+ struct fuse_dentry *fd;
- return dentry->d_fsdata ? 0 : -ENOMEM;
+ fd = kzalloc(sizeof(struct fuse_dentry),
+ GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
+ if (!fd)
+ return -ENOMEM;
+
+ fd->dentry = dentry;
+ RB_CLEAR_NODE(&fd->node);
+ dentry->d_fsdata = fd;
+
+ return 0;
}
+
+static void fuse_dentry_prune(struct dentry *dentry)
+{
+ struct fuse_dentry *fd = dentry->d_fsdata;
+
+ if (!RB_EMPTY_NODE(&fd->node))
+ fuse_dentry_tree_del_node(dentry);
+}
+
static void fuse_dentry_release(struct dentry *dentry)
{
- union fuse_dentry *fd = dentry->d_fsdata;
+ struct fuse_dentry *fd = dentry->d_fsdata;
kfree_rcu(fd, rcu);
}
-#endif
static int fuse_dentry_delete(const struct dentry *dentry)
{
@@ -331,10 +526,9 @@ static struct vfsmount *fuse_dentry_automount(struct path *path)
const struct dentry_operations fuse_dentry_operations = {
.d_revalidate = fuse_dentry_revalidate,
.d_delete = fuse_dentry_delete,
-#if BITS_PER_LONG < 64
.d_init = fuse_dentry_init,
+ .d_prune = fuse_dentry_prune,
.d_release = fuse_dentry_release,
-#endif
.d_automount = fuse_dentry_automount,
};
@@ -471,7 +665,7 @@ static int get_security_context(struct dentry *entry, umode_t mode,
u32 total_len = sizeof(*header);
int err, nr_ctx = 0;
const char *name = NULL;
- size_t namelen;
+ size_t namesize;
err = security_dentry_init_security(entry, mode, &entry->d_name,
&name, &lsmctx);
@@ -482,12 +676,12 @@ static int get_security_context(struct dentry *entry, umode_t mode,
if (lsmctx.len) {
nr_ctx = 1;
- namelen = strlen(name) + 1;
+ namesize = strlen(name) + 1;
err = -EIO;
- if (WARN_ON(namelen > XATTR_NAME_MAX + 1 ||
+ if (WARN_ON(namesize > XATTR_NAME_MAX + 1 ||
lsmctx.len > S32_MAX))
goto out_err;
- total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen +
+ total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namesize +
lsmctx.len);
}
@@ -504,8 +698,8 @@ static int get_security_context(struct dentry *entry, umode_t mode,
fctx->size = lsmctx.len;
ptr += sizeof(*fctx);
- strcpy(ptr, name);
- ptr += namelen;
+ strscpy(ptr, name, namesize);
+ ptr += namesize;
memcpy(ptr, lsmctx.context, lsmctx.len);
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 7bcb650a9f26..01bc894e9c2b 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -110,7 +110,9 @@ static void fuse_file_put(struct fuse_file *ff, bool sync)
fuse_file_io_release(ff, ra->inode);
if (!args) {
- /* Do nothing when server does not implement 'open' */
+ /* Do nothing when server does not implement 'opendir' */
+ } else if (args->opcode == FUSE_RELEASE && ff->fm->fc->no_open) {
+ fuse_release_end(ff->fm, args, 0);
} else if (sync) {
fuse_simple_request(ff->fm, args);
fuse_release_end(ff->fm, args, 0);
@@ -131,8 +133,17 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
struct fuse_file *ff;
int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
bool open = isdir ? !fc->no_opendir : !fc->no_open;
+ bool release = !isdir || open;
- ff = fuse_file_alloc(fm, open);
+ /*
+ * ff->args->release_args still needs to be allocated (so we can hold an
+ * inode reference while there are pending inflight file operations when
+ * ->release() is called, see fuse_prepare_release()) even if
+ * fc->no_open is set else it becomes possible for reclaim to deadlock
+ * if while servicing the readahead request the server triggers reclaim
+ * and reclaim evicts the inode of the file being read ahead.
+ */
+ ff = fuse_file_alloc(fm, release);
if (!ff)
return ERR_PTR(-ENOMEM);
@@ -152,13 +163,14 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
fuse_file_free(ff);
return ERR_PTR(err);
} else {
- /* No release needed */
- kfree(ff->args);
- ff->args = NULL;
- if (isdir)
+ if (isdir) {
+ /* No release needed */
+ kfree(ff->args);
+ ff->args = NULL;
fc->no_opendir = 1;
- else
+ } else {
fc->no_open = 1;
+ }
}
}
@@ -1652,7 +1664,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
if (!ia)
return -ENOMEM;
- if (fopen_direct_io && fc->direct_io_allow_mmap) {
+ if (fopen_direct_io) {
res = filemap_write_and_wait_range(mapping, pos, pos + count - 1);
if (res) {
fuse_io_free(ia);
@@ -1726,6 +1738,15 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
if (res > 0)
*ppos = pos;
+ if (res > 0 && write && fopen_direct_io) {
+ /*
+ * As in generic_file_direct_write(), invalidate after the
+ * write, to invalidate read-ahead cache that may have competed
+ * with the write.
+ */
+ invalidate_inode_pages2_range(mapping, idx_from, idx_to);
+ }
+
return res > 0 ? res : err;
}
EXPORT_SYMBOL_GPL(fuse_direct_io);
diff --git a/fs/fuse/fuse_dev_i.h b/fs/fuse/fuse_dev_i.h
index 6e8373f97040..134bf44aff0d 100644
--- a/fs/fuse/fuse_dev_i.h
+++ b/fs/fuse/fuse_dev_i.h
@@ -62,6 +62,7 @@ void fuse_dev_end_requests(struct list_head *head);
void fuse_copy_init(struct fuse_copy_state *cs, bool write,
struct iov_iter *iter);
+void fuse_copy_finish(struct fuse_copy_state *cs);
int fuse_copy_args(struct fuse_copy_state *cs, unsigned int numargs,
unsigned int argpages, struct fuse_arg *args,
int zeroing);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index f616c1991fed..7f16049387d1 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -54,6 +54,13 @@
/** Frequency (in jiffies) of request timeout checks, if opted into */
extern const unsigned long fuse_timeout_timer_freq;
+/*
+ * Dentries invalidation workqueue period, in seconds. The value of this
+ * parameter shall be >= FUSE_DENTRY_INVAL_FREQ_MIN seconds, or 0 (zero), in
+ * which case no workqueue will be created.
+ */
+extern unsigned inval_wq __read_mostly;
+
/** Maximum of max_pages received in init_out */
extern unsigned int fuse_max_pages_limit;
/*
@@ -232,6 +239,11 @@ enum {
FUSE_I_BTIME,
/* Wants or already has page cache IO */
FUSE_I_CACHE_IO_MODE,
+ /*
+ * Client has exclusive access to the inode, either because fs is local
+ * or the fuse server has an exclusive "lease" on distributed fs
+ */
+ FUSE_I_EXCLUSIVE,
};
struct fuse_conn;
@@ -642,6 +654,8 @@ struct fuse_conn {
/** Current epoch for up-to-date dentries */
atomic_t epoch;
+ struct work_struct epoch_work;
+
struct rcu_head rcu;
/** The user id for this mount */
@@ -1038,7 +1052,7 @@ static inline struct fuse_conn *get_fuse_conn(struct inode *inode)
return get_fuse_mount_super(inode->i_sb)->fc;
}
-static inline struct fuse_inode *get_fuse_inode(struct inode *inode)
+static inline struct fuse_inode *get_fuse_inode(const struct inode *inode)
{
return container_of(inode, struct fuse_inode, inode);
}
@@ -1080,6 +1094,13 @@ static inline bool fuse_is_bad(struct inode *inode)
return unlikely(test_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state));
}
+static inline bool fuse_inode_is_exclusive(const struct inode *inode)
+{
+ const struct fuse_inode *fi = get_fuse_inode(inode);
+
+ return test_bit(FUSE_I_EXCLUSIVE, &fi->state);
+}
+
static inline struct folio **fuse_folios_alloc(unsigned int nfolios, gfp_t flags,
struct fuse_folio_desc **desc)
{
@@ -1269,6 +1290,11 @@ void fuse_wait_aborted(struct fuse_conn *fc);
/* Check if any requests timed out */
void fuse_check_timeout(struct work_struct *work);
+void fuse_dentry_tree_init(void);
+void fuse_dentry_tree_cleanup(void);
+
+void fuse_epoch_work(struct work_struct *work);
+
/**
* Invalidate inode attributes
*/
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1a397be53f49..819e50d66622 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -977,6 +977,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
refcount_set(&fc->count, 1);
atomic_set(&fc->dev_count, 1);
atomic_set(&fc->epoch, 1);
+ INIT_WORK(&fc->epoch_work, fuse_epoch_work);
init_waitqueue_head(&fc->blocked_waitq);
fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv);
INIT_LIST_HEAD(&fc->bg_queue);
@@ -1021,26 +1022,28 @@ static void delayed_release(struct rcu_head *p)
void fuse_conn_put(struct fuse_conn *fc)
{
- if (refcount_dec_and_test(&fc->count)) {
- struct fuse_iqueue *fiq = &fc->iq;
- struct fuse_sync_bucket *bucket;
-
- if (IS_ENABLED(CONFIG_FUSE_DAX))
- fuse_dax_conn_free(fc);
- if (fc->timeout.req_timeout)
- cancel_delayed_work_sync(&fc->timeout.work);
- if (fiq->ops->release)
- fiq->ops->release(fiq);
- put_pid_ns(fc->pid_ns);
- bucket = rcu_dereference_protected(fc->curr_bucket, 1);
- if (bucket) {
- WARN_ON(atomic_read(&bucket->count) != 1);
- kfree(bucket);
- }
- if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
- fuse_backing_files_free(fc);
- call_rcu(&fc->rcu, delayed_release);
+ struct fuse_iqueue *fiq = &fc->iq;
+ struct fuse_sync_bucket *bucket;
+
+ if (!refcount_dec_and_test(&fc->count))
+ return;
+
+ if (IS_ENABLED(CONFIG_FUSE_DAX))
+ fuse_dax_conn_free(fc);
+ if (fc->timeout.req_timeout)
+ cancel_delayed_work_sync(&fc->timeout.work);
+ cancel_work_sync(&fc->epoch_work);
+ if (fiq->ops->release)
+ fiq->ops->release(fiq);
+ put_pid_ns(fc->pid_ns);
+ bucket = rcu_dereference_protected(fc->curr_bucket, 1);
+ if (bucket) {
+ WARN_ON(atomic_read(&bucket->count) != 1);
+ kfree(bucket);
}
+ if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
+ fuse_backing_files_free(fc);
+ call_rcu(&fc->rcu, delayed_release);
}
EXPORT_SYMBOL_GPL(fuse_conn_put);
@@ -2283,6 +2286,8 @@ static int __init fuse_init(void)
if (res)
goto err_sysfs_cleanup;
+ fuse_dentry_tree_init();
+
sanitize_global_limit(&max_user_bgreq);
sanitize_global_limit(&max_user_congthresh);
@@ -2302,6 +2307,7 @@ static void __exit fuse_exit(void)
{
pr_debug("exit\n");
+ fuse_dentry_tree_cleanup();
fuse_ctl_cleanup();
fuse_sysfs_cleanup();
fuse_fs_cleanup();
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 15b2f094d36e..aa02599b770f 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -3,40 +3,8 @@
#define __UM_FS_HOSTFS
#include <os.h>
+#include <generated/asm-offsets.h>
-/*
- * These are exactly the same definitions as in fs.h, but the names are
- * changed so that this file can be included in both kernel and user files.
- */
-
-#define HOSTFS_ATTR_MODE 1
-#define HOSTFS_ATTR_UID 2
-#define HOSTFS_ATTR_GID 4
-#define HOSTFS_ATTR_SIZE 8
-#define HOSTFS_ATTR_ATIME 16
-#define HOSTFS_ATTR_MTIME 32
-#define HOSTFS_ATTR_CTIME 64
-#define HOSTFS_ATTR_ATIME_SET 128
-#define HOSTFS_ATTR_MTIME_SET 256
-
-/* This one is unused by hostfs. */
-#define HOSTFS_ATTR_FORCE 512 /* Not a change, but a change it */
-#define HOSTFS_ATTR_ATTR_FLAG 1024
-
-/*
- * If you are very careful, you'll notice that these two are missing:
- *
- * #define ATTR_KILL_SUID 2048
- * #define ATTR_KILL_SGID 4096
- *
- * and this is because they were added in 2.5 development.
- * Actually, they are not needed by most ->setattr() methods - they are set by
- * callers of notify_change() to notify that the setuid/setgid bits must be
- * dropped.
- * notify_change() will delete those flags, make sure attr->ia_valid & ATTR_MODE
- * is on, and remove the appropriate bits from attr->ia_mode (attr is a
- * "struct iattr *"). -BlaisorBlade
- */
struct hostfs_timespec {
long long tv_sec;
long long tv_nsec;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index f42548ee9083..3b4c152c5c73 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -96,8 +96,15 @@ static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
#define PGOFF_LOFFT_MAX \
(((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1)))
-static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+static int hugetlb_file_mmap_prepare_success(const struct vm_area_struct *vma)
{
+	/* Unfortunately we have to reassign vma->vm_private_data. */
+ return hugetlb_vma_lock_alloc((struct vm_area_struct *)vma);
+}
+
+static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
+{
+ struct file *file = desc->file;
struct inode *inode = file_inode(file);
loff_t len, vma_len;
int ret;
@@ -112,8 +119,8 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
* way when do_mmap unwinds (may be important on powerpc
* and ia64).
*/
- vm_flags_set(vma, VM_HUGETLB | VM_DONTEXPAND);
- vma->vm_ops = &hugetlb_vm_ops;
+ desc->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
+ desc->vm_ops = &hugetlb_vm_ops;
/*
* page based offset in vm_pgoff could be sufficiently large to
@@ -122,16 +129,16 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
* sizeof(unsigned long). So, only check in those instances.
*/
if (sizeof(unsigned long) == sizeof(loff_t)) {
- if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
+ if (desc->pgoff & PGOFF_LOFFT_MAX)
return -EINVAL;
}
/* must be huge page aligned */
- if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
+ if (desc->pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
return -EINVAL;
- vma_len = (loff_t)(vma->vm_end - vma->vm_start);
- len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+ vma_len = (loff_t)vma_desc_size(desc);
+ len = vma_len + ((loff_t)desc->pgoff << PAGE_SHIFT);
/* check for overflow */
if (len < vma_len)
return -EINVAL;
@@ -141,7 +148,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
ret = -ENOMEM;
- vm_flags = vma->vm_flags;
+ vm_flags = desc->vm_flags;
/*
* for SHM_HUGETLB, the pages are reserved in the shmget() call so skip
* reserving here. Note: only for SHM hugetlbfs file, the inode
@@ -151,17 +158,30 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vm_flags |= VM_NORESERVE;
if (hugetlb_reserve_pages(inode,
- vma->vm_pgoff >> huge_page_order(h),
- len >> huge_page_shift(h), vma,
- vm_flags) < 0)
+ desc->pgoff >> huge_page_order(h),
+ len >> huge_page_shift(h), desc,
+ vm_flags) < 0)
goto out;
ret = 0;
- if (vma->vm_flags & VM_WRITE && inode->i_size < len)
+ if ((desc->vm_flags & VM_WRITE) && inode->i_size < len)
i_size_write(inode, len);
out:
inode_unlock(inode);
+ if (!ret) {
+		/* Allocate the VMA lock once the VMA has been set up. */
+ desc->action.success_hook = hugetlb_file_mmap_prepare_success;
+ /*
+		 * We cannot permit rmap to find this VMA in the window between
+		 * the VMA being inserted into the VMA tree and the
+		 * completion/success hook being invoked.
+ *
+ * This is because we establish a per-VMA hugetlb lock which can
+ * be raced by rmap.
+ */
+ desc->action.hide_from_rmap_until_complete = true;
+ }
return ret;
}
@@ -184,8 +204,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (addr)
addr0 = ALIGN(addr, huge_page_size(h));
- return mm_get_unmapped_area_vmflags(current->mm, file, addr0, len, pgoff,
- flags, 0);
+ return mm_get_unmapped_area_vmflags(file, addr0, len, pgoff, flags, 0);
}
/*
@@ -975,8 +994,7 @@ static int hugetlbfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
if (!inode)
return -ENOSPC;
inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
- d_instantiate(dentry, inode);
- dget(dentry);/* Extra count - pin the dentry in core */
+ d_make_persistent(dentry, inode);
return 0;
}
@@ -1023,10 +1041,9 @@ static int hugetlbfs_symlink(struct mnt_idmap *idmap,
if (inode) {
int l = strlen(symname)+1;
error = page_symlink(inode, symname, l);
- if (!error) {
- d_instantiate(dentry, inode);
- dget(dentry);
- } else
+ if (!error)
+ d_make_persistent(dentry, inode);
+ else
iput(inode);
}
inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
@@ -1221,7 +1238,7 @@ static void init_once(void *foo)
static const struct file_operations hugetlbfs_file_operations = {
.read_iter = hugetlbfs_read_iter,
- .mmap = hugetlbfs_file_mmap,
+ .mmap_prepare = hugetlbfs_file_mmap_prepare,
.fsync = noop_fsync,
.get_unmapped_area = hugetlb_get_unmapped_area,
.llseek = default_llseek,
@@ -1483,7 +1500,7 @@ static struct file_system_type hugetlbfs_fs_type = {
.name = "hugetlbfs",
.init_fs_context = hugetlbfs_init_fs_context,
.parameters = hugetlb_fs_parameters,
- .kill_sb = kill_litter_super,
+ .kill_sb = kill_anon_super,
.fs_flags = FS_ALLOW_IDMAP,
};
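
The hugetlbfs conversion above follows the general ->mmap to ->mmap_prepare shape: the handler receives a struct vm_area_desc and stages flags and ops on the descriptor instead of touching a live VMA. A minimal, hedged sketch of that shape, using only the descriptor fields and helpers visible in the hunks above (desc->file, desc->pgoff, desc->vm_flags, desc->vm_ops, vma_desc_size()); the ops table, the size policy and the function name are assumptions for illustration:

/* Illustrative sketch only; not part of this diff. */
static const struct vm_operations_struct example_vm_ops;

static int example_file_mmap_prepare(struct vm_area_desc *desc)
{
	struct inode *inode = file_inode(desc->file);
	loff_t len = ((loff_t)desc->pgoff << PAGE_SHIFT) + vma_desc_size(desc);

	/* Example policy: refuse mappings extending past i_size. */
	if (len > i_size_read(inode))
		return -EINVAL;

	/* Flags and ops are staged on the descriptor, not on a VMA. */
	desc->vm_flags |= VM_DONTEXPAND;
	desc->vm_ops = &example_vm_ops;
	return 0;
}
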
diff --git a/fs/inode.c b/fs/inode.c
index cc8265cfe80e..521383223d8a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1968,7 +1968,7 @@ void iput(struct inode *inode)
retry:
lockdep_assert_not_held(&inode->i_lock);
- VFS_BUG_ON_INODE(inode_state_read_once(inode) & I_CLEAR, inode);
+ VFS_BUG_ON_INODE(inode_state_read_once(inode) & (I_FREEING | I_CLEAR), inode);
/*
* Note this assert is technically racy as if the count is bogusly
* equal to one, then two CPUs racing to further drop it can both
@@ -2010,6 +2010,7 @@ EXPORT_SYMBOL(iput);
*/
void iput_not_last(struct inode *inode)
{
+ VFS_BUG_ON_INODE(inode_state_read_once(inode) & (I_FREEING | I_CLEAR), inode);
VFS_BUG_ON_INODE(atomic_read(&inode->i_count) < 2, inode);
WARN_ON(atomic_sub_return(1, &inode->i_count) == 0);
diff --git a/fs/internal.h b/fs/internal.h
index d08d5e2235e9..ab638d41ab81 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -230,7 +230,6 @@ extern void shrink_dcache_for_umount(struct super_block *);
extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *);
extern struct dentry *__d_lookup_rcu(const struct dentry *parent,
const struct qstr *name, unsigned *seq);
-extern void d_genocide(struct dentry *);
/*
* pipe.c
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index a670ba3e565e..5c0efd6b239f 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -675,11 +675,14 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
if (parent) {
ret = security_kernfs_init_security(parent, kn);
if (ret)
- goto err_out3;
+ goto err_out4;
}
return kn;
+ err_out4:
+ simple_xattrs_free(&kn->iattr->xattrs, NULL);
+ kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
err_out3:
spin_lock(&root->kernfs_idr_lock);
idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 76eaf64b9d9e..3ac52e141766 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -298,6 +298,7 @@ static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *k
if (info->root->flags & KERNFS_ROOT_SUPPORT_EXPORTOP)
sb->s_export_op = &kernfs_export_ops;
sb->s_time_gran = 1;
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
/* sysfs dentries and inodes don't require IO to create */
sb->s_shrink->seeks = 0;
diff --git a/fs/libfs.c b/fs/libfs.c
index 2d6657947abd..9264523be85c 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -630,7 +630,7 @@ static void __simple_recursive_removal(struct dentry *dentry,
if (callback)
callback(victim);
fsnotify_delete(inode, d_inode(victim), victim);
- dput(victim); // unpin it
+ d_make_discardable(victim);
}
if (victim == dentry) {
inode_set_mtime_to_ts(inode,
@@ -655,6 +655,19 @@ void simple_recursive_removal(struct dentry *dentry,
}
EXPORT_SYMBOL(simple_recursive_removal);
+void simple_remove_by_name(struct dentry *parent, const char *name,
+ void (*callback)(struct dentry *))
+{
+ struct dentry *dentry;
+
+ dentry = lookup_noperm_positive_unlocked(&QSTR(name), parent);
+ if (!IS_ERR(dentry)) {
+ simple_recursive_removal(dentry, callback);
+ dput(dentry); // paired with lookup_noperm_positive_unlocked()
+ }
+}
+EXPORT_SYMBOL(simple_remove_by_name);
+
/* caller holds parent directory with I_MUTEX_PARENT */
void locked_recursive_removal(struct dentry *dentry,
void (*callback)(struct dentry *))
@@ -752,8 +765,7 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den
inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode)));
inc_nlink(inode);
ihold(inode);
- dget(dentry);
- d_instantiate(dentry, inode);
+ d_make_persistent(dentry, inode);
return 0;
}
EXPORT_SYMBOL(simple_link);
@@ -779,14 +791,28 @@ out:
}
EXPORT_SYMBOL(simple_empty);
-int simple_unlink(struct inode *dir, struct dentry *dentry)
+void __simple_unlink(struct inode *dir, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
inode_set_mtime_to_ts(dir,
inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode)));
drop_nlink(inode);
- dput(dentry);
+}
+EXPORT_SYMBOL(__simple_unlink);
+
+void __simple_rmdir(struct inode *dir, struct dentry *dentry)
+{
+ drop_nlink(d_inode(dentry));
+ __simple_unlink(dir, dentry);
+ drop_nlink(dir);
+}
+EXPORT_SYMBOL(__simple_rmdir);
+
+int simple_unlink(struct inode *dir, struct dentry *dentry)
+{
+ __simple_unlink(dir, dentry);
+ d_make_discardable(dentry);
return 0;
}
EXPORT_SYMBOL(simple_unlink);
@@ -796,9 +822,8 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry)
if (!simple_empty(dentry))
return -ENOTEMPTY;
- drop_nlink(d_inode(dentry));
- simple_unlink(dir, dentry);
- drop_nlink(dir);
+ __simple_rmdir(dir, dentry);
+ d_make_discardable(dentry);
return 0;
}
EXPORT_SYMBOL(simple_rmdir);
@@ -1066,7 +1091,8 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
simple_inode_init_ts(inode);
inode->i_fop = files->ops;
inode->i_ino = i;
- d_add(dentry, inode);
+ d_make_persistent(dentry, inode);
+ dput(dentry);
}
return 0;
}
@@ -2312,3 +2338,11 @@ struct dentry *simple_start_creating(struct dentry *parent, const char *name)
return start_dirop(parent, &qname, LOOKUP_CREATE | LOOKUP_EXCL);
}
EXPORT_SYMBOL(simple_start_creating);
+
+/* parent must have been held exclusive since simple_start_creating() */
+void simple_done_creating(struct dentry *child)
+{
+ inode_unlock(child->d_parent->d_inode);
+ dput(child);
+}
+EXPORT_SYMBOL(simple_done_creating);
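
The new libfs helpers above pair up as follows; a hedged usage sketch in which the entry name, the wrapper functions and the NULL callback are illustrative, while simple_start_creating(), d_make_persistent(), simple_done_creating() and simple_remove_by_name() are the helpers from this diff:

/* Illustrative pairing only. */
static int example_add_entry(struct dentry *parent, struct inode *inode)
{
	struct dentry *dentry = simple_start_creating(parent, "example");

	if (IS_ERR(dentry))
		return PTR_ERR(dentry);
	d_make_persistent(dentry, inode);	/* instantiate and pin */
	simple_done_creating(dentry);		/* unlock parent, drop lookup ref */
	return 0;
}

static void example_remove_entry(struct dentry *parent)
{
	/* Drops the pin taken by d_make_persistent() on "example". */
	simple_remove_by_name(parent, "example", NULL);
}
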
diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
index 49ed90c6b9f2..f33bfa7b58e6 100644
--- a/fs/nfs/localio.c
+++ b/fs/nfs/localio.c
@@ -615,8 +615,11 @@ static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret)
nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */
}
-static void do_nfs_local_call_read(struct nfs_local_kiocb *iocb, struct file *filp)
+static void nfs_local_call_read(struct work_struct *work)
{
+ struct nfs_local_kiocb *iocb =
+ container_of(work, struct nfs_local_kiocb, work);
+ struct file *filp = iocb->kiocb.ki_filp;
bool force_done = false;
ssize_t status;
int n_iters;
@@ -633,7 +636,9 @@ static void do_nfs_local_call_read(struct nfs_local_kiocb *iocb, struct file *fi
} else
iocb->kiocb.ki_flags &= ~IOCB_DIRECT;
- status = filp->f_op->read_iter(&iocb->kiocb, &iocb->iters[i]);
+ scoped_with_creds(filp->f_cred)
+ status = filp->f_op->read_iter(&iocb->kiocb, &iocb->iters[i]);
+
if (status != -EIOCBQUEUED) {
if (unlikely(status >= 0 && status < iocb->iters[i].count))
force_done = true; /* Partial read */
@@ -645,16 +650,6 @@ static void do_nfs_local_call_read(struct nfs_local_kiocb *iocb, struct file *fi
}
}
-static void nfs_local_call_read(struct work_struct *work)
-{
- struct nfs_local_kiocb *iocb =
- container_of(work, struct nfs_local_kiocb, work);
- struct file *filp = iocb->kiocb.ki_filp;
-
- scoped_with_creds(filp->f_cred)
- do_nfs_local_call_read(iocb, filp);
-}
-
static int
nfs_local_do_read(struct nfs_local_kiocb *iocb,
const struct rpc_call_ops *call_ops)
@@ -822,13 +817,18 @@ static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret)
nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */
}
-static ssize_t do_nfs_local_call_write(struct nfs_local_kiocb *iocb,
- struct file *filp)
+static void nfs_local_call_write(struct work_struct *work)
{
+ struct nfs_local_kiocb *iocb =
+ container_of(work, struct nfs_local_kiocb, work);
+ struct file *filp = iocb->kiocb.ki_filp;
+ unsigned long old_flags = current->flags;
bool force_done = false;
ssize_t status;
int n_iters;
+ current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
+
file_start_write(filp);
n_iters = atomic_read(&iocb->n_iters);
for (int i = 0; i < n_iters ; i++) {
@@ -842,7 +842,9 @@ static ssize_t do_nfs_local_call_write(struct nfs_local_kiocb *iocb,
} else
iocb->kiocb.ki_flags &= ~IOCB_DIRECT;
- status = filp->f_op->write_iter(&iocb->kiocb, &iocb->iters[i]);
+ scoped_with_creds(filp->f_cred)
+ status = filp->f_op->write_iter(&iocb->kiocb, &iocb->iters[i]);
+
if (status != -EIOCBQUEUED) {
if (unlikely(status >= 0 && status < iocb->iters[i].count))
force_done = true; /* Partial write */
@@ -854,22 +856,6 @@ static ssize_t do_nfs_local_call_write(struct nfs_local_kiocb *iocb,
}
file_end_write(filp);
- return status;
-}
-
-static void nfs_local_call_write(struct work_struct *work)
-{
- struct nfs_local_kiocb *iocb =
- container_of(work, struct nfs_local_kiocb, work);
- struct file *filp = iocb->kiocb.ki_filp;
- unsigned long old_flags = current->flags;
- ssize_t status;
-
- current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
-
- scoped_with_creds(filp->f_cred)
- status = do_nfs_local_call_write(iocb, filp);
-
current->flags = old_flags;
}
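
The localio change above folds the read/write workers together so that scoped_with_creds() covers only the ->read_iter()/->write_iter() call itself rather than the whole submission loop. A hedged sketch of that narrowed scope — the wrapper below is illustrative, while scoped_with_creds() is used exactly as in the hunks above:

/* Illustrative only: override creds just for the filesystem call. */
static ssize_t example_read_as_file_owner(struct file *filp,
					  struct kiocb *kiocb,
					  struct iov_iter *iter)
{
	ssize_t ret;

	scoped_with_creds(filp->f_cred)
		ret = filp->f_op->read_iter(kiocb, iter);

	return ret;
}
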
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 2b79129703d5..5ce9a49e76ba 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1137,11 +1137,11 @@ static struct dentry *nfsd_mkdir(struct dentry *parent, struct nfsdfs_client *nc
inode->i_private = ncl;
kref_get(&ncl->cl_ref);
}
- d_instantiate(dentry, inode);
+ d_make_persistent(dentry, inode);
inc_nlink(dir);
fsnotify_mkdir(dir, dentry);
- inode_unlock(dir);
- return dentry;
+ simple_done_creating(dentry);
+ return dentry; // borrowed
}
#if IS_ENABLED(CONFIG_SUNRPC_GSS)
@@ -1170,9 +1170,9 @@ static void _nfsd_symlink(struct dentry *parent, const char *name,
inode->i_link = (char *)content;
inode->i_size = strlen(content);
- d_instantiate(dentry, inode);
+ d_make_persistent(dentry, inode);
fsnotify_create(dir, dentry);
- inode_unlock(dir);
+ simple_done_creating(dentry);
}
#else
static inline void _nfsd_symlink(struct dentry *parent, const char *name,
@@ -1228,11 +1228,11 @@ static int nfsdfs_create_files(struct dentry *root,
kref_get(&ncl->cl_ref);
inode->i_fop = files->ops;
inode->i_private = ncl;
- d_instantiate(dentry, inode);
+ d_make_persistent(dentry, inode);
fsnotify_create(dir, dentry);
if (fdentries)
- fdentries[i] = dentry;
- inode_unlock(dir);
+ fdentries[i] = dentry; // borrowed
+ simple_done_creating(dentry);
}
return 0;
}
@@ -1346,7 +1346,7 @@ static void nfsd_umount(struct super_block *sb)
nfsd_shutdown_threads(net);
- kill_litter_super(sb);
+ kill_anon_super(sb);
put_net(net);
}
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 5016bccc2ac5..2e7b2e566ebe 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -379,7 +379,7 @@ static int ntfs_file_mmap_prepare(struct vm_area_desc *desc)
if (rw) {
u64 to = min_t(loff_t, i_size_read(inode),
- from + desc->end - desc->start);
+ from + vma_desc_size(desc));
if (is_sparsed(ni)) {
/* Allocate clusters for rw map. */
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index cccaa1d6fbba..339f0b11cdc8 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -441,8 +441,7 @@ static struct dentry *dlmfs_mkdir(struct mnt_idmap * idmap,
ip->ip_conn = conn;
inc_nlink(dir);
- d_instantiate(dentry, inode);
- dget(dentry); /* Extra count - pin the dentry in core */
+ d_make_persistent(dentry, inode);
status = 0;
bail:
@@ -480,8 +479,7 @@ static int dlmfs_create(struct mnt_idmap *idmap,
goto bail;
}
- d_instantiate(dentry, inode);
- dget(dentry); /* Extra count - pin the dentry in core */
+ d_make_persistent(dentry, inode);
bail:
return status;
}
@@ -574,7 +572,7 @@ static int dlmfs_init_fs_context(struct fs_context *fc)
static struct file_system_type dlmfs_fs_type = {
.owner = THIS_MODULE,
.name = "ocfs2_dlmfs",
- .kill_sb = kill_litter_super,
+ .kill_sb = kill_anon_super,
.init_fs_context = dlmfs_init_fs_context,
};
MODULE_ALIAS_FS("ocfs2_dlmfs");
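
dlmfs above is one of several conversions in this diff from the open-coded d_instantiate() + dget() pin (with a bare dput() on removal) to the new helpers. A small sketch of the replacement calls, grounded directly in the removed and added lines; the wrapper names are illustrative:

/* Illustrative only: the pinned-dentry pattern after this diff. */
static void example_pin(struct dentry *dentry, struct inode *inode)
{
	/* Replaces d_instantiate(dentry, inode); dget(dentry); */
	d_make_persistent(dentry, inode);
}

static void example_unpin(struct dentry *dentry)
{
	/* Replaces the bare dput(dentry) that used to drop the pin. */
	d_make_discardable(dentry);
}
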
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 06b860b9ded6..ff3dbd1ca61f 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -581,7 +581,8 @@ out_cleanup_unlocked:
goto out_dput;
}
-static const struct cred *ovl_override_creator_creds(struct dentry *dentry, struct inode *inode, umode_t mode)
+static const struct cred *ovl_override_creator_creds(const struct cred *original_creds,
+ struct dentry *dentry, struct inode *inode, umode_t mode)
{
int err;
@@ -596,7 +597,7 @@ static const struct cred *ovl_override_creator_creds(struct dentry *dentry, stru
override_cred->fsgid = inode->i_gid;
err = security_dentry_create_files_as(dentry, mode, &dentry->d_name,
- current->cred, override_cred);
+ original_creds, override_cred);
if (err)
return ERR_PTR(err);
@@ -614,8 +615,11 @@ static void ovl_revert_creator_creds(const struct cred *old_cred)
DEFINE_CLASS(ovl_override_creator_creds,
const struct cred *,
if (!IS_ERR_OR_NULL(_T)) ovl_revert_creator_creds(_T),
- ovl_override_creator_creds(dentry, inode, mode),
- struct dentry *dentry, struct inode *inode, umode_t mode)
+ ovl_override_creator_creds(original_creds, dentry, inode, mode),
+ const struct cred *original_creds,
+ struct dentry *dentry,
+ struct inode *inode,
+ umode_t mode)
static int ovl_create_handle_whiteouts(struct dentry *dentry,
struct inode *inode,
@@ -633,7 +637,7 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
int err;
struct dentry *parent = dentry->d_parent;
- with_ovl_creds(dentry->d_sb) {
+ scoped_class(override_creds_ovl, original_creds, dentry->d_sb) {
/*
* When linking a file with copy up origin into a new parent, mark the
* new parent dir "impure".
@@ -661,7 +665,7 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
if (attr->hardlink)
return ovl_create_handle_whiteouts(dentry, inode, attr);
- scoped_class(ovl_override_creator_creds, cred, dentry, inode, attr->mode) {
+ scoped_class(ovl_override_creator_creds, cred, original_creds, dentry, inode, attr->mode) {
if (IS_ERR(cred))
return PTR_ERR(cred);
return ovl_create_handle_whiteouts(dentry, inode, attr);
@@ -1364,8 +1368,8 @@ static int ovl_create_tmpfile(struct file *file, struct dentry *dentry,
int flags = file->f_flags | OVL_OPEN_FLAGS;
int err;
- with_ovl_creds(dentry->d_sb) {
- scoped_class(ovl_override_creator_creds, cred, dentry, inode, mode) {
+ scoped_class(override_creds_ovl, original_creds, dentry->d_sb) {
+ scoped_class(ovl_override_creator_creds, cred, original_creds, dentry, inode, mode) {
if (IS_ERR(cred))
return PTR_ERR(cred);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 28b2f707cfbc..ba9146f22a2c 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -128,9 +128,17 @@ static int ovl_dentry_revalidate_common(struct dentry *dentry,
unsigned int i;
int ret = 1;
- /* Careful in RCU mode */
- if (!inode)
+ if (!inode) {
+ /*
+ * Lookup of negative dentries will call ovl_dentry_init_flags()
+ * with NULL upperdentry and NULL oe, resulting in the
+		 * DCACHE_OP*_REVALIDATE flags being cleared. Hence the only
+		 * way to see a NULL inode here is a race with dentry
+		 * destruction.
+ */
+ WARN_ON(!(flags & LOOKUP_RCU));
return -ECHILD;
+ }
oe = OVL_I_E(inode);
lowerstack = ovl_lowerstack(oe);
diff --git a/fs/pipe.c b/fs/pipe.c
index 2d0fed2ecbfd..9e6a01475815 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1481,31 +1481,16 @@ static struct file_system_type pipe_fs_type = {
};
#ifdef CONFIG_SYSCTL
-static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
- unsigned int *valp,
- int write, void *data)
-{
- if (write) {
- unsigned int val;
-
- val = round_pipe_size(*lvalp);
- if (val == 0)
- return -EINVAL;
-
- *valp = val;
- } else {
- unsigned int val = *valp;
- *lvalp = (unsigned long) val;
- }
-
- return 0;
-}
+static SYSCTL_USER_TO_KERN_UINT_CONV(_pipe_maxsz, round_pipe_size)
+static SYSCTL_UINT_CONV_CUSTOM(_pipe_maxsz,
+ sysctl_user_to_kern_uint_conv_pipe_maxsz,
+ sysctl_kern_to_user_uint_conv, true)
static int proc_dopipe_max_size(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- return do_proc_douintvec(table, write, buffer, lenp, ppos,
- do_proc_dopipe_max_size_conv, NULL);
+ return proc_douintvec_conv(table, write, buffer, lenp, ppos,
+ do_proc_uint_conv_pipe_maxsz);
}
static const struct ctl_table fs_pipe_sysctls[] = {
@@ -1515,6 +1500,7 @@ static const struct ctl_table fs_pipe_sysctls[] = {
.maxlen = sizeof(pipe_max_size),
.mode = 0644,
.proc_handler = proc_dopipe_max_size,
+ .extra1 = SYSCTL_ONE,
},
{
.procname = "pipe-user-pages-hard",
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 407b41cb6e7c..4eec684baca9 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3578,14 +3578,12 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
return 0;
if (pos == TGID_OFFSET - 2) {
- struct inode *inode = d_inode(fs_info->proc_self);
- if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
+ if (!dir_emit(ctx, "self", 4, self_inum, DT_LNK))
return 0;
ctx->pos = pos = pos + 1;
}
if (pos == TGID_OFFSET - 1) {
- struct inode *inode = d_inode(fs_info->proc_thread_self);
- if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
+ if (!dir_emit(ctx, "thread-self", 11, thread_self_inum, DT_LNK))
return 0;
ctx->pos = pos = pos + 1;
}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d9b7ef122343..2d3425cfa94b 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -443,7 +443,7 @@ pde_get_unmapped_area(struct proc_dir_entry *pde, struct file *file, unsigned lo
return pde->proc_ops->proc_get_unmapped_area(file, orig_addr, len, pgoff, flags);
#ifdef CONFIG_MMU
- return mm_get_unmapped_area(current->mm, file, orig_addr, len, pgoff, flags);
+ return mm_get_unmapped_area(file, orig_addr, len, pgoff, flags);
#endif
return orig_addr;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index d1598576506c..c1e8eb984da8 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -373,6 +373,7 @@ static inline void proc_tty_init(void) {}
extern struct proc_dir_entry proc_root;
extern void proc_self_init(void);
+extern unsigned self_inum, thread_self_inum;
/*
* task_[no]mmu.c
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 1e24e085c7d5..d8ca41d823e4 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -347,17 +347,11 @@ static void proc_kill_sb(struct super_block *sb)
{
struct proc_fs_info *fs_info = proc_sb_info(sb);
- if (!fs_info) {
- kill_anon_super(sb);
- return;
- }
-
- dput(fs_info->proc_self);
- dput(fs_info->proc_thread_self);
-
kill_anon_super(sb);
- put_pid_ns(fs_info->pid_ns);
- kfree_rcu(fs_info, rcu);
+ if (fs_info) {
+ put_pid_ns(fs_info->pid_ns);
+ kfree_rcu(fs_info, rcu);
+ }
}
static struct file_system_type proc_fs_type = {
diff --git a/fs/proc/self.c b/fs/proc/self.c
index b46fbfd22681..62d2c0cfe35c 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -31,12 +31,11 @@ static const struct inode_operations proc_self_inode_operations = {
.get_link = proc_self_get_link,
};
-static unsigned self_inum __ro_after_init;
+unsigned self_inum __ro_after_init;
int proc_setup_self(struct super_block *s)
{
struct inode *root_inode = d_inode(s->s_root);
- struct proc_fs_info *fs_info = proc_sb_info(s);
struct dentry *self;
int ret = -ENOMEM;
@@ -51,18 +50,15 @@ int proc_setup_self(struct super_block *s)
inode->i_uid = GLOBAL_ROOT_UID;
inode->i_gid = GLOBAL_ROOT_GID;
inode->i_op = &proc_self_inode_operations;
- d_add(self, inode);
+ d_make_persistent(self, inode);
ret = 0;
- } else {
- dput(self);
}
+ dput(self);
}
inode_unlock(root_inode);
if (ret)
pr_err("proc_fill_super: can't allocate /proc/self\n");
- else
- fs_info->proc_self = self;
return ret;
}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index fc35a0543f01..81dfc26bfae8 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -14,7 +14,7 @@
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/sched/mm.h>
-#include <linux/swapops.h>
+#include <linux/leafops.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/shmem_fs.h>
@@ -1017,14 +1017,16 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
young = pte_young(ptent);
dirty = pte_dirty(ptent);
present = true;
- } else if (is_swap_pte(ptent)) {
- swp_entry_t swpent = pte_to_swp_entry(ptent);
+ } else if (pte_none(ptent)) {
+ smaps_pte_hole_lookup(addr, walk);
+ } else {
+ const softleaf_t entry = softleaf_from_pte(ptent);
- if (!non_swap_entry(swpent)) {
+ if (softleaf_is_swap(entry)) {
int mapcount;
mss->swap += PAGE_SIZE;
- mapcount = swp_swapcount(swpent);
+ mapcount = swp_swapcount(entry);
if (mapcount >= 2) {
u64 pss_delta = (u64)PAGE_SIZE << PSS_SHIFT;
@@ -1033,14 +1035,11 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
} else {
mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
}
- } else if (is_pfn_swap_entry(swpent)) {
- if (is_device_private_entry(swpent))
+ } else if (softleaf_has_pfn(entry)) {
+ if (softleaf_is_device_private(entry))
present = true;
- page = pfn_swap_entry_to_page(swpent);
+ page = softleaf_to_page(entry);
}
- } else {
- smaps_pte_hole_lookup(addr, walk);
- return;
}
if (!page)
@@ -1060,14 +1059,16 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
bool present = false;
struct folio *folio;
+ if (pmd_none(*pmd))
+ return;
if (pmd_present(*pmd)) {
page = vm_normal_page_pmd(vma, addr, *pmd);
present = true;
- } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
- swp_entry_t entry = pmd_to_swp_entry(*pmd);
+ } else if (unlikely(thp_migration_supported())) {
+ const softleaf_t entry = softleaf_from_pmd(*pmd);
- if (is_pfn_swap_entry(entry))
- page = pfn_swap_entry_to_page(entry);
+ if (softleaf_has_pfn(entry))
+ page = softleaf_to_page(entry);
}
if (IS_ERR_OR_NULL(page))
return;
@@ -1146,6 +1147,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_MAYSHARE)] = "ms",
[ilog2(VM_GROWSDOWN)] = "gd",
[ilog2(VM_PFNMAP)] = "pf",
+ [ilog2(VM_MAYBE_GUARD)] = "gu",
[ilog2(VM_LOCKED)] = "lo",
[ilog2(VM_IO)] = "io",
[ilog2(VM_SEQ_READ)] = "sr",
@@ -1181,10 +1183,10 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_PKEY_BIT0)] = "",
[ilog2(VM_PKEY_BIT1)] = "",
[ilog2(VM_PKEY_BIT2)] = "",
-#if VM_PKEY_BIT3
+#if CONFIG_ARCH_PKEY_BITS > 3
[ilog2(VM_PKEY_BIT3)] = "",
#endif
-#if VM_PKEY_BIT4
+#if CONFIG_ARCH_PKEY_BITS > 4
[ilog2(VM_PKEY_BIT4)] = "",
#endif
#endif /* CONFIG_ARCH_HAS_PKEYS */
@@ -1230,11 +1232,11 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
if (pte_present(ptent)) {
folio = page_folio(pte_page(ptent));
present = true;
- } else if (is_swap_pte(ptent)) {
- swp_entry_t swpent = pte_to_swp_entry(ptent);
+ } else {
+ const softleaf_t entry = softleaf_from_pte(ptent);
- if (is_pfn_swap_entry(swpent))
- folio = pfn_swap_entry_folio(swpent);
+ if (softleaf_has_pfn(entry))
+ folio = softleaf_to_folio(entry);
}
if (folio) {
@@ -1582,8 +1584,6 @@ struct clear_refs_private {
enum clear_refs_types type;
};
-#ifdef CONFIG_MEM_SOFT_DIRTY
-
static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
{
struct folio *folio;
@@ -1603,6 +1603,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
static inline void clear_soft_dirty(struct vm_area_struct *vma,
unsigned long addr, pte_t *pte)
{
+ if (!pgtable_supports_soft_dirty())
+ return;
/*
* The soft-dirty tracker uses #PF-s to catch writes
* to pages, so write-protect the pte as well. See the
@@ -1611,6 +1613,9 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
*/
pte_t ptent = ptep_get(pte);
+ if (pte_none(ptent))
+ return;
+
if (pte_present(ptent)) {
pte_t old_pte;
@@ -1620,24 +1625,21 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
ptent = pte_wrprotect(old_pte);
ptent = pte_clear_soft_dirty(ptent);
ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
- } else if (is_swap_pte(ptent)) {
+ } else {
ptent = pte_swp_clear_soft_dirty(ptent);
set_pte_at(vma->vm_mm, addr, pte, ptent);
}
}
-#else
-static inline void clear_soft_dirty(struct vm_area_struct *vma,
- unsigned long addr, pte_t *pte)
-{
-}
-#endif
-#if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
pmd_t old, pmd = *pmdp;
+ if (!pgtable_supports_soft_dirty())
+ return;
+
if (pmd_present(pmd)) {
/* See comment in change_huge_pmd() */
old = pmdp_invalidate(vma, addr, pmdp);
@@ -1650,7 +1652,7 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
pmd = pmd_clear_soft_dirty(pmd);
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
- } else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
+ } else if (pmd_is_migration_entry(pmd)) {
pmd = pmd_swp_clear_soft_dirty(pmd);
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
}
@@ -1923,6 +1925,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
struct page *page = NULL;
struct folio *folio;
+ if (pte_none(pte))
+ goto out;
+
if (pte_present(pte)) {
if (pm->show_pfn)
frame = pte_pfn(pte);
@@ -1932,32 +1937,34 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
flags |= PM_SOFT_DIRTY;
if (pte_uffd_wp(pte))
flags |= PM_UFFD_WP;
- } else if (is_swap_pte(pte)) {
- swp_entry_t entry;
+ } else {
+ softleaf_t entry;
+
if (pte_swp_soft_dirty(pte))
flags |= PM_SOFT_DIRTY;
if (pte_swp_uffd_wp(pte))
flags |= PM_UFFD_WP;
- entry = pte_to_swp_entry(pte);
+ entry = softleaf_from_pte(pte);
if (pm->show_pfn) {
pgoff_t offset;
+
/*
* For PFN swap offsets, keeping the offset field
* to be PFN only to be compatible with old smaps.
*/
- if (is_pfn_swap_entry(entry))
- offset = swp_offset_pfn(entry);
+ if (softleaf_has_pfn(entry))
+ offset = softleaf_to_pfn(entry);
else
offset = swp_offset(entry);
frame = swp_type(entry) |
(offset << MAX_SWAPFILES_SHIFT);
}
flags |= PM_SWAP;
- if (is_pfn_swap_entry(entry))
- page = pfn_swap_entry_to_page(entry);
- if (pte_marker_entry_uffd_wp(entry))
+ if (softleaf_has_pfn(entry))
+ page = softleaf_to_page(entry);
+ if (softleaf_is_uffd_wp_marker(entry))
flags |= PM_UFFD_WP;
- if (is_guard_swp_entry(entry))
+ if (softleaf_is_guard_marker(entry))
flags |= PM_GUARD_REGION;
}
@@ -1969,96 +1976,110 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
__folio_page_mapped_exclusively(folio, page))
flags |= PM_MMAP_EXCLUSIVE;
}
+
+out:
if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;
return make_pme(frame, flags);
}
-static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
- struct mm_walk *walk)
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static int pagemap_pmd_range_thp(pmd_t *pmdp, unsigned long addr,
+ unsigned long end, struct vm_area_struct *vma,
+ struct pagemapread *pm)
{
- struct vm_area_struct *vma = walk->vma;
- struct pagemapread *pm = walk->private;
- spinlock_t *ptl;
- pte_t *pte, *orig_pte;
+ unsigned int idx = (addr & ~PMD_MASK) >> PAGE_SHIFT;
+ u64 flags = 0, frame = 0;
+ pmd_t pmd = *pmdp;
+ struct page *page = NULL;
+ struct folio *folio = NULL;
int err = 0;
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- ptl = pmd_trans_huge_lock(pmdp, vma);
- if (ptl) {
- unsigned int idx = (addr & ~PMD_MASK) >> PAGE_SHIFT;
- u64 flags = 0, frame = 0;
- pmd_t pmd = *pmdp;
- struct page *page = NULL;
- struct folio *folio = NULL;
+ if (vma->vm_flags & VM_SOFTDIRTY)
+ flags |= PM_SOFT_DIRTY;
- if (vma->vm_flags & VM_SOFTDIRTY)
- flags |= PM_SOFT_DIRTY;
+ if (pmd_none(pmd))
+ goto populate_pagemap;
- if (pmd_present(pmd)) {
- page = pmd_page(pmd);
+ if (pmd_present(pmd)) {
+ page = pmd_page(pmd);
- flags |= PM_PRESENT;
- if (pmd_soft_dirty(pmd))
- flags |= PM_SOFT_DIRTY;
- if (pmd_uffd_wp(pmd))
- flags |= PM_UFFD_WP;
- if (pm->show_pfn)
- frame = pmd_pfn(pmd) + idx;
- }
-#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
- else if (is_swap_pmd(pmd)) {
- swp_entry_t entry = pmd_to_swp_entry(pmd);
- unsigned long offset;
-
- if (pm->show_pfn) {
- if (is_pfn_swap_entry(entry))
- offset = swp_offset_pfn(entry) + idx;
- else
- offset = swp_offset(entry) + idx;
- frame = swp_type(entry) |
- (offset << MAX_SWAPFILES_SHIFT);
- }
- flags |= PM_SWAP;
- if (pmd_swp_soft_dirty(pmd))
- flags |= PM_SOFT_DIRTY;
- if (pmd_swp_uffd_wp(pmd))
- flags |= PM_UFFD_WP;
- VM_BUG_ON(!is_pmd_migration_entry(pmd));
- page = pfn_swap_entry_to_page(entry);
- }
-#endif
+ flags |= PM_PRESENT;
+ if (pmd_soft_dirty(pmd))
+ flags |= PM_SOFT_DIRTY;
+ if (pmd_uffd_wp(pmd))
+ flags |= PM_UFFD_WP;
+ if (pm->show_pfn)
+ frame = pmd_pfn(pmd) + idx;
+ } else if (thp_migration_supported()) {
+ const softleaf_t entry = softleaf_from_pmd(pmd);
+ unsigned long offset;
- if (page) {
- folio = page_folio(page);
- if (!folio_test_anon(folio))
- flags |= PM_FILE;
+ if (pm->show_pfn) {
+ if (softleaf_has_pfn(entry))
+ offset = softleaf_to_pfn(entry) + idx;
+ else
+ offset = swp_offset(entry) + idx;
+ frame = swp_type(entry) |
+ (offset << MAX_SWAPFILES_SHIFT);
}
+ flags |= PM_SWAP;
+ if (pmd_swp_soft_dirty(pmd))
+ flags |= PM_SOFT_DIRTY;
+ if (pmd_swp_uffd_wp(pmd))
+ flags |= PM_UFFD_WP;
+ VM_WARN_ON_ONCE(!pmd_is_migration_entry(pmd));
+ page = softleaf_to_page(entry);
+ }
+
+ if (page) {
+ folio = page_folio(page);
+ if (!folio_test_anon(folio))
+ flags |= PM_FILE;
+ }
- for (; addr != end; addr += PAGE_SIZE, idx++) {
- u64 cur_flags = flags;
- pagemap_entry_t pme;
+populate_pagemap:
+ for (; addr != end; addr += PAGE_SIZE, idx++) {
+ u64 cur_flags = flags;
+ pagemap_entry_t pme;
- if (folio && (flags & PM_PRESENT) &&
- __folio_page_mapped_exclusively(folio, page))
- cur_flags |= PM_MMAP_EXCLUSIVE;
+ if (folio && (flags & PM_PRESENT) &&
+ __folio_page_mapped_exclusively(folio, page))
+ cur_flags |= PM_MMAP_EXCLUSIVE;
- pme = make_pme(frame, cur_flags);
- err = add_to_pagemap(&pme, pm);
- if (err)
- break;
- if (pm->show_pfn) {
- if (flags & PM_PRESENT)
- frame++;
- else if (flags & PM_SWAP)
- frame += (1 << MAX_SWAPFILES_SHIFT);
- }
+ pme = make_pme(frame, cur_flags);
+ err = add_to_pagemap(&pme, pm);
+ if (err)
+ break;
+ if (pm->show_pfn) {
+ if (flags & PM_PRESENT)
+ frame++;
+ else if (flags & PM_SWAP)
+ frame += (1 << MAX_SWAPFILES_SHIFT);
}
+ }
+ return err;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct vm_area_struct *vma = walk->vma;
+ struct pagemapread *pm = walk->private;
+ spinlock_t *ptl;
+ pte_t *pte, *orig_pte;
+ int err = 0;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ ptl = pmd_trans_huge_lock(pmdp, vma);
+ if (ptl) {
+ err = pagemap_pmd_range_thp(pmdp, addr, end, vma, pm);
spin_unlock(ptl);
return err;
}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif
/*
* We can assume that @vma always points to a valid one and @end never
@@ -2310,12 +2331,16 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
struct vm_area_struct *vma,
unsigned long addr, pte_t pte)
{
- unsigned long categories = 0;
+ unsigned long categories;
+
+ if (pte_none(pte))
+ return 0;
if (pte_present(pte)) {
struct page *page;
- categories |= PAGE_IS_PRESENT;
+ categories = PAGE_IS_PRESENT;
+
if (!pte_uffd_wp(pte))
categories |= PAGE_IS_WRITTEN;
@@ -2329,19 +2354,20 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
categories |= PAGE_IS_PFNZERO;
if (pte_soft_dirty(pte))
categories |= PAGE_IS_SOFT_DIRTY;
- } else if (is_swap_pte(pte)) {
- swp_entry_t swp;
+ } else {
+ softleaf_t entry;
+
+ categories = PAGE_IS_SWAPPED;
- categories |= PAGE_IS_SWAPPED;
if (!pte_swp_uffd_wp_any(pte))
categories |= PAGE_IS_WRITTEN;
- swp = pte_to_swp_entry(pte);
- if (is_guard_swp_entry(swp))
+ entry = softleaf_from_pte(pte);
+ if (softleaf_is_guard_marker(entry))
categories |= PAGE_IS_GUARD;
else if ((p->masks_of_interest & PAGE_IS_FILE) &&
- is_pfn_swap_entry(swp) &&
- !folio_test_anon(pfn_swap_entry_folio(swp)))
+ softleaf_has_pfn(entry) &&
+ !folio_test_anon(softleaf_to_folio(entry)))
categories |= PAGE_IS_FILE;
if (pte_swp_soft_dirty(pte))
@@ -2360,12 +2386,12 @@ static void make_uffd_wp_pte(struct vm_area_struct *vma,
old_pte = ptep_modify_prot_start(vma, addr, pte);
ptent = pte_mkuffd_wp(old_pte);
ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
- } else if (is_swap_pte(ptent)) {
- ptent = pte_swp_mkuffd_wp(ptent);
- set_pte_at(vma->vm_mm, addr, pte, ptent);
- } else {
+ } else if (pte_none(ptent)) {
set_pte_at(vma->vm_mm, addr, pte,
make_pte_marker(PTE_MARKER_UFFD_WP));
+ } else {
+ ptent = pte_swp_mkuffd_wp(ptent);
+ set_pte_at(vma->vm_mm, addr, pte, ptent);
}
}
@@ -2376,6 +2402,9 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
{
unsigned long categories = PAGE_IS_HUGE;
+ if (pmd_none(pmd))
+ return categories;
+
if (pmd_present(pmd)) {
struct page *page;
@@ -2393,9 +2422,7 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
categories |= PAGE_IS_PFNZERO;
if (pmd_soft_dirty(pmd))
categories |= PAGE_IS_SOFT_DIRTY;
- } else if (is_swap_pmd(pmd)) {
- swp_entry_t swp;
-
+ } else {
categories |= PAGE_IS_SWAPPED;
if (!pmd_swp_uffd_wp(pmd))
categories |= PAGE_IS_WRITTEN;
@@ -2403,9 +2430,10 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
categories |= PAGE_IS_SOFT_DIRTY;
if (p->masks_of_interest & PAGE_IS_FILE) {
- swp = pmd_to_swp_entry(pmd);
- if (is_pfn_swap_entry(swp) &&
- !folio_test_anon(pfn_swap_entry_folio(swp)))
+ const softleaf_t entry = softleaf_from_pmd(pmd);
+
+ if (softleaf_has_pfn(entry) &&
+ !folio_test_anon(softleaf_to_folio(entry)))
categories |= PAGE_IS_FILE;
}
}
@@ -2422,7 +2450,7 @@ static void make_uffd_wp_pmd(struct vm_area_struct *vma,
old = pmdp_invalidate_ad(vma, addr, pmdp);
pmd = pmd_mkuffd_wp(old);
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
- } else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
+ } else if (pmd_is_migration_entry(pmd)) {
pmd = pmd_swp_mkuffd_wp(pmd);
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
}
@@ -2434,6 +2462,9 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
{
unsigned long categories = PAGE_IS_HUGE;
+ if (pte_none(pte))
+ return categories;
+
/*
* According to pagemap_hugetlb_range(), file-backed HugeTLB
* page cannot be swapped. So PAGE_IS_FILE is not checked for
@@ -2441,6 +2472,7 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
*/
if (pte_present(pte)) {
categories |= PAGE_IS_PRESENT;
+
if (!huge_pte_uffd_wp(pte))
categories |= PAGE_IS_WRITTEN;
if (!PageAnon(pte_page(pte)))
@@ -2449,8 +2481,9 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
categories |= PAGE_IS_PFNZERO;
if (pte_soft_dirty(pte))
categories |= PAGE_IS_SOFT_DIRTY;
- } else if (is_swap_pte(pte)) {
+ } else {
categories |= PAGE_IS_SWAPPED;
+
if (!pte_swp_uffd_wp_any(pte))
categories |= PAGE_IS_WRITTEN;
if (pte_swp_soft_dirty(pte))
@@ -2464,22 +2497,25 @@ static void make_uffd_wp_huge_pte(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t ptent)
{
- unsigned long psize;
+ const unsigned long psize = huge_page_size(hstate_vma(vma));
+ softleaf_t entry;
- if (is_hugetlb_entry_hwpoisoned(ptent) || is_pte_marker(ptent))
+ if (huge_pte_none(ptent)) {
+ set_huge_pte_at(vma->vm_mm, addr, ptep,
+ make_pte_marker(PTE_MARKER_UFFD_WP), psize);
return;
+ }
- psize = huge_page_size(hstate_vma(vma));
+ entry = softleaf_from_pte(ptent);
+ if (softleaf_is_hwpoison(entry) || softleaf_is_marker(entry))
+ return;
- if (is_hugetlb_entry_migration(ptent))
+ if (softleaf_is_migration(entry))
set_huge_pte_at(vma->vm_mm, addr, ptep,
pte_swp_mkuffd_wp(ptent), psize);
- else if (!huge_pte_none(ptent))
+ else
huge_ptep_modify_prot_commit(vma, addr, ptep, ptent,
huge_pte_mkuffd_wp(ptent));
- else
- set_huge_pte_at(vma->vm_mm, addr, ptep,
- make_pte_marker(PTE_MARKER_UFFD_WP), psize);
}
#endif /* CONFIG_HUGETLB_PAGE */
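
The task_mmu.c conversion above replaces open-coded is_swap_pte()/pte_to_swp_entry() handling with the softleaf helpers from <linux/leafops.h>. A hedged sketch of the classification pattern those hunks follow — the function below is illustrative, while the softleaf_* helpers are used exactly as in the diff:

/* Illustrative only: classify a PTE the way the hunks above do. */
static const char *example_classify_pte(pte_t pte)
{
	softleaf_t entry;

	if (pte_none(pte))
		return "none";
	if (pte_present(pte))
		return "present";

	entry = softleaf_from_pte(pte);
	if (softleaf_is_swap(entry))
		return "swap";
	if (softleaf_is_uffd_wp_marker(entry))
		return "uffd-wp marker";
	if (softleaf_is_guard_marker(entry))
		return "guard-region marker";
	if (softleaf_has_pfn(entry))
		return "pfn entry (e.g. migration or device-private)";
	return "other software leaf entry";
}
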
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index 0e5050d6ab64..d6113dbe58e0 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -31,12 +31,11 @@ static const struct inode_operations proc_thread_self_inode_operations = {
.get_link = proc_thread_self_get_link,
};
-static unsigned thread_self_inum __ro_after_init;
+unsigned thread_self_inum __ro_after_init;
int proc_setup_thread_self(struct super_block *s)
{
struct inode *root_inode = d_inode(s->s_root);
- struct proc_fs_info *fs_info = proc_sb_info(s);
struct dentry *thread_self;
int ret = -ENOMEM;
@@ -51,19 +50,15 @@ int proc_setup_thread_self(struct super_block *s)
inode->i_uid = GLOBAL_ROOT_UID;
inode->i_gid = GLOBAL_ROOT_GID;
inode->i_op = &proc_thread_self_inode_operations;
- d_add(thread_self, inode);
+ d_make_persistent(thread_self, inode);
ret = 0;
- } else {
- dput(thread_self);
}
+ dput(thread_self);
}
inode_unlock(root_inode);
if (ret)
pr_err("proc_fill_super: can't allocate /proc/thread-self\n");
- else
- fs_info->proc_thread_self = thread_self;
-
return ret;
}
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index b4e55c90f8dc..71deffcc3356 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -373,7 +373,7 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record)
if (!dentry)
return -ENOMEM;
- private->dentry = dentry;
+ private->dentry = dentry; // borrowed
private->record = record;
inode->i_size = private->total_size = size;
inode->i_private = private;
@@ -382,7 +382,8 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record)
inode_set_mtime_to_ts(inode,
inode_set_ctime_to_ts(inode, record->time));
- d_add(dentry, no_free_ptr(inode));
+ d_make_persistent(dentry, no_free_ptr(inode));
+ dput(dentry);
list_add(&(no_free_ptr(private))->list, &records_list);
@@ -465,7 +466,7 @@ static void pstore_kill_sb(struct super_block *sb)
guard(mutex)(&pstore_sb_lock);
WARN_ON(pstore_sb && pstore_sb != sb);
- kill_litter_super(sb);
+ kill_anon_super(sb);
pstore_sb = NULL;
guard(mutex)(&records_list_lock);
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index bc68b4de5287..39936d6da0dd 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -864,6 +864,8 @@ static int ramoops_probe(struct platform_device *pdev)
ramoops_console_size = pdata->console_size;
ramoops_pmsg_size = pdata->pmsg_size;
ramoops_ftrace_size = pdata->ftrace_size;
+ mem_type = pdata->mem_type;
+ ramoops_ecc = pdata->ecc_info.ecc_size;
pr_info("using 0x%lx@0x%llx, ecc: %d\n",
cxt->size, (unsigned long long)cxt->phys_addr,
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index b11f5b20b78b..c3ed1c5117b2 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -35,7 +35,7 @@ static unsigned long ramfs_mmu_get_unmapped_area(struct file *file,
unsigned long addr, unsigned long len, unsigned long pgoff,
unsigned long flags)
{
- return mm_get_unmapped_area(current->mm, file, addr, len, pgoff, flags);
+ return mm_get_unmapped_area(file, addr, len, pgoff, flags);
}
const struct file_operations ramfs_file_operations = {
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 41f9995da7ca..505d10a0cb36 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -110,8 +110,7 @@ ramfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
goto out;
}
- d_instantiate(dentry, inode);
- dget(dentry); /* Extra count - pin the dentry in core */
+ d_make_persistent(dentry, inode);
error = 0;
inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
}
@@ -154,8 +153,7 @@ static int ramfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
error = page_symlink(inode, symname, l);
if (!error) {
- d_instantiate(dentry, inode);
- dget(dentry);
+ d_make_persistent(dentry, inode);
inode_set_mtime_to_ts(dir,
inode_set_ctime_current(dir));
} else
@@ -313,7 +311,7 @@ int ramfs_init_fs_context(struct fs_context *fc)
void ramfs_kill_sb(struct super_block *sb)
{
kfree(sb->s_fs_info);
- kill_litter_super(sb);
+ kill_anon_super(sb);
}
static struct file_system_type ramfs_fs_type = {
diff --git a/fs/resctrl/pseudo_lock.c b/fs/resctrl/pseudo_lock.c
index 87bbc2605de1..0bfc13c5b96d 100644
--- a/fs/resctrl/pseudo_lock.c
+++ b/fs/resctrl/pseudo_lock.c
@@ -995,10 +995,11 @@ static const struct vm_operations_struct pseudo_mmap_ops = {
.mremap = pseudo_lock_dev_mremap,
};
-static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
+static int pseudo_lock_dev_mmap_prepare(struct vm_area_desc *desc)
{
- unsigned long vsize = vma->vm_end - vma->vm_start;
- unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
+ unsigned long off = desc->pgoff << PAGE_SHIFT;
+ unsigned long vsize = vma_desc_size(desc);
+ struct file *filp = desc->file;
struct pseudo_lock_region *plr;
struct rdtgroup *rdtgrp;
unsigned long physical;
@@ -1043,7 +1044,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
* Ensure changes are carried directly to the memory being mapped,
* do not allow copy-on-write mapping.
*/
- if (!(vma->vm_flags & VM_SHARED)) {
+ if (!(desc->vm_flags & VM_SHARED)) {
mutex_unlock(&rdtgroup_mutex);
return -EINVAL;
}
@@ -1055,12 +1056,9 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
memset(plr->kmem + off, 0, vsize);
- if (remap_pfn_range(vma, vma->vm_start, physical + vma->vm_pgoff,
- vsize, vma->vm_page_prot)) {
- mutex_unlock(&rdtgroup_mutex);
- return -EAGAIN;
- }
- vma->vm_ops = &pseudo_mmap_ops;
+ desc->vm_ops = &pseudo_mmap_ops;
+ mmap_action_remap_full(desc, physical + desc->pgoff);
+
mutex_unlock(&rdtgroup_mutex);
return 0;
}
@@ -1071,7 +1069,7 @@ static const struct file_operations pseudo_lock_dev_fops = {
.write = NULL,
.open = pseudo_lock_dev_open,
.release = pseudo_lock_dev_release,
- .mmap = pseudo_lock_dev_mmap,
+ .mmap_prepare = pseudo_lock_dev_mmap_prepare,
};
int rdt_pseudo_lock_init(void)
diff --git a/fs/super.c b/fs/super.c
index 7c66b96b59be..3d85265d1400 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1189,7 +1189,7 @@ static void filesystems_freeze_callback(struct super_block *sb, void *freeze_all
if (!sb->s_op->freeze_fs && !sb->s_op->freeze_super)
return;
- if (freeze_all_ptr && !(sb->s_type->fs_flags & FS_POWER_FREEZE))
+ if (!freeze_all_ptr && !(sb->s_type->fs_flags & FS_POWER_FREEZE))
return;
if (!get_active_super(sb))
@@ -1292,14 +1292,6 @@ void kill_anon_super(struct super_block *sb)
}
EXPORT_SYMBOL(kill_anon_super);
-void kill_litter_super(struct super_block *sb)
-{
- if (sb->s_root)
- d_genocide(sb->s_root);
- kill_anon_super(sb);
-}
-EXPORT_SYMBOL(kill_litter_super);
-
int set_anon_super_fc(struct super_block *sb, struct fs_context *fc)
{
return set_anon_super(sb, NULL);
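
With pinned dentries now dropped explicitly via d_make_discardable()/simple_recursive_removal(), nothing is left for d_genocide() to clean up at unmount, so kill_litter_super() is removed here and its users fall back to kill_anon_super() directly. A hedged sketch of a converted kill_sb — examplefs is an assumed name, and the shape mirrors ramfs_kill_sb() earlier in this diff:

/* Illustrative only. */
static void examplefs_kill_sb(struct super_block *sb)
{
	kfree(sb->s_fs_info);	/* free any per-sb state */
	kill_anon_super(sb);	/* no d_genocide() pass needed any more */
}
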
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index e142bac4f9f8..e1e639f515a0 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -36,6 +36,9 @@ static umode_t __first_visible(const struct attribute_group *grp, struct kobject
if (grp->attrs && grp->attrs[0] && grp->is_visible)
return grp->is_visible(kobj, grp->attrs[0], 0);
+ if (grp->attrs && grp->attrs[0] && grp->is_visible_const)
+ return grp->is_visible_const(kobj, grp->attrs[0], 0);
+
if (grp->bin_attrs && grp->bin_attrs[0] && grp->is_bin_visible)
return grp->is_bin_visible(kobj, grp->bin_attrs[0], 0);
@@ -61,8 +64,11 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
*/
if (update)
kernfs_remove_by_name(parent, (*attr)->name);
- if (grp->is_visible) {
- mode = grp->is_visible(kobj, *attr, i);
+ if (grp->is_visible || grp->is_visible_const) {
+ if (grp->is_visible)
+ mode = grp->is_visible(kobj, *attr, i);
+ else
+ mode = grp->is_visible_const(kobj, *attr, i);
mode &= ~SYSFS_GROUP_INVISIBLE;
if (!mode)
continue;
diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
index 8705c77a9e75..61cbdafa2411 100644
--- a/fs/tracefs/event_inode.c
+++ b/fs/tracefs/event_inode.c
@@ -757,7 +757,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry
const struct eventfs_entry *entries,
int size, void *data)
{
- struct dentry *dentry = tracefs_start_creating(name, parent);
+ struct dentry *dentry;
struct eventfs_root_inode *rei;
struct eventfs_inode *ei;
struct tracefs_inode *ti;
@@ -768,6 +768,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry
if (security_locked_down(LOCKDOWN_TRACEFS))
return NULL;
+ dentry = tracefs_start_creating(name, parent);
if (IS_ERR(dentry))
return ERR_CAST(dentry);
@@ -822,7 +823,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry
* something not worth much. Keeping directory links at 1
* tells userspace not to trust the link number.
*/
- d_instantiate(dentry, inode);
+ d_make_persistent(dentry, inode);
/* The dentry of the "events" parent does keep track though */
inc_nlink(dentry->d_parent->d_inode);
fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
@@ -909,5 +910,5 @@ void eventfs_remove_events_dir(struct eventfs_inode *ei)
* and destroyed dynamically.
*/
d_invalidate(dentry);
- dput(dentry);
+ d_make_discardable(dentry);
}
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 0c023941a316..d9d8932a7b9c 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -538,7 +538,7 @@ static struct file_system_type trace_fs_type = {
.name = "tracefs",
.init_fs_context = tracefs_init_fs_context,
.parameters = tracefs_param_specs,
- .kill_sb = kill_litter_super,
+ .kill_sb = kill_anon_super,
};
MODULE_ALIAS_FS("tracefs");
@@ -571,16 +571,15 @@ struct dentry *tracefs_start_creating(const char *name, struct dentry *parent)
struct dentry *tracefs_failed_creating(struct dentry *dentry)
{
- inode_unlock(d_inode(dentry->d_parent));
- dput(dentry);
+ simple_done_creating(dentry);
simple_release_fs(&tracefs_mount, &tracefs_mount_count);
return NULL;
}
struct dentry *tracefs_end_creating(struct dentry *dentry)
{
- inode_unlock(d_inode(dentry->d_parent));
- return dentry;
+ simple_done_creating(dentry);
+ return dentry; // borrowed
}
/* Find the inode that this will use for default */
@@ -661,7 +660,7 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
inode->i_private = data;
inode->i_uid = d_inode(dentry->d_parent)->i_uid;
inode->i_gid = d_inode(dentry->d_parent)->i_gid;
- d_instantiate(dentry, inode);
+ d_make_persistent(dentry, inode);
fsnotify_create(d_inode(dentry->d_parent), dentry);
return tracefs_end_creating(dentry);
}
@@ -692,7 +691,7 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent,
/* directory inodes start off with i_nlink == 2 (for "." entry) */
inc_nlink(inode);
- d_instantiate(dentry, inode);
+ d_make_persistent(dentry, inode);
inc_nlink(d_inode(dentry->d_parent));
fsnotify_mkdir(d_inode(dentry->d_parent), dentry);
return tracefs_end_creating(dentry);
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index e6e74b384087..c5ba1f4487bd 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -29,7 +29,7 @@
#include <linux/ioctl.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
-#include <linux/swapops.h>
+#include <linux/leafops.h>
#include <linux/miscdevice.h>
#include <linux/uio.h>
@@ -233,40 +233,48 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
{
struct vm_area_struct *vma = vmf->vma;
pte_t *ptep, pte;
- bool ret = true;
assert_fault_locked(vmf);
ptep = hugetlb_walk(vma, vmf->address, vma_mmu_pagesize(vma));
if (!ptep)
- goto out;
+ return true;
- ret = false;
pte = huge_ptep_get(vma->vm_mm, vmf->address, ptep);
/*
* Lockless access: we're in a wait_event so it's ok if it
- * changes under us. PTE markers should be handled the same as none
- * ptes here.
+ * changes under us.
+ */
+
+ /* Entry is still missing, wait for userspace to resolve the fault. */
+ if (huge_pte_none(pte))
+ return true;
+ /* UFFD PTE markers require userspace to resolve the fault. */
+ if (pte_is_uffd_marker(pte))
+ return true;
+ /*
+	 * If the VMA has UFFD WP faults enabled and this is a WP fault, wait
+	 * for userspace to resolve the fault.
*/
- if (huge_pte_none_mostly(pte))
- ret = true;
if (!huge_pte_write(pte) && (reason & VM_UFFD_WP))
- ret = true;
-out:
- return ret;
+ return true;
+
+ return false;
}
#else
static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
struct vm_fault *vmf,
unsigned long reason)
{
- return false; /* should never get here */
+ /* Should never get here. */
+ VM_WARN_ON_ONCE(1);
+ return false;
}
#endif /* CONFIG_HUGETLB_PAGE */
/*
- * Verify the pagetables are still not ok after having reigstered into
+ * Verify the pagetables are still not ok after having registered into
* the fault_pending_wqh to avoid userland having to UFFDIO_WAKE any
* userfault that has already been resolved, if userfaultfd_read_iter and
* UFFDIO_COPY|ZEROPAGE are being run simultaneously on two different
@@ -284,53 +292,63 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
pmd_t *pmd, _pmd;
pte_t *pte;
pte_t ptent;
- bool ret = true;
+ bool ret;
assert_fault_locked(vmf);
pgd = pgd_offset(mm, address);
if (!pgd_present(*pgd))
- goto out;
+ return true;
p4d = p4d_offset(pgd, address);
if (!p4d_present(*p4d))
- goto out;
+ return true;
pud = pud_offset(p4d, address);
if (!pud_present(*pud))
- goto out;
+ return true;
pmd = pmd_offset(pud, address);
again:
_pmd = pmdp_get_lockless(pmd);
if (pmd_none(_pmd))
- goto out;
+ return true;
- ret = false;
+ /*
+	 * A race could arise which would result in a softleaf entry, such as a
+	 * migration entry, unexpectedly being present in the PMD, so explicitly
+ * check for this and bail out if so.
+ */
if (!pmd_present(_pmd))
- goto out;
+ return false;
- if (pmd_trans_huge(_pmd)) {
- if (!pmd_write(_pmd) && (reason & VM_UFFD_WP))
- ret = true;
- goto out;
- }
+ if (pmd_trans_huge(_pmd))
+ return !pmd_write(_pmd) && (reason & VM_UFFD_WP);
pte = pte_offset_map(pmd, address);
- if (!pte) {
- ret = true;
+ if (!pte)
goto again;
- }
+
/*
* Lockless access: we're in a wait_event so it's ok if it
- * changes under us. PTE markers should be handled the same as none
- * ptes here.
+ * changes under us.
*/
ptent = ptep_get(pte);
- if (pte_none_mostly(ptent))
- ret = true;
+
+ ret = true;
+ /* Entry is still missing, wait for userspace to resolve the fault. */
+ if (pte_none(ptent))
+ goto out;
+ /* UFFD PTE markers require userspace to resolve the fault. */
+ if (pte_is_uffd_marker(ptent))
+ goto out;
+ /*
+	 * If the VMA has UFFD WP faults enabled and this is a WP fault, wait
+	 * for userspace to resolve the fault.
+ */
if (!pte_write(ptent) && (reason & VM_UFFD_WP))
- ret = true;
- pte_unmap(pte);
+ goto out;
+ ret = false;
out:
+ pte_unmap(pte);
return ret;
}
@@ -490,12 +508,13 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
set_current_state(blocking_state);
spin_unlock_irq(&ctx->fault_pending_wqh.lock);
- if (!is_vm_hugetlb_page(vma))
- must_wait = userfaultfd_must_wait(ctx, vmf, reason);
- else
+ if (is_vm_hugetlb_page(vma)) {
must_wait = userfaultfd_huge_must_wait(ctx, vmf, reason);
- if (is_vm_hugetlb_page(vma))
hugetlb_vma_unlock_read(vma);
+ } else {
+ must_wait = userfaultfd_must_wait(ctx, vmf, reason);
+ }
+
release_fault_lock(vmf);
if (likely(must_wait && !READ_ONCE(ctx->released))) {
@@ -1270,9 +1289,9 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
if (uffdio_register.mode & UFFDIO_REGISTER_MODE_MISSING)
vm_flags |= VM_UFFD_MISSING;
if (uffdio_register.mode & UFFDIO_REGISTER_MODE_WP) {
-#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP
- goto out;
-#endif
+ if (!pgtable_supports_uffd_wp())
+ goto out;
+
vm_flags |= VM_UFFD_WP;
}
if (uffdio_register.mode & UFFDIO_REGISTER_MODE_MINOR) {
@@ -1980,14 +1999,14 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
uffdio_api.features &=
~(UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM);
#endif
-#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP
- uffdio_api.features &= ~UFFD_FEATURE_PAGEFAULT_FLAG_WP;
-#endif
-#ifndef CONFIG_PTE_MARKER_UFFD_WP
- uffdio_api.features &= ~UFFD_FEATURE_WP_HUGETLBFS_SHMEM;
- uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED;
- uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC;
-#endif
+ if (!pgtable_supports_uffd_wp())
+ uffdio_api.features &= ~UFFD_FEATURE_PAGEFAULT_FLAG_WP;
+
+ if (!uffd_supports_wp_marker()) {
+ uffdio_api.features &= ~UFFD_FEATURE_WP_HUGETLBFS_SHMEM;
+ uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED;
+ uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC;
+ }
ret = -EINVAL;
if (features & ~uffdio_api.features)