summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig.binfmt33
-rw-r--r--fs/Makefile1
-rw-r--r--fs/binfmt_aout.c342
-rw-r--r--fs/btrfs/disk-io.c42
-rw-r--r--fs/btrfs/zoned.c40
-rw-r--r--fs/cifs/cifsfs.h4
-rw-r--r--fs/cifs/connect.c14
-rw-r--r--fs/cifs/file.c3
-rw-r--r--fs/cifs/transport.c6
-rw-r--r--fs/coredump.c39
-rw-r--r--fs/dax.c3
-rw-r--r--fs/exec.c10
-rw-r--r--fs/exfat/fatent.c3
-rw-r--r--fs/ext4/ext4.h10
-rw-r--r--fs/ext4/extents.c4
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/mballoc.c317
-rw-r--r--fs/ext4/mballoc.h1
-rw-r--r--fs/fat/file.c5
-rw-r--r--fs/internal.h3
-rw-r--r--fs/nfs/internal.h25
-rw-r--r--fs/nfs/nfs42proc.c9
-rw-r--r--fs/nfs/super.c27
-rw-r--r--fs/nfs/write.c25
-rw-r--r--fs/nfsd/vfs.c31
-rw-r--r--fs/ntfs/super.c3
-rw-r--r--fs/ntfs3/inode.c2
-rw-r--r--fs/ntfs3/xattr.c102
-rw-r--r--fs/open.c2
-rw-r--r--fs/overlayfs/overlayfs.h2
-rw-r--r--fs/overlayfs/super.c15
-rw-r--r--fs/posix_acl.c288
-rw-r--r--fs/pstore/platform.c63
-rw-r--r--fs/read_write.c22
-rw-r--r--fs/userfaultfd.c4
-rw-r--r--fs/xattr.c15
-rw-r--r--fs/xfs/xfs_notify_failure.c6
37 files changed, 632 insertions, 891 deletions
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 21e154516bf2..f14478643b91 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -142,39 +142,6 @@ config BINFMT_ZFLAT
help
Support FLAT format compressed binaries
-config HAVE_AOUT
- def_bool n
-
-config BINFMT_AOUT
- tristate "Kernel support for a.out and ECOFF binaries"
- depends on HAVE_AOUT
- help
- A.out (Assembler.OUTput) is a set of formats for libraries and
- executables used in the earliest versions of UNIX. Linux used
- the a.out formats QMAGIC and ZMAGIC until they were replaced
- with the ELF format.
-
- The conversion to ELF started in 1995. This option is primarily
- provided for historical interest and for the benefit of those
- who need to run binaries from that era.
-
- Most people should answer N here. If you think you may have
- occasional use for this format, enable module support above
- and answer M here to compile this support as a module called
- binfmt_aout.
-
- If any crucial components of your system (such as /sbin/init
- or /lib/ld.so) are still in a.out format, you will have to
- say Y here.
-
-config OSF4_COMPAT
- bool "OSF/1 v4 readv/writev compatibility"
- depends on ALPHA && BINFMT_AOUT
- help
- Say Y if you are using OSF/1 binaries (like Netscape and Acrobat)
- with v4 shared libraries freely available from Compaq. If you're
- going to use shared libraries from Tru64 version 5.0 or later, say N.
-
config BINFMT_MISC
tristate "Kernel support for MISC binaries"
help
diff --git a/fs/Makefile b/fs/Makefile
index 93b80529f8e8..4dea17840761 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -38,7 +38,6 @@ obj-$(CONFIG_FS_DAX) += dax.o
obj-$(CONFIG_FS_ENCRYPTION) += crypto/
obj-$(CONFIG_FS_VERITY) += verity/
obj-$(CONFIG_FILE_LOCKING) += locks.o
-obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o
obj-$(CONFIG_BINFMT_SCRIPT) += binfmt_script.o
obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
deleted file mode 100644
index 0dcfc691e7e2..000000000000
--- a/fs/binfmt_aout.c
+++ /dev/null
@@ -1,342 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/fs/binfmt_aout.c
- *
- * Copyright (C) 1991, 1992, 1996 Linus Torvalds
- */
-
-#include <linux/module.h>
-
-#include <linux/time.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/mman.h>
-#include <linux/a.out.h>
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include <linux/ptrace.h>
-#include <linux/user.h>
-#include <linux/binfmts.h>
-#include <linux/personality.h>
-#include <linux/init.h>
-#include <linux/coredump.h>
-#include <linux/slab.h>
-#include <linux/sched/task_stack.h>
-
-#include <linux/uaccess.h>
-#include <asm/cacheflush.h>
-
-static int load_aout_binary(struct linux_binprm *);
-static int load_aout_library(struct file*);
-
-static struct linux_binfmt aout_format = {
- .module = THIS_MODULE,
- .load_binary = load_aout_binary,
- .load_shlib = load_aout_library,
-};
-
-#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
-
-static int set_brk(unsigned long start, unsigned long end)
-{
- start = PAGE_ALIGN(start);
- end = PAGE_ALIGN(end);
- if (end > start)
- return vm_brk(start, end - start);
- return 0;
-}
-
-/*
- * create_aout_tables() parses the env- and arg-strings in new user
- * memory and creates the pointer tables from them, and puts their
- * addresses on the "stack", returning the new stack pointer value.
- */
-static unsigned long __user *create_aout_tables(char __user *p, struct linux_binprm * bprm)
-{
- char __user * __user *argv;
- char __user * __user *envp;
- unsigned long __user *sp;
- int argc = bprm->argc;
- int envc = bprm->envc;
-
- sp = (void __user *)((-(unsigned long)sizeof(char *)) & (unsigned long) p);
-#ifdef __alpha__
-/* whee.. test-programs are so much fun. */
- put_user(0, --sp);
- put_user(0, --sp);
- if (bprm->loader) {
- put_user(0, --sp);
- put_user(1003, --sp);
- put_user(bprm->loader, --sp);
- put_user(1002, --sp);
- }
- put_user(bprm->exec, --sp);
- put_user(1001, --sp);
-#endif
- sp -= envc+1;
- envp = (char __user * __user *) sp;
- sp -= argc+1;
- argv = (char __user * __user *) sp;
-#ifndef __alpha__
- put_user((unsigned long) envp,--sp);
- put_user((unsigned long) argv,--sp);
-#endif
- put_user(argc,--sp);
- current->mm->arg_start = (unsigned long) p;
- while (argc-->0) {
- char c;
- put_user(p,argv++);
- do {
- get_user(c,p++);
- } while (c);
- }
- put_user(NULL,argv);
- current->mm->arg_end = current->mm->env_start = (unsigned long) p;
- while (envc-->0) {
- char c;
- put_user(p,envp++);
- do {
- get_user(c,p++);
- } while (c);
- }
- put_user(NULL,envp);
- current->mm->env_end = (unsigned long) p;
- return sp;
-}
-
-/*
- * These are the functions used to load a.out style executables and shared
- * libraries. There is no binary dependent code anywhere else.
- */
-
-static int load_aout_binary(struct linux_binprm * bprm)
-{
- struct pt_regs *regs = current_pt_regs();
- struct exec ex;
- unsigned long error;
- unsigned long fd_offset;
- unsigned long rlim;
- int retval;
-
- ex = *((struct exec *) bprm->buf); /* exec-header */
- if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
- N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
- N_TRSIZE(ex) || N_DRSIZE(ex) ||
- i_size_read(file_inode(bprm->file)) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
- return -ENOEXEC;
- }
-
- /*
- * Requires a mmap handler. This prevents people from using a.out
- * as part of an exploit attack against /proc-related vulnerabilities.
- */
- if (!bprm->file->f_op->mmap)
- return -ENOEXEC;
-
- fd_offset = N_TXTOFF(ex);
-
- /* Check initial limits. This avoids letting people circumvent
- * size limits imposed on them by creating programs with large
- * arrays in the data or bss.
- */
- rlim = rlimit(RLIMIT_DATA);
- if (rlim >= RLIM_INFINITY)
- rlim = ~0;
- if (ex.a_data + ex.a_bss > rlim)
- return -ENOMEM;
-
- /* Flush all traces of the currently running executable */
- retval = begin_new_exec(bprm);
- if (retval)
- return retval;
-
- /* OK, This is the point of no return */
-#ifdef __alpha__
- SET_AOUT_PERSONALITY(bprm, ex);
-#else
- set_personality(PER_LINUX);
-#endif
- setup_new_exec(bprm);
-
- current->mm->end_code = ex.a_text +
- (current->mm->start_code = N_TXTADDR(ex));
- current->mm->end_data = ex.a_data +
- (current->mm->start_data = N_DATADDR(ex));
- current->mm->brk = ex.a_bss +
- (current->mm->start_brk = N_BSSADDR(ex));
-
- retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
- if (retval < 0)
- return retval;
-
-
- if (N_MAGIC(ex) == OMAGIC) {
- unsigned long text_addr, map_size;
- loff_t pos;
-
- text_addr = N_TXTADDR(ex);
-
-#ifdef __alpha__
- pos = fd_offset;
- map_size = ex.a_text+ex.a_data + PAGE_SIZE - 1;
-#else
- pos = 32;
- map_size = ex.a_text+ex.a_data;
-#endif
- error = vm_brk(text_addr & PAGE_MASK, map_size);
- if (error)
- return error;
-
- error = read_code(bprm->file, text_addr, pos,
- ex.a_text+ex.a_data);
- if ((signed long)error < 0)
- return error;
- } else {
- if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
- (N_MAGIC(ex) != NMAGIC) && printk_ratelimit())
- {
- printk(KERN_NOTICE "executable not page aligned\n");
- }
-
- if ((fd_offset & ~PAGE_MASK) != 0 && printk_ratelimit())
- {
- printk(KERN_WARNING
- "fd_offset is not page aligned. Please convert program: %pD\n",
- bprm->file);
- }
-
- if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
- error = vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
- if (error)
- return error;
-
- read_code(bprm->file, N_TXTADDR(ex), fd_offset,
- ex.a_text + ex.a_data);
- goto beyond_if;
- }
-
- error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
- PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE,
- fd_offset);
-
- if (error != N_TXTADDR(ex))
- return error;
-
- error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED | MAP_PRIVATE,
- fd_offset + ex.a_text);
- if (error != N_DATADDR(ex))
- return error;
- }
-beyond_if:
- set_binfmt(&aout_format);
-
- retval = set_brk(current->mm->start_brk, current->mm->brk);
- if (retval < 0)
- return retval;
-
- current->mm->start_stack =
- (unsigned long) create_aout_tables((char __user *) bprm->p, bprm);
-#ifdef __alpha__
- regs->gp = ex.a_gpvalue;
-#endif
- finalize_exec(bprm);
- start_thread(regs, ex.a_entry, current->mm->start_stack);
- return 0;
-}
-
-static int load_aout_library(struct file *file)
-{
- struct inode * inode;
- unsigned long bss, start_addr, len;
- unsigned long error;
- int retval;
- struct exec ex;
- loff_t pos = 0;
-
- inode = file_inode(file);
-
- retval = -ENOEXEC;
- error = kernel_read(file, &ex, sizeof(ex), &pos);
- if (error != sizeof(ex))
- goto out;
-
- /* We come in here for the regular a.out style of shared libraries */
- if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) ||
- N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) ||
- i_size_read(inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
- goto out;
- }
-
- /*
- * Requires a mmap handler. This prevents people from using a.out
- * as part of an exploit attack against /proc-related vulnerabilities.
- */
- if (!file->f_op->mmap)
- goto out;
-
- if (N_FLAGS(ex))
- goto out;
-
- /* For QMAGIC, the starting address is 0x20 into the page. We mask
- this off to get the starting address for the page */
-
- start_addr = ex.a_entry & 0xfffff000;
-
- if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
- if (printk_ratelimit())
- {
- printk(KERN_WARNING
- "N_TXTOFF is not page aligned. Please convert library: %pD\n",
- file);
- }
- retval = vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
- if (retval)
- goto out;
-
- read_code(file, start_addr, N_TXTOFF(ex),
- ex.a_text + ex.a_data);
- retval = 0;
- goto out;
- }
- /* Now use mmap to map the library into memory. */
- error = vm_mmap(file, start_addr, ex.a_text + ex.a_data,
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED | MAP_PRIVATE,
- N_TXTOFF(ex));
- retval = error;
- if (error != start_addr)
- goto out;
-
- len = PAGE_ALIGN(ex.a_text + ex.a_data);
- bss = ex.a_text + ex.a_data + ex.a_bss;
- if (bss > len) {
- retval = vm_brk(start_addr + len, bss - len);
- if (retval)
- goto out;
- }
- retval = 0;
-out:
- return retval;
-}
-
-static int __init init_aout_binfmt(void)
-{
- register_binfmt(&aout_format);
- return 0;
-}
-
-static void __exit exit_aout_binfmt(void)
-{
- unregister_binfmt(&aout_format);
-}
-
-core_initcall(init_aout_binfmt);
-module_exit(exit_aout_binfmt);
-MODULE_LICENSE("GPL");
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1af28b066b42..2633137c3e9f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4475,6 +4475,17 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
/*
+ * If we had UNFINISHED_DROPS we could still be processing them, so
+ * clear that bit and wake up relocation so it can stop.
+ * We must do this before stopping the block group reclaim task, because
+ * at btrfs_relocate_block_group() we wait for this bit, and after the
+ * wait we stop with -EINTR if btrfs_fs_closing() returns non-zero - we
+ * have just set BTRFS_FS_CLOSING_START, so btrfs_fs_closing() will
+ * return 1.
+ */
+ btrfs_wake_unfinished_drop(fs_info);
+
+ /*
* We may have the reclaim task running and relocating a data block group,
* in which case it may create delayed iputs. So stop it before we park
* the cleaner kthread otherwise we can get new delayed iputs after
@@ -4492,12 +4503,6 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
*/
kthread_park(fs_info->cleaner_kthread);
- /*
- * If we had UNFINISHED_DROPS we could still be processing them, so
- * clear that bit and wake up relocation so it can stop.
- */
- btrfs_wake_unfinished_drop(fs_info);
-
/* wait for the qgroup rescan worker to stop */
btrfs_qgroup_wait_for_completion(fs_info, false);
@@ -4520,6 +4525,31 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
/* clear out the rbtree of defraggable inodes */
btrfs_cleanup_defrag_inodes(fs_info);
+ /*
+ * After we parked the cleaner kthread, ordered extents may have
+ * completed and created new delayed iputs. If one of the async reclaim
+ * tasks is running and in the RUN_DELAYED_IPUTS flush state, then we
+ * can hang forever trying to stop it, because if a delayed iput is
+ * added after it ran btrfs_run_delayed_iputs() and before it called
+ * btrfs_wait_on_delayed_iputs(), it will hang forever since there is
+ * no one else to run iputs.
+ *
+ * So wait for all ongoing ordered extents to complete and then run
+ * delayed iputs. This works because once we reach this point no one
+ * can either create new ordered extents nor create delayed iputs
+ * through some other means.
+ *
+ * Also note that btrfs_wait_ordered_roots() is not safe here, because
+ * it waits for BTRFS_ORDERED_COMPLETE to be set on an ordered extent,
+ * but the delayed iput for the respective inode is made only when doing
+ * the final btrfs_put_ordered_extent() (which must happen at
+ * btrfs_finish_ordered_io() when we are unmounting).
+ */
+ btrfs_flush_workqueue(fs_info->endio_write_workers);
+ /* Ordered extents for free space inodes. */
+ btrfs_flush_workqueue(fs_info->endio_freespace_worker);
+ btrfs_run_delayed_iputs(fs_info);
+
cancel_work_sync(&fs_info->async_reclaim_work);
cancel_work_sync(&fs_info->async_data_reclaim_work);
cancel_work_sync(&fs_info->preempt_reclaim_work);
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 62e7007a7e46..73c6929f7be6 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1918,10 +1918,44 @@ out_unlock:
return ret;
}
+static void wait_eb_writebacks(struct btrfs_block_group *block_group)
+{
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
+ const u64 end = block_group->start + block_group->length;
+ struct radix_tree_iter iter;
+ struct extent_buffer *eb;
+ void __rcu **slot;
+
+ rcu_read_lock();
+ radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter,
+ block_group->start >> fs_info->sectorsize_bits) {
+ eb = radix_tree_deref_slot(slot);
+ if (!eb)
+ continue;
+ if (radix_tree_deref_retry(eb)) {
+ slot = radix_tree_iter_retry(&iter);
+ continue;
+ }
+
+ if (eb->start < block_group->start)
+ continue;
+ if (eb->start >= end)
+ break;
+
+ slot = radix_tree_iter_resume(slot, &iter);
+ rcu_read_unlock();
+ wait_on_extent_buffer_writeback(eb);
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+}
+
static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct map_lookup *map;
+ const bool is_metadata = (block_group->flags &
+ (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM));
int ret = 0;
int i;
@@ -1932,8 +1966,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
}
/* Check if we have unwritten allocated space */
- if ((block_group->flags &
- (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) &&
+ if (is_metadata &&
block_group->start + block_group->alloc_offset > block_group->meta_write_pointer) {
spin_unlock(&block_group->lock);
return -EAGAIN;
@@ -1958,6 +1991,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
/* No need to wait for NOCOW writers. Zoned mode does not allow that */
btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start,
block_group->length);
+ /* Wait for extent buffers to be written. */
+ if (is_metadata)
+ wait_eb_writebacks(block_group);
spin_lock(&block_group->lock);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 81f4c15936d0..5b4a7a32bdc5 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -153,6 +153,6 @@ extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
/* when changing internal version - update following two lines at same time */
-#define SMB3_PRODUCT_BUILD 38
-#define CIFS_VERSION "2.38"
+#define SMB3_PRODUCT_BUILD 39
+#define CIFS_VERSION "2.39"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a0a06b6f252b..7ae6f2c08153 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -702,9 +702,6 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg)
int length = 0;
int total_read;
- smb_msg->msg_control = NULL;
- smb_msg->msg_controllen = 0;
-
for (total_read = 0; msg_data_left(smb_msg); total_read += length) {
try_to_freeze();
@@ -760,7 +757,7 @@ int
cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
unsigned int to_read)
{
- struct msghdr smb_msg;
+ struct msghdr smb_msg = {};
struct kvec iov = {.iov_base = buf, .iov_len = to_read};
iov_iter_kvec(&smb_msg.msg_iter, READ, &iov, 1, to_read);
@@ -770,15 +767,13 @@ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
ssize_t
cifs_discard_from_socket(struct TCP_Server_Info *server, size_t to_read)
{
- struct msghdr smb_msg;
+ struct msghdr smb_msg = {};
/*
* iov_iter_discard already sets smb_msg.type and count and iov_offset
* and cifs_readv_from_socket sets msg_control and msg_controllen
* so little to initialize in struct msghdr
*/
- smb_msg.msg_name = NULL;
- smb_msg.msg_namelen = 0;
iov_iter_discard(&smb_msg.msg_iter, READ, to_read);
return cifs_readv_from_socket(server, &smb_msg);
@@ -788,7 +783,7 @@ int
cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page,
unsigned int page_offset, unsigned int to_read)
{
- struct msghdr smb_msg;
+ struct msghdr smb_msg = {};
struct bio_vec bv = {
.bv_page = page, .bv_len = to_read, .bv_offset = page_offset};
iov_iter_bvec(&smb_msg.msg_iter, READ, &bv, 1, to_read);
@@ -2350,7 +2345,9 @@ cifs_put_tcon(struct cifs_tcon *tcon)
ses = tcon->ses;
cifs_dbg(FYI, "%s: tc_count=%d\n", __func__, tcon->tc_count);
spin_lock(&cifs_tcp_ses_lock);
+ spin_lock(&tcon->tc_lock);
if (--tcon->tc_count > 0) {
+ spin_unlock(&tcon->tc_lock);
spin_unlock(&cifs_tcp_ses_lock);
return;
}
@@ -2359,6 +2356,7 @@ cifs_put_tcon(struct cifs_tcon *tcon)
WARN_ON(tcon->tc_count < 0);
list_del_init(&tcon->tcon_list);
+ spin_unlock(&tcon->tc_lock);
spin_unlock(&cifs_tcp_ses_lock);
/* cancel polling of interfaces */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index fa738adc031f..6f38b134a346 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3575,6 +3575,9 @@ static ssize_t __cifs_writev(
ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
{
+ struct file *file = iocb->ki_filp;
+
+ cifs_revalidate_mapping(file->f_inode);
return __cifs_writev(iocb, from, true);
}
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index c2fe035e573b..9a2753e21170 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -194,10 +194,6 @@ smb_send_kvec(struct TCP_Server_Info *server, struct msghdr *smb_msg,
*sent = 0;
- smb_msg->msg_name = (struct sockaddr *) &server->dstaddr;
- smb_msg->msg_namelen = sizeof(struct sockaddr);
- smb_msg->msg_control = NULL;
- smb_msg->msg_controllen = 0;
if (server->noblocksnd)
smb_msg->msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL;
else
@@ -309,7 +305,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
sigset_t mask, oldmask;
size_t total_len = 0, sent, size;
struct socket *ssocket = server->ssocket;
- struct msghdr smb_msg;
+ struct msghdr smb_msg = {};
__be32 rfc1002_marker;
if (cifs_rdma_enabled(server)) {
diff --git a/fs/coredump.c b/fs/coredump.c
index 9f4aae202109..3538f3a63965 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -832,6 +832,39 @@ static int __dump_skip(struct coredump_params *cprm, size_t nr)
}
}
+static int dump_emit_page(struct coredump_params *cprm, struct page *page)
+{
+ struct bio_vec bvec = {
+ .bv_page = page,
+ .bv_offset = 0,
+ .bv_len = PAGE_SIZE,
+ };
+ struct iov_iter iter;
+ struct file *file = cprm->file;
+ loff_t pos;
+ ssize_t n;
+
+ if (cprm->to_skip) {
+ if (!__dump_skip(cprm, cprm->to_skip))
+ return 0;
+ cprm->to_skip = 0;
+ }
+ if (cprm->written + PAGE_SIZE > cprm->limit)
+ return 0;
+ if (dump_interrupted())
+ return 0;
+ pos = file->f_pos;
+ iov_iter_bvec(&iter, WRITE, &bvec, 1, PAGE_SIZE);
+ n = __kernel_write_iter(cprm->file, &iter, &pos);
+ if (n != PAGE_SIZE)
+ return 0;
+ file->f_pos = pos;
+ cprm->written += PAGE_SIZE;
+ cprm->pos += PAGE_SIZE;
+
+ return 1;
+}
+
int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
{
if (cprm->to_skip) {
@@ -863,7 +896,6 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
for (addr = start; addr < start + len; addr += PAGE_SIZE) {
struct page *page;
- int stop;
/*
* To avoid having to allocate page tables for virtual address
@@ -874,10 +906,7 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
*/
page = get_dump_page(addr);
if (page) {
- void *kaddr = kmap_local_page(page);
-
- stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
- kunmap_local(kaddr);
+ int stop = !dump_emit_page(cprm, page);
put_page(page);
if (stop)
return 0;
diff --git a/fs/dax.c b/fs/dax.c
index c440dcef4b1b..1c6867810cbd 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1445,6 +1445,9 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
loff_t done = 0;
int ret;
+ if (!iomi.len)
+ return 0;
+
if (iov_iter_rw(iter) == WRITE) {
lockdep_assert_held_write(&iomi.inode->i_rwsem);
iomi.flags |= IOMAP_WRITE;
diff --git a/fs/exec.c b/fs/exec.c
index 9a5ca7b82bfc..69a572fc57db 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -65,7 +65,6 @@
#include <linux/io_uring.h>
#include <linux/syscall_user_dispatch.h>
#include <linux/coredump.h>
-#include <linux/time_namespace.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -958,8 +957,7 @@ struct file *open_exec(const char *name)
}
EXPORT_SYMBOL(open_exec);
-#if defined(CONFIG_HAVE_AOUT) || defined(CONFIG_BINFMT_FLAT) || \
- defined(CONFIG_BINFMT_ELF_FDPIC)
+#if defined(CONFIG_BINFMT_FLAT) || defined(CONFIG_BINFMT_ELF_FDPIC)
ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
{
ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
@@ -979,12 +977,10 @@ static int exec_mmap(struct mm_struct *mm)
{
struct task_struct *tsk;
struct mm_struct *old_mm, *active_mm;
- bool vfork;
int ret;
/* Notify parent that we're no longer interested in the old VM */
tsk = current;
- vfork = !!tsk->vfork_done;
old_mm = current->mm;
exec_mm_release(tsk, old_mm);
if (old_mm)
@@ -1029,10 +1025,6 @@ static int exec_mmap(struct mm_struct *mm)
tsk->mm->vmacache_seqnum = 0;
vmacache_flush(tsk);
task_unlock(tsk);
-
- if (vfork)
- timens_on_fork(tsk->nsproxy, tsk);
-
if (old_mm) {
mmap_read_unlock(old_mm);
BUG_ON(active_mm != old_mm);
diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c
index ee0b7cf51157..41ae4cce1f42 100644
--- a/fs/exfat/fatent.c
+++ b/fs/exfat/fatent.c
@@ -270,8 +270,7 @@ int exfat_zeroed_cluster(struct inode *dir, unsigned int clu)
struct super_block *sb = dir->i_sb;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
struct buffer_head *bh;
- sector_t blknr, last_blknr;
- int i;
+ sector_t blknr, last_blknr, i;
blknr = exfat_cluster_to_sector(sbi, clu);
last_blknr = blknr + sbi->sect_per_clus;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 9bca5565547b..3bf9a6926798 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -167,8 +167,6 @@ enum SHIFT_DIRECTION {
#define EXT4_MB_CR0_OPTIMIZED 0x8000
/* Avg fragment size rb tree lookup succeeded at least once for cr = 1 */
#define EXT4_MB_CR1_OPTIMIZED 0x00010000
-/* Perform linear traversal for one group */
-#define EXT4_MB_SEARCH_NEXT_LINEAR 0x00020000
struct ext4_allocation_request {
/* target inode for block we're allocating */
struct inode *inode;
@@ -1600,8 +1598,8 @@ struct ext4_sb_info {
struct list_head s_discard_list;
struct work_struct s_discard_work;
atomic_t s_retry_alloc_pending;
- struct rb_root s_mb_avg_fragment_size_root;
- rwlock_t s_mb_rb_lock;
+ struct list_head *s_mb_avg_fragment_size;
+ rwlock_t *s_mb_avg_fragment_size_locks;
struct list_head *s_mb_largest_free_orders;
rwlock_t *s_mb_largest_free_orders_locks;
@@ -3413,6 +3411,8 @@ struct ext4_group_info {
ext4_grpblk_t bb_first_free; /* first free block */
ext4_grpblk_t bb_free; /* total free blocks */
ext4_grpblk_t bb_fragments; /* nr of freespace fragments */
+ int bb_avg_fragment_size_order; /* order of average
+ fragment in BG */
ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */
ext4_group_t bb_group; /* Group number */
struct list_head bb_prealloc_list;
@@ -3420,7 +3420,7 @@ struct ext4_group_info {
void *bb_bitmap;
#endif
struct rw_semaphore alloc_sem;
- struct rb_node bb_avg_fragment_size_rb;
+ struct list_head bb_avg_fragment_size_node;
struct list_head bb_largest_free_order_node;
ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block
* regions, index is order.
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index c148bb97b527..5235974126bd 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -460,6 +460,10 @@ static int __ext4_ext_check(const char *function, unsigned int line,
error_msg = "invalid eh_entries";
goto corrupted;
}
+ if (unlikely((eh->eh_entries == 0) && (depth > 0))) {
+ error_msg = "eh_entries is 0 but eh_depth is > 0";
+ goto corrupted;
+ }
if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) {
error_msg = "invalid extent entries";
goto corrupted;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f73e5eb43eae..208b87ce8858 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -510,7 +510,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
goto fallback;
}
- max_dirs = ndirs / ngroups + inodes_per_group / 16;
+ max_dirs = ndirs / ngroups + inodes_per_group*flex_size / 16;
min_inodes = avefreei - inodes_per_group*flex_size / 4;
if (min_inodes < 1)
min_inodes = 1;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index bd8f8b5c3d30..9dad93059945 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -140,13 +140,15 @@
* number of buddy bitmap orders possible) number of lists. Group-infos are
* placed in appropriate lists.
*
- * 2) Average fragment size rb tree (sbi->s_mb_avg_fragment_size_root)
+ * 2) Average fragment size lists (sbi->s_mb_avg_fragment_size)
*
- * Locking: sbi->s_mb_rb_lock (rwlock)
+ * Locking: sbi->s_mb_avg_fragment_size_locks(array of rw locks)
*
- * This is a red black tree consisting of group infos and the tree is sorted
- * by average fragment sizes (which is calculated as ext4_group_info->bb_free
- * / ext4_group_info->bb_fragments).
+ * This is an array of lists where in the i-th list there are groups with
+ * average fragment size >= 2^i and < 2^(i+1). The average fragment size
+ * is computed as ext4_group_info->bb_free / ext4_group_info->bb_fragments.
+ * Note that we don't bother with a special list for completely empty groups
+ * so we only have MB_NUM_ORDERS(sb) lists.
*
* When "mb_optimize_scan" mount option is set, mballoc consults the above data
* structures to decide the order in which groups are to be traversed for
@@ -160,7 +162,8 @@
*
* At CR = 1, we only consider groups where average fragment size > request
* size. So, we lookup a group which has average fragment size just above or
- * equal to request size using our rb tree (data structure 2) in O(log N) time.
+ * equal to request size using our average fragment size group lists (data
+ * structure 2) in O(1) time.
*
* If "mb_optimize_scan" mount option is not set, mballoc traverses groups in
* linear order which requires O(N) search time for each CR 0 and CR 1 phase.
@@ -802,65 +805,51 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
}
}
-static void ext4_mb_rb_insert(struct rb_root *root, struct rb_node *new,
- int (*cmp)(struct rb_node *, struct rb_node *))
+static int mb_avg_fragment_size_order(struct super_block *sb, ext4_grpblk_t len)
{
- struct rb_node **iter = &root->rb_node, *parent = NULL;
+ int order;
- while (*iter) {
- parent = *iter;
- if (cmp(new, *iter) > 0)
- iter = &((*iter)->rb_left);
- else
- iter = &((*iter)->rb_right);
- }
-
- rb_link_node(new, parent, iter);
- rb_insert_color(new, root);
-}
-
-static int
-ext4_mb_avg_fragment_size_cmp(struct rb_node *rb1, struct rb_node *rb2)
-{
- struct ext4_group_info *grp1 = rb_entry(rb1,
- struct ext4_group_info,
- bb_avg_fragment_size_rb);
- struct ext4_group_info *grp2 = rb_entry(rb2,
- struct ext4_group_info,
- bb_avg_fragment_size_rb);
- int num_frags_1, num_frags_2;
-
- num_frags_1 = grp1->bb_fragments ?
- grp1->bb_free / grp1->bb_fragments : 0;
- num_frags_2 = grp2->bb_fragments ?
- grp2->bb_free / grp2->bb_fragments : 0;
-
- return (num_frags_2 - num_frags_1);
+ /*
+ * We don't bother with a special lists groups with only 1 block free
+ * extents and for completely empty groups.
+ */
+ order = fls(len) - 2;
+ if (order < 0)
+ return 0;
+ if (order == MB_NUM_ORDERS(sb))
+ order--;
+ return order;
}
-/*
- * Reinsert grpinfo into the avg_fragment_size tree with new average
- * fragment size.
- */
+/* Move group to appropriate avg_fragment_size list */
static void
mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
+ int new_order;
if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0)
return;
- write_lock(&sbi->s_mb_rb_lock);
- if (!RB_EMPTY_NODE(&grp->bb_avg_fragment_size_rb)) {
- rb_erase(&grp->bb_avg_fragment_size_rb,
- &sbi->s_mb_avg_fragment_size_root);
- RB_CLEAR_NODE(&grp->bb_avg_fragment_size_rb);
- }
+ new_order = mb_avg_fragment_size_order(sb,
+ grp->bb_free / grp->bb_fragments);
+ if (new_order == grp->bb_avg_fragment_size_order)
+ return;
- ext4_mb_rb_insert(&sbi->s_mb_avg_fragment_size_root,
- &grp->bb_avg_fragment_size_rb,
- ext4_mb_avg_fragment_size_cmp);
- write_unlock(&sbi->s_mb_rb_lock);
+ if (grp->bb_avg_fragment_size_order != -1) {
+ write_lock(&sbi->s_mb_avg_fragment_size_locks[
+ grp->bb_avg_fragment_size_order]);
+ list_del(&grp->bb_avg_fragment_size_node);
+ write_unlock(&sbi->s_mb_avg_fragment_size_locks[
+ grp->bb_avg_fragment_size_order]);
+ }
+ grp->bb_avg_fragment_size_order = new_order;
+ write_lock(&sbi->s_mb_avg_fragment_size_locks[
+ grp->bb_avg_fragment_size_order]);
+ list_add_tail(&grp->bb_avg_fragment_size_node,
+ &sbi->s_mb_avg_fragment_size[grp->bb_avg_fragment_size_order]);
+ write_unlock(&sbi->s_mb_avg_fragment_size_locks[
+ grp->bb_avg_fragment_size_order]);
}
/*
@@ -909,86 +898,55 @@ static void ext4_mb_choose_next_group_cr0(struct ext4_allocation_context *ac,
*new_cr = 1;
} else {
*group = grp->bb_group;
- ac->ac_last_optimal_group = *group;
ac->ac_flags |= EXT4_MB_CR0_OPTIMIZED;
}
}
/*
- * Choose next group by traversing average fragment size tree. Updates *new_cr
- * if cr lvel needs an update. Sets EXT4_MB_SEARCH_NEXT_LINEAR to indicate that
- * the linear search should continue for one iteration since there's lock
- * contention on the rb tree lock.
+ * Choose next group by traversing average fragment size list of suitable
+ * order. Updates *new_cr if cr level needs an update.
*/
static void ext4_mb_choose_next_group_cr1(struct ext4_allocation_context *ac,
int *new_cr, ext4_group_t *group, ext4_group_t ngroups)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
- int avg_fragment_size, best_so_far;
- struct rb_node *node, *found;
- struct ext4_group_info *grp;
-
- /*
- * If there is contention on the lock, instead of waiting for the lock
- * to become available, just continue searching lineraly. We'll resume
- * our rb tree search later starting at ac->ac_last_optimal_group.
- */
- if (!read_trylock(&sbi->s_mb_rb_lock)) {
- ac->ac_flags |= EXT4_MB_SEARCH_NEXT_LINEAR;
- return;
- }
+ struct ext4_group_info *grp = NULL, *iter;
+ int i;
if (unlikely(ac->ac_flags & EXT4_MB_CR1_OPTIMIZED)) {
if (sbi->s_mb_stats)
atomic_inc(&sbi->s_bal_cr1_bad_suggestions);
- /* We have found something at CR 1 in the past */
- grp = ext4_get_group_info(ac->ac_sb, ac->ac_last_optimal_group);
- for (found = rb_next(&grp->bb_avg_fragment_size_rb); found != NULL;
- found = rb_next(found)) {
- grp = rb_entry(found, struct ext4_group_info,
- bb_avg_fragment_size_rb);
+ }
+
+ for (i = mb_avg_fragment_size_order(ac->ac_sb, ac->ac_g_ex.fe_len);
+ i < MB_NUM_ORDERS(ac->ac_sb); i++) {
+ if (list_empty(&sbi->s_mb_avg_fragment_size[i]))
+ continue;
+ read_lock(&sbi->s_mb_avg_fragment_size_locks[i]);
+ if (list_empty(&sbi->s_mb_avg_fragment_size[i])) {
+ read_unlock(&sbi->s_mb_avg_fragment_size_locks[i]);
+ continue;
+ }
+ list_for_each_entry(iter, &sbi->s_mb_avg_fragment_size[i],
+ bb_avg_fragment_size_node) {
if (sbi->s_mb_stats)
atomic64_inc(&sbi->s_bal_cX_groups_considered[1]);
- if (likely(ext4_mb_good_group(ac, grp->bb_group, 1)))
+ if (likely(ext4_mb_good_group(ac, iter->bb_group, 1))) {
+ grp = iter;
break;
- }
- goto done;
- }
-
- node = sbi->s_mb_avg_fragment_size_root.rb_node;
- best_so_far = 0;
- found = NULL;
-
- while (node) {
- grp = rb_entry(node, struct ext4_group_info,
- bb_avg_fragment_size_rb);
- avg_fragment_size = 0;
- if (ext4_mb_good_group(ac, grp->bb_group, 1)) {
- avg_fragment_size = grp->bb_fragments ?
- grp->bb_free / grp->bb_fragments : 0;
- if (!best_so_far || avg_fragment_size < best_so_far) {
- best_so_far = avg_fragment_size;
- found = node;
}
}
- if (avg_fragment_size > ac->ac_g_ex.fe_len)
- node = node->rb_right;
- else
- node = node->rb_left;
+ read_unlock(&sbi->s_mb_avg_fragment_size_locks[i]);
+ if (grp)
+ break;
}
-done:
- if (found) {
- grp = rb_entry(found, struct ext4_group_info,
- bb_avg_fragment_size_rb);
+ if (grp) {
*group = grp->bb_group;
ac->ac_flags |= EXT4_MB_CR1_OPTIMIZED;
} else {
*new_cr = 2;
}
-
- read_unlock(&sbi->s_mb_rb_lock);
- ac->ac_last_optimal_group = *group;
}
static inline int should_optimize_scan(struct ext4_allocation_context *ac)
@@ -1017,11 +975,6 @@ next_linear_group(struct ext4_allocation_context *ac, int group, int ngroups)
goto inc_and_return;
}
- if (ac->ac_flags & EXT4_MB_SEARCH_NEXT_LINEAR) {
- ac->ac_flags &= ~EXT4_MB_SEARCH_NEXT_LINEAR;
- goto inc_and_return;
- }
-
return group;
inc_and_return:
/*
@@ -1049,8 +1002,10 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
{
*new_cr = ac->ac_criteria;
- if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining)
+ if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining) {
+ *group = next_linear_group(ac, *group, ngroups);
return;
+ }
if (*new_cr == 0) {
ext4_mb_choose_next_group_cr0(ac, new_cr, group, ngroups);
@@ -1075,23 +1030,25 @@ mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
struct ext4_sb_info *sbi = EXT4_SB(sb);
int i;
- if (test_opt2(sb, MB_OPTIMIZE_SCAN) && grp->bb_largest_free_order >= 0) {
+ for (i = MB_NUM_ORDERS(sb) - 1; i >= 0; i--)
+ if (grp->bb_counters[i] > 0)
+ break;
+ /* No need to move between order lists? */
+ if (!test_opt2(sb, MB_OPTIMIZE_SCAN) ||
+ i == grp->bb_largest_free_order) {
+ grp->bb_largest_free_order = i;
+ return;
+ }
+
+ if (grp->bb_largest_free_order >= 0) {
write_lock(&sbi->s_mb_largest_free_orders_locks[
grp->bb_largest_free_order]);
list_del_init(&grp->bb_largest_free_order_node);
write_unlock(&sbi->s_mb_largest_free_orders_locks[
grp->bb_largest_free_order]);
}
- grp->bb_largest_free_order = -1; /* uninit */
-
- for (i = MB_NUM_ORDERS(sb) - 1; i >= 0; i--) {
- if (grp->bb_counters[i] > 0) {
- grp->bb_largest_free_order = i;
- break;
- }
- }
- if (test_opt2(sb, MB_OPTIMIZE_SCAN) &&
- grp->bb_largest_free_order >= 0 && grp->bb_free) {
+ grp->bb_largest_free_order = i;
+ if (grp->bb_largest_free_order >= 0 && grp->bb_free) {
write_lock(&sbi->s_mb_largest_free_orders_locks[
grp->bb_largest_free_order]);
list_add_tail(&grp->bb_largest_free_order_node,
@@ -1148,13 +1105,13 @@ void ext4_mb_generate_buddy(struct super_block *sb,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
}
mb_set_largest_free_order(sb, grp);
+ mb_update_avg_fragment_size(sb, grp);
clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
period = get_cycles() - period;
atomic_inc(&sbi->s_mb_buddies_generated);
atomic64_add(period, &sbi->s_mb_generation_time);
- mb_update_avg_fragment_size(sb, grp);
}
/* The buddy information is attached the buddy cache inode
@@ -2636,7 +2593,7 @@ static noinline_for_stack int
ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
ext4_group_t prefetch_grp = 0, ngroups, group, i;
- int cr = -1;
+ int cr = -1, new_cr;
int err = 0, first_err = 0;
unsigned int nr = 0, prefetch_ios = 0;
struct ext4_sb_info *sbi;
@@ -2707,17 +2664,14 @@ repeat:
* from the goal value specified
*/
group = ac->ac_g_ex.fe_group;
- ac->ac_last_optimal_group = group;
ac->ac_groups_linear_remaining = sbi->s_mb_max_linear_groups;
prefetch_grp = group;
- for (i = 0; i < ngroups; group = next_linear_group(ac, group, ngroups),
- i++) {
- int ret = 0, new_cr;
+ for (i = 0, new_cr = cr; i < ngroups; i++,
+ ext4_mb_choose_next_group(ac, &new_cr, &group, ngroups)) {
+ int ret = 0;
cond_resched();
-
- ext4_mb_choose_next_group(ac, &new_cr, &group, ngroups);
if (new_cr != cr) {
cr = new_cr;
goto repeat;
@@ -2991,9 +2945,7 @@ __acquires(&EXT4_SB(sb)->s_mb_rb_lock)
struct super_block *sb = pde_data(file_inode(seq->file));
unsigned long position;
- read_lock(&EXT4_SB(sb)->s_mb_rb_lock);
-
- if (*pos < 0 || *pos >= MB_NUM_ORDERS(sb) + 1)
+ if (*pos < 0 || *pos >= 2*MB_NUM_ORDERS(sb))
return NULL;
position = *pos + 1;
return (void *) ((unsigned long) position);
@@ -3005,7 +2957,7 @@ static void *ext4_mb_seq_structs_summary_next(struct seq_file *seq, void *v, lof
unsigned long position;
++*pos;
- if (*pos < 0 || *pos >= MB_NUM_ORDERS(sb) + 1)
+ if (*pos < 0 || *pos >= 2*MB_NUM_ORDERS(sb))
return NULL;
position = *pos + 1;
return (void *) ((unsigned long) position);
@@ -3017,29 +2969,22 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
struct ext4_sb_info *sbi = EXT4_SB(sb);
unsigned long position = ((unsigned long) v);
struct ext4_group_info *grp;
- struct rb_node *n;
- unsigned int count, min, max;
+ unsigned int count;
position--;
if (position >= MB_NUM_ORDERS(sb)) {
- seq_puts(seq, "fragment_size_tree:\n");
- n = rb_first(&sbi->s_mb_avg_fragment_size_root);
- if (!n) {
- seq_puts(seq, "\ttree_min: 0\n\ttree_max: 0\n\ttree_nodes: 0\n");
- return 0;
- }
- grp = rb_entry(n, struct ext4_group_info, bb_avg_fragment_size_rb);
- min = grp->bb_fragments ? grp->bb_free / grp->bb_fragments : 0;
- count = 1;
- while (rb_next(n)) {
- count++;
- n = rb_next(n);
- }
- grp = rb_entry(n, struct ext4_group_info, bb_avg_fragment_size_rb);
- max = grp->bb_fragments ? grp->bb_free / grp->bb_fragments : 0;
+ position -= MB_NUM_ORDERS(sb);
+ if (position == 0)
+ seq_puts(seq, "avg_fragment_size_lists:\n");
- seq_printf(seq, "\ttree_min: %u\n\ttree_max: %u\n\ttree_nodes: %u\n",
- min, max, count);
+ count = 0;
+ read_lock(&sbi->s_mb_avg_fragment_size_locks[position]);
+ list_for_each_entry(grp, &sbi->s_mb_avg_fragment_size[position],
+ bb_avg_fragment_size_node)
+ count++;
+ read_unlock(&sbi->s_mb_avg_fragment_size_locks[position]);
+ seq_printf(seq, "\tlist_order_%u_groups: %u\n",
+ (unsigned int)position, count);
return 0;
}
@@ -3049,9 +2994,11 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
seq_puts(seq, "max_free_order_lists:\n");
}
count = 0;
+ read_lock(&sbi->s_mb_largest_free_orders_locks[position]);
list_for_each_entry(grp, &sbi->s_mb_largest_free_orders[position],
bb_largest_free_order_node)
count++;
+ read_unlock(&sbi->s_mb_largest_free_orders_locks[position]);
seq_printf(seq, "\tlist_order_%u_groups: %u\n",
(unsigned int)position, count);
@@ -3059,11 +3006,7 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
}
static void ext4_mb_seq_structs_summary_stop(struct seq_file *seq, void *v)
-__releases(&EXT4_SB(sb)->s_mb_rb_lock)
{
- struct super_block *sb = pde_data(file_inode(seq->file));
-
- read_unlock(&EXT4_SB(sb)->s_mb_rb_lock);
}
const struct seq_operations ext4_mb_seq_structs_summary_ops = {
@@ -3176,8 +3119,9 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
init_rwsem(&meta_group_info[i]->alloc_sem);
meta_group_info[i]->bb_free_root = RB_ROOT;
INIT_LIST_HEAD(&meta_group_info[i]->bb_largest_free_order_node);
- RB_CLEAR_NODE(&meta_group_info[i]->bb_avg_fragment_size_rb);
+ INIT_LIST_HEAD(&meta_group_info[i]->bb_avg_fragment_size_node);
meta_group_info[i]->bb_largest_free_order = -1; /* uninit */
+ meta_group_info[i]->bb_avg_fragment_size_order = -1; /* uninit */
meta_group_info[i]->bb_group = group;
mb_group_bb_bitmap_alloc(sb, meta_group_info[i], group);
@@ -3426,7 +3370,24 @@ int ext4_mb_init(struct super_block *sb)
i++;
} while (i < MB_NUM_ORDERS(sb));
- sbi->s_mb_avg_fragment_size_root = RB_ROOT;
+ sbi->s_mb_avg_fragment_size =
+ kmalloc_array(MB_NUM_ORDERS(sb), sizeof(struct list_head),
+ GFP_KERNEL);
+ if (!sbi->s_mb_avg_fragment_size) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ sbi->s_mb_avg_fragment_size_locks =
+ kmalloc_array(MB_NUM_ORDERS(sb), sizeof(rwlock_t),
+ GFP_KERNEL);
+ if (!sbi->s_mb_avg_fragment_size_locks) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < MB_NUM_ORDERS(sb); i++) {
+ INIT_LIST_HEAD(&sbi->s_mb_avg_fragment_size[i]);
+ rwlock_init(&sbi->s_mb_avg_fragment_size_locks[i]);
+ }
sbi->s_mb_largest_free_orders =
kmalloc_array(MB_NUM_ORDERS(sb), sizeof(struct list_head),
GFP_KERNEL);
@@ -3445,7 +3406,6 @@ int ext4_mb_init(struct super_block *sb)
INIT_LIST_HEAD(&sbi->s_mb_largest_free_orders[i]);
rwlock_init(&sbi->s_mb_largest_free_orders_locks[i]);
}
- rwlock_init(&sbi->s_mb_rb_lock);
spin_lock_init(&sbi->s_md_lock);
sbi->s_mb_free_pending = 0;
@@ -3516,6 +3476,8 @@ out_free_locality_groups:
free_percpu(sbi->s_locality_groups);
sbi->s_locality_groups = NULL;
out:
+ kfree(sbi->s_mb_avg_fragment_size);
+ kfree(sbi->s_mb_avg_fragment_size_locks);
kfree(sbi->s_mb_largest_free_orders);
kfree(sbi->s_mb_largest_free_orders_locks);
kfree(sbi->s_mb_offsets);
@@ -3582,6 +3544,8 @@ int ext4_mb_release(struct super_block *sb)
kvfree(group_info);
rcu_read_unlock();
}
+ kfree(sbi->s_mb_avg_fragment_size);
+ kfree(sbi->s_mb_avg_fragment_size_locks);
kfree(sbi->s_mb_largest_free_orders);
kfree(sbi->s_mb_largest_free_orders_locks);
kfree(sbi->s_mb_offsets);
@@ -5193,6 +5157,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
int bsbits = ac->ac_sb->s_blocksize_bits;
loff_t size, isize;
+ bool inode_pa_eligible, group_pa_eligible;
if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
return;
@@ -5200,25 +5165,27 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
return;
+ group_pa_eligible = sbi->s_mb_group_prealloc > 0;
+ inode_pa_eligible = true;
size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
>> bsbits;
+ /* No point in using inode preallocation for closed files */
if ((size == isize) && !ext4_fs_is_busy(sbi) &&
- !inode_is_open_for_write(ac->ac_inode)) {
- ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
- return;
- }
+ !inode_is_open_for_write(ac->ac_inode))
+ inode_pa_eligible = false;
- if (sbi->s_mb_group_prealloc <= 0) {
- ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
- return;
- }
-
- /* don't use group allocation for large files */
size = max(size, isize);
- if (size > sbi->s_mb_stream_request) {
- ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
+ /* Don't use group allocation for large files */
+ if (size > sbi->s_mb_stream_request)
+ group_pa_eligible = false;
+
+ if (!group_pa_eligible) {
+ if (inode_pa_eligible)
+ ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
+ else
+ ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
return;
}
@@ -5565,6 +5532,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
ext4_fsblk_t block = 0;
unsigned int inquota = 0;
unsigned int reserv_clstrs = 0;
+ int retries = 0;
u64 seq;
might_sleep();
@@ -5667,7 +5635,8 @@ repeat:
ar->len = ac->ac_b_ex.fe_len;
}
} else {
- if (ext4_mb_discard_preallocations_should_retry(sb, ac, &seq))
+ if (++retries < 3 &&
+ ext4_mb_discard_preallocations_should_retry(sb, ac, &seq))
goto repeat;
/*
* If block allocation fails then the pa allocated above
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 39da92ceabf8..dcda2a943cee 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -178,7 +178,6 @@ struct ext4_allocation_context {
/* copy of the best found extent taken before preallocation efforts */
struct ext4_free_extent ac_f_ex;
- ext4_group_t ac_last_optimal_group;
__u32 ac_groups_considered;
__u32 ac_flags; /* allocation hints */
__u16 ac_groups_scanned;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 3e4eb3467cb4..8a6b493b5b5f 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -461,8 +461,9 @@ static int fat_allow_set_time(struct user_namespace *mnt_userns,
{
umode_t allow_utime = sbi->options.allow_utime;
- if (!uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode))) {
- if (in_group_p(i_gid_into_mnt(mnt_userns, inode)))
+ if (!vfsuid_eq_kuid(i_uid_into_vfsuid(mnt_userns, inode),
+ current_fsuid())) {
+ if (vfsgid_in_group_p(i_gid_into_vfsgid(mnt_userns, inode)))
allow_utime >>= 3;
if (allow_utime & MAY_WRITE)
return 1;
diff --git a/fs/internal.h b/fs/internal.h
index 87e96b9024ce..3e206d3e317c 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -16,6 +16,7 @@ struct shrink_control;
struct fs_context;
struct user_namespace;
struct pipe_inode_info;
+struct iov_iter;
/*
* block/bdev.c
@@ -221,3 +222,5 @@ ssize_t do_getxattr(struct user_namespace *mnt_userns,
int setxattr_copy(const char __user *name, struct xattr_ctx *ctx);
int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
struct xattr_ctx *ctx);
+
+ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 27c720d71b4e..898dd95bc7a7 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -606,6 +606,31 @@ static inline gfp_t nfs_io_gfp_mask(void)
return GFP_KERNEL;
}
+/*
+ * Special version of should_remove_suid() that ignores capabilities.
+ */
+static inline int nfs_should_remove_suid(const struct inode *inode)
+{
+ umode_t mode = inode->i_mode;
+ int kill = 0;
+
+ /* suid always must be killed */
+ if (unlikely(mode & S_ISUID))
+ kill = ATTR_KILL_SUID;
+
+ /*
+ * sgid without any exec bits is just a mandatory locking mark; leave
+ * it alone. If some exec bits are set, it's a real sgid; kill it.
+ */
+ if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
+ kill |= ATTR_KILL_SGID;
+
+ if (unlikely(kill && S_ISREG(mode)))
+ return kill;
+
+ return 0;
+}
+
/* unlink.c */
extern struct rpc_task *
nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 068c45b3bc1a..6dab9e408372 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -78,10 +78,15 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
status = nfs4_call_sync(server->client, server, msg,
&args.seq_args, &res.seq_res, 0);
- if (status == 0)
+ if (status == 0) {
+ if (nfs_should_remove_suid(inode)) {
+ spin_lock(&inode->i_lock);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE);
+ spin_unlock(&inode->i_lock);
+ }
status = nfs_post_op_update_inode_force_wcc(inode,
res.falloc_fattr);
-
+ }
if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_ALLOCATE])
trace_nfs4_fallocate(inode, &args, status);
else
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 82944e14fcea..ee66ffdb985e 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1051,22 +1051,31 @@ static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx)
if (ctx->bsize)
sb->s_blocksize = nfs_block_size(ctx->bsize, &sb->s_blocksize_bits);
- if (server->nfs_client->rpc_ops->version != 2) {
- /* The VFS shouldn't apply the umask to mode bits. We will do
- * so ourselves when necessary.
+ switch (server->nfs_client->rpc_ops->version) {
+ case 2:
+ sb->s_time_gran = 1000;
+ sb->s_time_min = 0;
+ sb->s_time_max = U32_MAX;
+ break;
+ case 3:
+ /*
+ * The VFS shouldn't apply the umask to mode bits.
+ * We will do so ourselves when necessary.
*/
sb->s_flags |= SB_POSIXACL;
sb->s_time_gran = 1;
- sb->s_export_op = &nfs_export_ops;
- } else
- sb->s_time_gran = 1000;
-
- if (server->nfs_client->rpc_ops->version != 4) {
sb->s_time_min = 0;
sb->s_time_max = U32_MAX;
- } else {
+ sb->s_export_op = &nfs_export_ops;
+ break;
+ case 4:
+ sb->s_flags |= SB_POSIXACL;
+ sb->s_time_gran = 1;
sb->s_time_min = S64_MIN;
sb->s_time_max = S64_MAX;
+ if (server->caps & NFS_CAP_ATOMIC_OPEN_V1)
+ sb->s_export_op = &nfs_export_ops;
+ break;
}
sb->s_magic = NFS_SUPER_MAGIC;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 1843fa235d9b..f41d24b54fd1 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1496,31 +1496,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
}
-/*
- * Special version of should_remove_suid() that ignores capabilities.
- */
-static int nfs_should_remove_suid(const struct inode *inode)
-{
- umode_t mode = inode->i_mode;
- int kill = 0;
-
- /* suid always must be killed */
- if (unlikely(mode & S_ISUID))
- kill = ATTR_KILL_SUID;
-
- /*
- * sgid without any exec bits is just a mandatory locking mark; leave
- * it alone. If some exec bits are set, it's a real sgid; kill it.
- */
- if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
- kill |= ATTR_KILL_SGID;
-
- if (unlikely(kill && S_ISREG(mode)))
- return kill;
-
- return 0;
-}
-
static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr,
struct nfs_fattr *fattr)
{
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 9f486b788ed0..fc17b0ac8729 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -300,6 +300,10 @@ commit_metadata(struct svc_fh *fhp)
static void
nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
{
+ /* Ignore mode updates on symlinks */
+ if (S_ISLNK(inode->i_mode))
+ iap->ia_valid &= ~ATTR_MODE;
+
/* sanitize the mode change */
if (iap->ia_valid & ATTR_MODE) {
iap->ia_mode &= S_IALLUGO;
@@ -353,7 +357,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
int accmode = NFSD_MAY_SATTR;
umode_t ftype = 0;
__be32 err;
- int host_err;
+ int host_err = 0;
bool get_write_count;
bool size_change = (iap->ia_valid & ATTR_SIZE);
@@ -391,13 +395,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
dentry = fhp->fh_dentry;
inode = d_inode(dentry);
- /* Ignore any mode updates on symlinks */
- if (S_ISLNK(inode->i_mode))
- iap->ia_valid &= ~ATTR_MODE;
-
- if (!iap->ia_valid)
- return 0;
-
nfsd_sanitize_attrs(inode, iap);
if (check_guard && guardtime != inode->i_ctime.tv_sec)
@@ -448,8 +445,10 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out_unlock;
}
- iap->ia_valid |= ATTR_CTIME;
- host_err = notify_change(&init_user_ns, dentry, iap, NULL);
+ if (iap->ia_valid) {
+ iap->ia_valid |= ATTR_CTIME;
+ host_err = notify_change(&init_user_ns, dentry, iap, NULL);
+ }
out_unlock:
if (attr->na_seclabel && attr->na_seclabel->len)
@@ -846,10 +845,14 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
struct splice_desc *sd)
{
struct svc_rqst *rqstp = sd->u.data;
-
- svc_rqst_replace_page(rqstp, buf->page);
- if (rqstp->rq_res.page_len == 0)
- rqstp->rq_res.page_base = buf->offset;
+ struct page *page = buf->page; // may be a compound one
+ unsigned offset = buf->offset;
+
+ page += offset / PAGE_SIZE;
+ for (int i = sd->len; i > 0; i -= PAGE_SIZE)
+ svc_rqst_replace_page(rqstp, page++);
+ if (rqstp->rq_res.page_len == 0) // first call
+ rqstp->rq_res.page_base = offset % PAGE_SIZE;
rqstp->rq_res.page_len += sd->len;
return sd->len;
}
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 5ae8de09b271..001f4e053c85 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2092,7 +2092,8 @@ get_ctx_vol_failed:
// TODO: Initialize security.
/* Get the extended system files' directory inode. */
vol->extend_ino = ntfs_iget(sb, FILE_Extend);
- if (IS_ERR(vol->extend_ino) || is_bad_inode(vol->extend_ino)) {
+ if (IS_ERR(vol->extend_ino) || is_bad_inode(vol->extend_ino) ||
+ !S_ISDIR(vol->extend_ino->i_mode)) {
if (!IS_ERR(vol->extend_ino))
iput(vol->extend_ino);
ntfs_error(sb, "Failed to load $Extend.");
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index 51363d4e8636..26a76ebfe58f 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -1927,8 +1927,6 @@ const struct inode_operations ntfs_link_inode_operations = {
.setattr = ntfs3_setattr,
.listxattr = ntfs_listxattr,
.permission = ntfs_permission,
- .get_acl = ntfs_get_acl,
- .set_acl = ntfs_set_acl,
};
const struct address_space_operations ntfs_aops = {
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
index 6ae1f56b7358..7de8718c68a9 100644
--- a/fs/ntfs3/xattr.c
+++ b/fs/ntfs3/xattr.c
@@ -625,67 +625,6 @@ int ntfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
return ntfs_set_acl_ex(mnt_userns, inode, acl, type, false);
}
-static int ntfs_xattr_get_acl(struct user_namespace *mnt_userns,
- struct inode *inode, int type, void *buffer,
- size_t size)
-{
- struct posix_acl *acl;
- int err;
-
- if (!(inode->i_sb->s_flags & SB_POSIXACL)) {
- ntfs_inode_warn(inode, "add mount option \"acl\" to use acl");
- return -EOPNOTSUPP;
- }
-
- acl = ntfs_get_acl(inode, type, false);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
-
- if (!acl)
- return -ENODATA;
-
- err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
- posix_acl_release(acl);
-
- return err;
-}
-
-static int ntfs_xattr_set_acl(struct user_namespace *mnt_userns,
- struct inode *inode, int type, const void *value,
- size_t size)
-{
- struct posix_acl *acl;
- int err;
-
- if (!(inode->i_sb->s_flags & SB_POSIXACL)) {
- ntfs_inode_warn(inode, "add mount option \"acl\" to use acl");
- return -EOPNOTSUPP;
- }
-
- if (!inode_owner_or_capable(mnt_userns, inode))
- return -EPERM;
-
- if (!value) {
- acl = NULL;
- } else {
- acl = posix_acl_from_xattr(&init_user_ns, value, size);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
-
- if (acl) {
- err = posix_acl_valid(&init_user_ns, acl);
- if (err)
- goto release_and_out;
- }
- }
-
- err = ntfs_set_acl(mnt_userns, inode, acl, type);
-
-release_and_out:
- posix_acl_release(acl);
- return err;
-}
-
/*
* ntfs_init_acl - Initialize the ACLs of a new inode.
*
@@ -852,23 +791,6 @@ static int ntfs_getxattr(const struct xattr_handler *handler, struct dentry *de,
goto out;
}
-#ifdef CONFIG_NTFS3_FS_POSIX_ACL
- if ((name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 &&
- !memcmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
- sizeof(XATTR_NAME_POSIX_ACL_ACCESS))) ||
- (name_len == sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1 &&
- !memcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
- sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)))) {
- /* TODO: init_user_ns? */
- err = ntfs_xattr_get_acl(
- &init_user_ns, inode,
- name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1
- ? ACL_TYPE_ACCESS
- : ACL_TYPE_DEFAULT,
- buffer, size);
- goto out;
- }
-#endif
/* Deal with NTFS extended attribute. */
err = ntfs_get_ea(inode, name, name_len, buffer, size, NULL);
@@ -981,22 +903,6 @@ set_new_fa:
goto out;
}
-#ifdef CONFIG_NTFS3_FS_POSIX_ACL
- if ((name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 &&
- !memcmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
- sizeof(XATTR_NAME_POSIX_ACL_ACCESS))) ||
- (name_len == sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1 &&
- !memcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
- sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)))) {
- err = ntfs_xattr_set_acl(
- mnt_userns, inode,
- name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1
- ? ACL_TYPE_ACCESS
- : ACL_TYPE_DEFAULT,
- value, size);
- goto out;
- }
-#endif
/* Deal with NTFS extended attribute. */
err = ntfs_set_ea(inode, name, name_len, value, size, flags, 0);
@@ -1086,7 +992,7 @@ static bool ntfs_xattr_user_list(struct dentry *dentry)
}
// clang-format off
-static const struct xattr_handler ntfs_xattr_handler = {
+static const struct xattr_handler ntfs_other_xattr_handler = {
.prefix = "",
.get = ntfs_getxattr,
.set = ntfs_setxattr,
@@ -1094,7 +1000,11 @@ static const struct xattr_handler ntfs_xattr_handler = {
};
const struct xattr_handler *ntfs_xattr_handlers[] = {
- &ntfs_xattr_handler,
+#ifdef CONFIG_NTFS3_FS_POSIX_ACL
+ &posix_acl_access_xattr_handler,
+ &posix_acl_default_xattr_handler,
+#endif
+ &ntfs_other_xattr_handler,
NULL,
};
// clang-format on
diff --git a/fs/open.c b/fs/open.c
index 8a813fa5ca56..cf7e5c350a54 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -716,6 +716,8 @@ int chown_common(const struct path *path, uid_t user, gid_t group)
fs_userns = i_user_ns(inode);
retry_deleg:
+ newattrs.ia_vfsuid = INVALID_VFSUID;
+ newattrs.ia_vfsgid = INVALID_VFSGID;
newattrs.ia_valid = ATTR_CTIME;
if ((user != (uid_t)-1) && !setattr_vfsuid(&newattrs, uid))
return -EINVAL;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 87759165d32b..ee93c825b06b 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -250,7 +250,7 @@ static inline int ovl_do_setxattr(struct ovl_fs *ofs, struct dentry *dentry,
size_t size, int flags)
{
int err = vfs_setxattr(ovl_upper_mnt_userns(ofs), dentry, name,
- (void *)value, size, flags);
+ value, size, flags);
pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, %d) = %i\n",
dentry, name, min((int)size, 48), value, size, flags, err);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index ec746d447f1b..5da771b218d1 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1022,7 +1022,20 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
/* Check that everything is OK before copy-up */
if (value) {
- acl = posix_acl_from_xattr(&init_user_ns, value, size);
+ /* The above comment can be understood in two ways:
+ *
+ * 1. We just want to check whether the basic POSIX ACL format
+ * is ok. For example, if the header is correct and the size
+ * is sane.
+ * 2. We want to know whether the ACL_{GROUP,USER} entries can
+ * be mapped according to the underlying filesystem.
+ *
+ * Currently, we only check 1. If we wanted to check 2. we
+ * would need to pass the mnt_userns and the fs_userns of the
+ * underlying filesystem. But frankly, I think checking 1. is
+ * enough to start the copy-up.
+ */
+ acl = vfs_set_acl_prepare(&init_user_ns, &init_user_ns, value, size);
if (IS_ERR(acl))
return PTR_ERR(acl);
}
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 5af33800743e..b4f109875e79 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -710,9 +710,9 @@ EXPORT_SYMBOL(posix_acl_update_mode);
/*
* Fix up the uids and gids in posix acl extended attributes in place.
*/
-static int posix_acl_fix_xattr_common(void *value, size_t size)
+static int posix_acl_fix_xattr_common(const void *value, size_t size)
{
- struct posix_acl_xattr_header *header = value;
+ const struct posix_acl_xattr_header *header = value;
int count;
if (!header)
@@ -720,13 +720,13 @@ static int posix_acl_fix_xattr_common(void *value, size_t size)
if (size < sizeof(struct posix_acl_xattr_header))
return -EINVAL;
if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
- return -EINVAL;
+ return -EOPNOTSUPP;
count = posix_acl_xattr_count(size);
if (count < 0)
return -EINVAL;
if (count == 0)
- return -EINVAL;
+ return 0;
return count;
}
@@ -748,7 +748,7 @@ void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns,
return;
count = posix_acl_fix_xattr_common(value, size);
- if (count < 0)
+ if (count <= 0)
return;
for (end = entry + count; entry != end; entry++) {
@@ -771,46 +771,6 @@ void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns,
}
}
-void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns,
- const struct inode *inode,
- void *value, size_t size)
-{
- struct posix_acl_xattr_header *header = value;
- struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
- struct user_namespace *fs_userns = i_user_ns(inode);
- int count;
- vfsuid_t vfsuid;
- vfsgid_t vfsgid;
- kuid_t uid;
- kgid_t gid;
-
- if (no_idmapping(mnt_userns, i_user_ns(inode)))
- return;
-
- count = posix_acl_fix_xattr_common(value, size);
- if (count < 0)
- return;
-
- for (end = entry + count; entry != end; entry++) {
- switch (le16_to_cpu(entry->e_tag)) {
- case ACL_USER:
- uid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
- vfsuid = VFSUIDT_INIT(uid);
- uid = from_vfsuid(mnt_userns, fs_userns, vfsuid);
- entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, uid));
- break;
- case ACL_GROUP:
- gid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id));
- vfsgid = VFSGIDT_INIT(gid);
- gid = from_vfsgid(mnt_userns, fs_userns, vfsgid);
- entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, gid));
- break;
- default:
- break;
- }
- }
-}
-
static void posix_acl_fix_xattr_userns(
struct user_namespace *to, struct user_namespace *from,
void *value, size_t size)
@@ -822,7 +782,7 @@ static void posix_acl_fix_xattr_userns(
kgid_t gid;
count = posix_acl_fix_xattr_common(value, size);
- if (count < 0)
+ if (count <= 0)
return;
for (end = entry + count; entry != end; entry++) {
@@ -857,12 +817,32 @@ void posix_acl_fix_xattr_to_user(void *value, size_t size)
posix_acl_fix_xattr_userns(user_ns, &init_user_ns, value, size);
}
-/*
- * Convert from extended attribute to in-memory representation.
+/**
+ * make_posix_acl - convert POSIX ACLs from uapi to VFS format using the
+ * provided callbacks to map ACL_{GROUP,USER} entries into the
+ * appropriate format
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @value: the uapi representation of POSIX ACLs
+ * @size: the size of @void
+ * @uid_cb: callback to use for mapping the uid stored in ACL_USER entries
+ * @gid_cb: callback to use for mapping the gid stored in ACL_GROUP entries
+ *
+ * The make_posix_acl() helper is an abstraction to translate from uapi format
+ * into the VFS format allowing the caller to specific callbacks to map
+ * ACL_{GROUP,USER} entries into the expected format. This is used in
+ * posix_acl_from_xattr() and vfs_set_acl_prepare() and avoids pointless code
+ * duplication.
+ *
+ * Return: Allocated struct posix_acl on success, NULL for a valid header but
+ * without actual POSIX ACL entries, or ERR_PTR() encoded error code.
*/
-struct posix_acl *
-posix_acl_from_xattr(struct user_namespace *user_ns,
- const void *value, size_t size)
+static struct posix_acl *make_posix_acl(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns, const void *value, size_t size,
+ kuid_t (*uid_cb)(struct user_namespace *, struct user_namespace *,
+ const struct posix_acl_xattr_entry *),
+ kgid_t (*gid_cb)(struct user_namespace *, struct user_namespace *,
+ const struct posix_acl_xattr_entry *))
{
const struct posix_acl_xattr_header *header = value;
const struct posix_acl_xattr_entry *entry = (const void *)(header + 1), *end;
@@ -870,16 +850,9 @@ posix_acl_from_xattr(struct user_namespace *user_ns,
struct posix_acl *acl;
struct posix_acl_entry *acl_e;
- if (!value)
- return NULL;
- if (size < sizeof(struct posix_acl_xattr_header))
- return ERR_PTR(-EINVAL);
- if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
- return ERR_PTR(-EOPNOTSUPP);
-
- count = posix_acl_xattr_count(size);
+ count = posix_acl_fix_xattr_common(value, size);
if (count < 0)
- return ERR_PTR(-EINVAL);
+ return ERR_PTR(count);
if (count == 0)
return NULL;
@@ -900,16 +873,12 @@ posix_acl_from_xattr(struct user_namespace *user_ns,
break;
case ACL_USER:
- acl_e->e_uid =
- make_kuid(user_ns,
- le32_to_cpu(entry->e_id));
+ acl_e->e_uid = uid_cb(mnt_userns, fs_userns, entry);
if (!uid_valid(acl_e->e_uid))
goto fail;
break;
case ACL_GROUP:
- acl_e->e_gid =
- make_kgid(user_ns,
- le32_to_cpu(entry->e_id));
+ acl_e->e_gid = gid_cb(mnt_userns, fs_userns, entry);
if (!gid_valid(acl_e->e_gid))
goto fail;
break;
@@ -924,6 +893,181 @@ fail:
posix_acl_release(acl);
return ERR_PTR(-EINVAL);
}
+
+/**
+ * vfs_set_acl_prepare_kuid - map ACL_USER uid according to mount- and
+ * filesystem idmapping
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @e: a ACL_USER entry in POSIX ACL uapi format
+ *
+ * The uid stored as ACL_USER entry in @e is a kuid_t stored as a raw {g,u}id
+ * value. The vfs_set_acl_prepare_kuid() will recover the kuid_t through
+ * KUIDT_INIT() and then map it according to the idmapped mount. The resulting
+ * kuid_t is the value which the filesystem can map up into a raw backing store
+ * id in the filesystem's idmapping.
+ *
+ * This is used in vfs_set_acl_prepare() to generate the proper VFS
+ * representation of POSIX ACLs with ACL_USER entries during setxattr().
+ *
+ * Return: A kuid in @fs_userns for the uid stored in @e.
+ */
+static inline kuid_t
+vfs_set_acl_prepare_kuid(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ const struct posix_acl_xattr_entry *e)
+{
+ kuid_t kuid = KUIDT_INIT(le32_to_cpu(e->e_id));
+ return from_vfsuid(mnt_userns, fs_userns, VFSUIDT_INIT(kuid));
+}
+
+/**
+ * vfs_set_acl_prepare_kgid - map ACL_GROUP gid according to mount- and
+ * filesystem idmapping
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @e: a ACL_GROUP entry in POSIX ACL uapi format
+ *
+ * The gid stored as ACL_GROUP entry in @e is a kgid_t stored as a raw {g,u}id
+ * value. The vfs_set_acl_prepare_kgid() will recover the kgid_t through
+ * KGIDT_INIT() and then map it according to the idmapped mount. The resulting
+ * kgid_t is the value which the filesystem can map up into a raw backing store
+ * id in the filesystem's idmapping.
+ *
+ * This is used in vfs_set_acl_prepare() to generate the proper VFS
+ * representation of POSIX ACLs with ACL_GROUP entries during setxattr().
+ *
+ * Return: A kgid in @fs_userns for the gid stored in @e.
+ */
+static inline kgid_t
+vfs_set_acl_prepare_kgid(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ const struct posix_acl_xattr_entry *e)
+{
+ kgid_t kgid = KGIDT_INIT(le32_to_cpu(e->e_id));
+ return from_vfsgid(mnt_userns, fs_userns, VFSGIDT_INIT(kgid));
+}
+
+/**
+ * vfs_set_acl_prepare - convert POSIX ACLs from uapi to VFS format taking
+ * mount and filesystem idmappings into account
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @value: the uapi representation of POSIX ACLs
+ * @size: the size of @void
+ *
+ * When setting POSIX ACLs with ACL_{GROUP,USER} entries they need to be
+ * mapped according to the relevant mount- and filesystem idmapping. It is
+ * important that the ACL_{GROUP,USER} entries in struct posix_acl will be
+ * mapped into k{g,u}id_t that are supposed to be mapped up in the filesystem
+ * idmapping. This is crucial since the resulting struct posix_acl might be
+ * cached filesystem wide. The vfs_set_acl_prepare() function will take care to
+ * perform all necessary idmappings.
+ *
+ * Note, that since basically forever the {g,u}id values encoded as
+ * ACL_{GROUP,USER} entries in the uapi POSIX ACLs passed via @value contain
+ * values that have been mapped according to the caller's idmapping. In other
+ * words, POSIX ACLs passed in uapi format as @value during setxattr() contain
+ * {g,u}id values in their ACL_{GROUP,USER} entries that should actually have
+ * been stored as k{g,u}id_t.
+ *
+ * This means, vfs_set_acl_prepare() needs to first recover the k{g,u}id_t by
+ * calling K{G,U}IDT_INIT(). Afterwards they can be interpreted as vfs{g,u}id_t
+ * through from_vfs{g,u}id() to account for any idmapped mounts. The
+ * vfs_set_acl_prepare_k{g,u}id() helpers will take care to generate the
+ * correct k{g,u}id_t.
+ *
+ * The filesystem will then receive the POSIX ACLs ready to be cached
+ * filesystem wide and ready to be written to the backing store taking the
+ * filesystem's idmapping into account.
+ *
+ * Return: Allocated struct posix_acl on success, NULL for a valid header but
+ * without actual POSIX ACL entries, or ERR_PTR() encoded error code.
+ */
+struct posix_acl *vfs_set_acl_prepare(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ const void *value, size_t size)
+{
+ return make_posix_acl(mnt_userns, fs_userns, value, size,
+ vfs_set_acl_prepare_kuid,
+ vfs_set_acl_prepare_kgid);
+}
+EXPORT_SYMBOL(vfs_set_acl_prepare);
+
+/**
+ * posix_acl_from_xattr_kuid - map ACL_USER uid into filesystem idmapping
+ * @mnt_userns: unused
+ * @fs_userns: the filesystem's idmapping
+ * @e: a ACL_USER entry in POSIX ACL uapi format
+ *
+ * Map the uid stored as ACL_USER entry in @e into the filesystem's idmapping.
+ * This is used in posix_acl_from_xattr() to generate the proper VFS
+ * representation of POSIX ACLs with ACL_USER entries.
+ *
+ * Return: A kuid in @fs_userns for the uid stored in @e.
+ */
+static inline kuid_t
+posix_acl_from_xattr_kuid(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ const struct posix_acl_xattr_entry *e)
+{
+ return make_kuid(fs_userns, le32_to_cpu(e->e_id));
+}
+
+/**
+ * posix_acl_from_xattr_kgid - map ACL_GROUP gid into filesystem idmapping
+ * @mnt_userns: unused
+ * @fs_userns: the filesystem's idmapping
+ * @e: a ACL_GROUP entry in POSIX ACL uapi format
+ *
+ * Map the gid stored as ACL_GROUP entry in @e into the filesystem's idmapping.
+ * This is used in posix_acl_from_xattr() to generate the proper VFS
+ * representation of POSIX ACLs with ACL_GROUP entries.
+ *
+ * Return: A kgid in @fs_userns for the gid stored in @e.
+ */
+static inline kgid_t
+posix_acl_from_xattr_kgid(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ const struct posix_acl_xattr_entry *e)
+{
+ return make_kgid(fs_userns, le32_to_cpu(e->e_id));
+}
+
+/**
+ * posix_acl_from_xattr - convert POSIX ACLs from backing store to VFS format
+ * @fs_userns: the filesystem's idmapping
+ * @value: the uapi representation of POSIX ACLs
+ * @size: the size of @void
+ *
+ * Filesystems that store POSIX ACLs in the unaltered uapi format should use
+ * posix_acl_from_xattr() when reading them from the backing store and
+ * converting them into the struct posix_acl VFS format. The helper is
+ * specifically intended to be called from the ->get_acl() inode operation.
+ *
+ * The posix_acl_from_xattr() function will map the raw {g,u}id values stored
+ * in ACL_{GROUP,USER} entries into the filesystem idmapping in @fs_userns. The
+ * posix_acl_from_xattr_k{g,u}id() helpers will take care to generate the
+ * correct k{g,u}id_t. The returned struct posix_acl can be cached.
+ *
+ * Note that posix_acl_from_xattr() does not take idmapped mounts into account.
+ * If it did it calling is from the ->get_acl() inode operation would return
+ * POSIX ACLs mapped according to an idmapped mount which would mean that the
+ * value couldn't be cached for the filesystem. Idmapped mounts are taken into
+ * account on the fly during permission checking or right at the VFS -
+ * userspace boundary before reporting them to the user.
+ *
+ * Return: Allocated struct posix_acl on success, NULL for a valid header but
+ * without actual POSIX ACL entries, or ERR_PTR() encoded error code.
+ */
+struct posix_acl *
+posix_acl_from_xattr(struct user_namespace *fs_userns,
+ const void *value, size_t size)
+{
+ return make_posix_acl(&init_user_ns, fs_userns, value, size,
+ posix_acl_from_xattr_kuid,
+ posix_acl_from_xattr_kgid);
+}
EXPORT_SYMBOL (posix_acl_from_xattr);
/*
@@ -1027,7 +1171,17 @@ posix_acl_xattr_set(const struct xattr_handler *handler,
int ret;
if (value) {
- acl = posix_acl_from_xattr(&init_user_ns, value, size);
+ /*
+ * By the time we end up here the {g,u}ids stored in
+ * ACL_{GROUP,USER} have already been mapped according to the
+ * caller's idmapping. The vfs_set_acl_prepare() helper will
+ * recover them and take idmapped mounts into account. The
+ * filesystem will receive the POSIX ACLs in the correct
+ * format ready to be cached or written to the backing store
+ * taking the filesystem idmapping into account.
+ */
+ acl = vfs_set_acl_prepare(mnt_userns, i_user_ns(inode),
+ value, size);
if (IS_ERR(acl))
return PTR_ERR(acl);
}
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index b2fd3c20e7c2..0c034ea39954 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -28,14 +28,11 @@
#include <linux/crypto.h>
#include <linux/string.h>
#include <linux/timer.h>
-#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/jiffies.h>
#include <linux/workqueue.h>
-#include <crypto/acompress.h>
-
#include "internal.h"
/*
@@ -93,8 +90,7 @@ module_param(compress, charp, 0444);
MODULE_PARM_DESC(compress, "compression to use");
/* Compression parameters */
-static struct crypto_acomp *tfm;
-static struct acomp_req *creq;
+static struct crypto_comp *tfm;
struct pstore_zbackend {
int (*zbufsize)(size_t size);
@@ -272,21 +268,12 @@ static const struct pstore_zbackend zbackends[] = {
static int pstore_compress(const void *in, void *out,
unsigned int inlen, unsigned int outlen)
{
- struct scatterlist src, dst;
int ret;
if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS))
return -EINVAL;
- sg_init_table(&src, 1);
- sg_set_buf(&src, in, inlen);
-
- sg_init_table(&dst, 1);
- sg_set_buf(&dst, out, outlen);
-
- acomp_request_set_params(creq, &src, &dst, inlen, outlen);
-
- ret = crypto_acomp_compress(creq);
+ ret = crypto_comp_compress(tfm, in, inlen, out, &outlen);
if (ret) {
pr_err("crypto_comp_compress failed, ret = %d!\n", ret);
return ret;
@@ -297,7 +284,7 @@ static int pstore_compress(const void *in, void *out,
static void allocate_buf_for_compression(void)
{
- struct crypto_acomp *acomp;
+ struct crypto_comp *ctx;
int size;
char *buf;
@@ -309,7 +296,7 @@ static void allocate_buf_for_compression(void)
if (!psinfo || tfm)
return;
- if (!crypto_has_acomp(zbackend->name, 0, CRYPTO_ALG_ASYNC)) {
+ if (!crypto_has_comp(zbackend->name, 0, 0)) {
pr_err("Unknown compression: %s\n", zbackend->name);
return;
}
@@ -328,24 +315,16 @@ static void allocate_buf_for_compression(void)
return;
}
- acomp = crypto_alloc_acomp(zbackend->name, 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR_OR_NULL(acomp)) {
+ ctx = crypto_alloc_comp(zbackend->name, 0, 0);
+ if (IS_ERR_OR_NULL(ctx)) {
kfree(buf);
pr_err("crypto_alloc_comp('%s') failed: %ld\n", zbackend->name,
- PTR_ERR(acomp));
- return;
- }
-
- creq = acomp_request_alloc(acomp);
- if (!creq) {
- crypto_free_acomp(acomp);
- kfree(buf);
- pr_err("acomp_request_alloc('%s') failed\n", zbackend->name);
+ PTR_ERR(ctx));
return;
}
/* A non-NULL big_oops_buf indicates compression is available. */
- tfm = acomp;
+ tfm = ctx;
big_oops_buf_sz = size;
big_oops_buf = buf;
@@ -355,8 +334,7 @@ static void allocate_buf_for_compression(void)
static void free_buf_for_compression(void)
{
if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && tfm) {
- acomp_request_free(creq);
- crypto_free_acomp(tfm);
+ crypto_free_comp(tfm);
tfm = NULL;
}
kfree(big_oops_buf);
@@ -693,8 +671,6 @@ static void decompress_record(struct pstore_record *record)
int ret;
int unzipped_len;
char *unzipped, *workspace;
- struct acomp_req *dreq;
- struct scatterlist src, dst;
if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS) || !record->compressed)
return;
@@ -718,30 +694,16 @@ static void decompress_record(struct pstore_record *record)
if (!workspace)
return;
- dreq = acomp_request_alloc(tfm);
- if (!dreq) {
- kfree(workspace);
- return;
- }
-
- sg_init_table(&src, 1);
- sg_set_buf(&src, record->buf, record->size);
-
- sg_init_table(&dst, 1);
- sg_set_buf(&dst, workspace, unzipped_len);
-
- acomp_request_set_params(dreq, &src, &dst, record->size, unzipped_len);
-
/* After decompression "unzipped_len" is almost certainly smaller. */
- ret = crypto_acomp_decompress(dreq);
+ ret = crypto_comp_decompress(tfm, record->buf, record->size,
+ workspace, &unzipped_len);
if (ret) {
- pr_err("crypto_acomp_decompress failed, ret = %d!\n", ret);
+ pr_err("crypto_comp_decompress failed, ret = %d!\n", ret);
kfree(workspace);
return;
}
/* Append ECC notice to decompressed buffer. */
- unzipped_len = dreq->dlen;
memcpy(workspace + unzipped_len, record->buf + record->size,
record->ecc_notice_size);
@@ -749,7 +711,6 @@ static void decompress_record(struct pstore_record *record)
unzipped = kmemdup(workspace, unzipped_len + record->ecc_notice_size,
GFP_KERNEL);
kfree(workspace);
- acomp_request_free(dreq);
if (!unzipped)
return;
diff --git a/fs/read_write.c b/fs/read_write.c
index 1a261dcf1778..328ce8cf9a85 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -496,14 +496,9 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t
}
/* caller is responsible for file_start_write/file_end_write */
-ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
+ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos)
{
- struct kvec iov = {
- .iov_base = (void *)buf,
- .iov_len = min_t(size_t, count, MAX_RW_COUNT),
- };
struct kiocb kiocb;
- struct iov_iter iter;
ssize_t ret;
if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE)))
@@ -519,8 +514,7 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t
init_sync_kiocb(&kiocb, file);
kiocb.ki_pos = pos ? *pos : 0;
- iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len);
- ret = file->f_op->write_iter(&kiocb, &iter);
+ ret = file->f_op->write_iter(&kiocb, from);
if (ret > 0) {
if (pos)
*pos = kiocb.ki_pos;
@@ -530,6 +524,18 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t
inc_syscw(current);
return ret;
}
+
+/* caller is responsible for file_start_write/file_end_write */
+ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
+{
+ struct kvec iov = {
+ .iov_base = (void *)buf,
+ .iov_len = min_t(size_t, count, MAX_RW_COUNT),
+ };
+ struct iov_iter iter;
+ iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len);
+ return __kernel_write_iter(file, &iter, pos);
+}
/*
* This "EXPORT_SYMBOL_GPL()" is more of a "EXPORT_SYMBOL_DONTUSE()",
* but autofs is one of the few internal kernel users that actually
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 175de70e3adf..0c1d33c4f74c 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -991,7 +991,7 @@ static int resolve_userfault_fork(struct userfaultfd_ctx *new,
int fd;
fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, new,
- O_RDWR | (new->flags & UFFD_SHARED_FCNTL_FLAGS), inode);
+ O_RDONLY | (new->flags & UFFD_SHARED_FCNTL_FLAGS), inode);
if (fd < 0)
return fd;
@@ -2094,7 +2094,7 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
mmgrab(ctx->mm);
fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, ctx,
- O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL);
+ O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL);
if (fd < 0) {
mmdrop(ctx->mm);
kmem_cache_free(userfaultfd_ctx_cachep, ctx);
diff --git a/fs/xattr.c b/fs/xattr.c
index a1f4998bc6be..61107b6bbed2 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -290,7 +290,7 @@ static inline bool is_posix_acl_xattr(const char *name)
int
vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
- const char *name, void *value, size_t size, int flags)
+ const char *name, const void *value, size_t size, int flags)
{
struct inode *inode = dentry->d_inode;
struct inode *delegated_inode = NULL;
@@ -298,16 +298,12 @@ vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
int error;
if (size && strcmp(name, XATTR_NAME_CAPS) == 0) {
- error = cap_convert_nscap(mnt_userns, dentry,
- (const void **)&value, size);
+ error = cap_convert_nscap(mnt_userns, dentry, &value, size);
if (error < 0)
return error;
size = error;
}
- if (size && is_posix_acl_xattr(name))
- posix_acl_setxattr_idmapped_mnt(mnt_userns, inode, value, size);
-
retry_deleg:
inode_lock(inode);
error = __vfs_setxattr_locked(mnt_userns, dentry, name, value, size,
@@ -587,9 +583,7 @@ int setxattr_copy(const char __user *name, struct xattr_ctx *ctx)
static void setxattr_convert(struct user_namespace *mnt_userns,
struct dentry *d, struct xattr_ctx *ctx)
{
- if (ctx->size &&
- ((strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
- (strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)))
+ if (ctx->size && is_posix_acl_xattr(ctx->kname->name))
posix_acl_fix_xattr_from_user(ctx->kvalue, ctx->size);
}
@@ -705,8 +699,7 @@ do_getxattr(struct user_namespace *mnt_userns, struct dentry *d,
error = vfs_getxattr(mnt_userns, d, kname, ctx->kvalue, ctx->size);
if (error > 0) {
- if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
- (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
+ if (is_posix_acl_xattr(kname))
posix_acl_fix_xattr_to_user(ctx->kvalue, error);
if (ctx->size && copy_to_user(ctx->value, ctx->kvalue, error))
error = -EFAULT;
diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c
index 69d9c83ea4b2..5b1f9a24ed59 100644
--- a/fs/xfs/xfs_notify_failure.c
+++ b/fs/xfs/xfs_notify_failure.c
@@ -175,13 +175,13 @@ xfs_dax_notify_failure(
u64 ddev_start;
u64 ddev_end;
- if (!(mp->m_sb.sb_flags & SB_BORN)) {
+ if (!(mp->m_super->s_flags & SB_BORN)) {
xfs_warn(mp, "filesystem is not ready for notify_failure()!");
return -EIO;
}
if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_daxdev == dax_dev) {
- xfs_warn(mp,
+ xfs_debug(mp,
"notify_failure() not supported on realtime device!");
return -EOPNOTSUPP;
}
@@ -194,7 +194,7 @@ xfs_dax_notify_failure(
}
if (!xfs_has_rmapbt(mp)) {
- xfs_warn(mp, "notify_failure() needs rmapbt enabled!");
+ xfs_debug(mp, "notify_failure() needs rmapbt enabled!");
return -EOPNOTSUPP;
}