summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig.binfmt33
-rw-r--r--fs/Makefile1
-rw-r--r--fs/afs/flock.c2
-rw-r--r--fs/afs/fsclient.c2
-rw-r--r--fs/afs/internal.h3
-rw-r--r--fs/afs/misc.c1
-rw-r--r--fs/afs/rxrpc.c7
-rw-r--r--fs/afs/yfsclient.c3
-rw-r--r--fs/binfmt_aout.c342
-rw-r--r--fs/btrfs/block-group.c51
-rw-r--r--fs/btrfs/block-group.h4
-rw-r--r--fs/btrfs/ctree.c3
-rw-r--r--fs/btrfs/ctree.h5
-rw-r--r--fs/btrfs/dev-replace.c5
-rw-r--r--fs/btrfs/disk-io.c125
-rw-r--r--fs/btrfs/disk-io.h10
-rw-r--r--fs/btrfs/extent-tree.c48
-rw-r--r--fs/btrfs/extent_io.c44
-rw-r--r--fs/btrfs/file.c2
-rw-r--r--fs/btrfs/inode.c21
-rw-r--r--fs/btrfs/locking.c91
-rw-r--r--fs/btrfs/locking.h14
-rw-r--r--fs/btrfs/relocation.c9
-rw-r--r--fs/btrfs/root-tree.c5
-rw-r--r--fs/btrfs/space-info.c2
-rw-r--r--fs/btrfs/tree-checker.c25
-rw-r--r--fs/btrfs/tree-log.c8
-rw-r--r--fs/btrfs/volumes.c8
-rw-r--r--fs/btrfs/xattr.c3
-rw-r--r--fs/btrfs/zoned.c139
-rw-r--r--fs/cachefiles/internal.h1
-rw-r--r--fs/cachefiles/ondemand.c22
-rw-r--r--fs/cifs/cifs_debug.c2
-rw-r--r--fs/cifs/cifsencrypt.c3
-rw-r--r--fs/cifs/cifsfs.c6
-rw-r--r--fs/cifs/cifsfs.h4
-rw-r--r--fs/cifs/cifsglob.h9
-rw-r--r--fs/cifs/cifsproto.h2
-rw-r--r--fs/cifs/cifsroot.c2
-rw-r--r--fs/cifs/connect.c39
-rw-r--r--fs/cifs/file.c3
-rw-r--r--fs/cifs/misc.c8
-rw-r--r--fs/cifs/netmisc.c2
-rw-r--r--fs/cifs/readdir.c6
-rw-r--r--fs/cifs/smb2file.c1
-rw-r--r--fs/cifs/smb2misc.c4
-rw-r--r--fs/cifs/smb2ops.c171
-rw-r--r--fs/cifs/smb2pdu.c50
-rw-r--r--fs/cifs/smb2proto.h6
-rw-r--r--fs/cifs/transport.c27
-rw-r--r--fs/coredump.c38
-rw-r--r--fs/dax.c3
-rw-r--r--fs/dcache.c72
-rw-r--r--fs/debugfs/inode.c22
-rw-r--r--fs/erofs/fscache.c8
-rw-r--r--fs/erofs/internal.h29
-rw-r--r--fs/erofs/zmap.c16
-rw-r--r--fs/exec.c24
-rw-r--r--fs/exfat/fatent.c3
-rw-r--r--fs/ext4/ext4.h10
-rw-r--r--fs/ext4/extents.c4
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/mballoc.c317
-rw-r--r--fs/ext4/mballoc.h1
-rw-r--r--fs/fs-writeback.c12
-rw-r--r--fs/inode.c14
-rw-r--r--fs/internal.h3
-rw-r--r--fs/ksmbd/ksmbd_netlink.h2
-rw-r--r--fs/ksmbd/mgmt/share_config.c6
-rw-r--r--fs/ksmbd/mgmt/share_config.h1
-rw-r--r--fs/ksmbd/mgmt/tree_connect.c16
-rw-r--r--fs/ksmbd/smb2pdu.c35
-rw-r--r--fs/locks.c1
-rw-r--r--fs/namespace.c7
-rw-r--r--fs/nfs/dir.c7
-rw-r--r--fs/nfs/file.c15
-rw-r--r--fs/nfs/inode.c1
-rw-r--r--fs/nfs/internal.h25
-rw-r--r--fs/nfs/nfs42proc.c9
-rw-r--r--fs/nfs/nfs4file.c6
-rw-r--r--fs/nfs/pnfs.c1
-rw-r--r--fs/nfs/super.c27
-rw-r--r--fs/nfs/write.c31
-rw-r--r--fs/nfsd/vfs.c31
-rw-r--r--fs/ntfs/super.c3
-rw-r--r--fs/ntfs3/attrib.c557
-rw-r--r--fs/ntfs3/bitmap.c12
-rw-r--r--fs/ntfs3/file.c110
-rw-r--r--fs/ntfs3/frecord.c128
-rw-r--r--fs/ntfs3/fslog.c4
-rw-r--r--fs/ntfs3/fsntfs.c92
-rw-r--r--fs/ntfs3/index.c33
-rw-r--r--fs/ntfs3/inode.c19
-rw-r--r--fs/ntfs3/namei.c6
-rw-r--r--fs/ntfs3/ntfs_fs.h16
-rw-r--r--fs/ntfs3/record.c27
-rw-r--r--fs/ntfs3/run.c108
-rw-r--r--fs/ntfs3/super.c17
-rw-r--r--fs/ntfs3/xattr.c51
-rw-r--r--fs/ocfs2/dlmglue.c8
-rw-r--r--fs/ocfs2/super.c3
-rw-r--r--fs/open.c2
-rw-r--r--fs/overlayfs/inode.c11
-rw-r--r--fs/posix_acl.c15
-rw-r--r--fs/proc/task_mmu.c7
-rw-r--r--fs/pstore/platform.c63
-rw-r--r--fs/read_write.c22
-rw-r--r--fs/squashfs/file.c2
-rw-r--r--fs/squashfs/file_direct.c2
-rw-r--r--fs/squashfs/page_actor.c34
-rw-r--r--fs/squashfs/page_actor.h5
-rw-r--r--fs/tracefs/inode.c31
-rw-r--r--fs/userfaultfd.c4
-rw-r--r--fs/xfs/xfs_notify_failure.c6
114 files changed, 1978 insertions, 1603 deletions
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 21e154516bf2..f14478643b91 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -142,39 +142,6 @@ config BINFMT_ZFLAT
help
Support FLAT format compressed binaries
-config HAVE_AOUT
- def_bool n
-
-config BINFMT_AOUT
- tristate "Kernel support for a.out and ECOFF binaries"
- depends on HAVE_AOUT
- help
- A.out (Assembler.OUTput) is a set of formats for libraries and
- executables used in the earliest versions of UNIX. Linux used
- the a.out formats QMAGIC and ZMAGIC until they were replaced
- with the ELF format.
-
- The conversion to ELF started in 1995. This option is primarily
- provided for historical interest and for the benefit of those
- who need to run binaries from that era.
-
- Most people should answer N here. If you think you may have
- occasional use for this format, enable module support above
- and answer M here to compile this support as a module called
- binfmt_aout.
-
- If any crucial components of your system (such as /sbin/init
- or /lib/ld.so) are still in a.out format, you will have to
- say Y here.
-
-config OSF4_COMPAT
- bool "OSF/1 v4 readv/writev compatibility"
- depends on ALPHA && BINFMT_AOUT
- help
- Say Y if you are using OSF/1 binaries (like Netscape and Acrobat)
- with v4 shared libraries freely available from Compaq. If you're
- going to use shared libraries from Tru64 version 5.0 or later, say N.
-
config BINFMT_MISC
tristate "Kernel support for MISC binaries"
help
diff --git a/fs/Makefile b/fs/Makefile
index 93b80529f8e8..4dea17840761 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -38,7 +38,6 @@ obj-$(CONFIG_FS_DAX) += dax.o
obj-$(CONFIG_FS_ENCRYPTION) += crypto/
obj-$(CONFIG_FS_VERITY) += verity/
obj-$(CONFIG_FILE_LOCKING) += locks.o
-obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o
obj-$(CONFIG_BINFMT_SCRIPT) += binfmt_script.o
obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index c4210a3964d8..bbcc5afd1576 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -76,7 +76,7 @@ void afs_lock_op_done(struct afs_call *call)
if (call->error == 0) {
spin_lock(&vnode->lock);
trace_afs_flock_ev(vnode, NULL, afs_flock_timestamp, 0);
- vnode->locked_at = call->reply_time;
+ vnode->locked_at = call->issue_time;
afs_schedule_lock_extension(vnode);
spin_unlock(&vnode->lock);
}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 4943413d9c5f..7d37f63ef0f0 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -131,7 +131,7 @@ bad:
static time64_t xdr_decode_expiry(struct afs_call *call, u32 expiry)
{
- return ktime_divns(call->reply_time, NSEC_PER_SEC) + expiry;
+ return ktime_divns(call->issue_time, NSEC_PER_SEC) + expiry;
}
static void xdr_decode_AFSCallBack(const __be32 **_bp,
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 64ad55494349..723d162078a3 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -137,7 +137,6 @@ struct afs_call {
bool need_attention; /* T if RxRPC poked us */
bool async; /* T if asynchronous */
bool upgrade; /* T to request service upgrade */
- bool have_reply_time; /* T if have got reply_time */
bool intr; /* T if interruptible */
bool unmarshalling_error; /* T if an unmarshalling error occurred */
u16 service_id; /* Actual service ID (after upgrade) */
@@ -151,7 +150,7 @@ struct afs_call {
} __attribute__((packed));
__be64 tmp64;
};
- ktime_t reply_time; /* Time of first reply packet */
+ ktime_t issue_time; /* Time of issue of operation */
};
struct afs_call_type {
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 933e67fcdab1..805328ca5428 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -69,6 +69,7 @@ int afs_abort_to_error(u32 abort_code)
/* Unified AFS error table */
case UAEPERM: return -EPERM;
case UAENOENT: return -ENOENT;
+ case UAEAGAIN: return -EAGAIN;
case UAEACCES: return -EACCES;
case UAEBUSY: return -EBUSY;
case UAEEXIST: return -EEXIST;
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index d5c4785c862d..eccc3cd0cb70 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -351,6 +351,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
if (call->max_lifespan)
rxrpc_kernel_set_max_life(call->net->socket, rxcall,
call->max_lifespan);
+ call->issue_time = ktime_get_real();
/* send the request */
iov[0].iov_base = call->request;
@@ -501,12 +502,6 @@ static void afs_deliver_to_call(struct afs_call *call)
return;
}
- if (!call->have_reply_time &&
- rxrpc_kernel_get_reply_time(call->net->socket,
- call->rxcall,
- &call->reply_time))
- call->have_reply_time = true;
-
ret = call->type->deliver(call);
state = READ_ONCE(call->state);
if (ret == 0 && call->unmarshalling_error)
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index fdc7d675b4b0..11571cca86c1 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -232,8 +232,7 @@ static void xdr_decode_YFSCallBack(const __be32 **_bp,
struct afs_callback *cb = &scb->callback;
ktime_t cb_expiry;
- cb_expiry = call->reply_time;
- cb_expiry = ktime_add(cb_expiry, xdr_to_u64(x->expiration_time) * 100);
+ cb_expiry = ktime_add(call->issue_time, xdr_to_u64(x->expiration_time) * 100);
cb->expires_at = ktime_divns(cb_expiry, NSEC_PER_SEC);
scb->have_cb = true;
*_bp += xdr_size(x);
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
deleted file mode 100644
index 0dcfc691e7e2..000000000000
--- a/fs/binfmt_aout.c
+++ /dev/null
@@ -1,342 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/fs/binfmt_aout.c
- *
- * Copyright (C) 1991, 1992, 1996 Linus Torvalds
- */
-
-#include <linux/module.h>
-
-#include <linux/time.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/mman.h>
-#include <linux/a.out.h>
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include <linux/ptrace.h>
-#include <linux/user.h>
-#include <linux/binfmts.h>
-#include <linux/personality.h>
-#include <linux/init.h>
-#include <linux/coredump.h>
-#include <linux/slab.h>
-#include <linux/sched/task_stack.h>
-
-#include <linux/uaccess.h>
-#include <asm/cacheflush.h>
-
-static int load_aout_binary(struct linux_binprm *);
-static int load_aout_library(struct file*);
-
-static struct linux_binfmt aout_format = {
- .module = THIS_MODULE,
- .load_binary = load_aout_binary,
- .load_shlib = load_aout_library,
-};
-
-#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
-
-static int set_brk(unsigned long start, unsigned long end)
-{
- start = PAGE_ALIGN(start);
- end = PAGE_ALIGN(end);
- if (end > start)
- return vm_brk(start, end - start);
- return 0;
-}
-
-/*
- * create_aout_tables() parses the env- and arg-strings in new user
- * memory and creates the pointer tables from them, and puts their
- * addresses on the "stack", returning the new stack pointer value.
- */
-static unsigned long __user *create_aout_tables(char __user *p, struct linux_binprm * bprm)
-{
- char __user * __user *argv;
- char __user * __user *envp;
- unsigned long __user *sp;
- int argc = bprm->argc;
- int envc = bprm->envc;
-
- sp = (void __user *)((-(unsigned long)sizeof(char *)) & (unsigned long) p);
-#ifdef __alpha__
-/* whee.. test-programs are so much fun. */
- put_user(0, --sp);
- put_user(0, --sp);
- if (bprm->loader) {
- put_user(0, --sp);
- put_user(1003, --sp);
- put_user(bprm->loader, --sp);
- put_user(1002, --sp);
- }
- put_user(bprm->exec, --sp);
- put_user(1001, --sp);
-#endif
- sp -= envc+1;
- envp = (char __user * __user *) sp;
- sp -= argc+1;
- argv = (char __user * __user *) sp;
-#ifndef __alpha__
- put_user((unsigned long) envp,--sp);
- put_user((unsigned long) argv,--sp);
-#endif
- put_user(argc,--sp);
- current->mm->arg_start = (unsigned long) p;
- while (argc-->0) {
- char c;
- put_user(p,argv++);
- do {
- get_user(c,p++);
- } while (c);
- }
- put_user(NULL,argv);
- current->mm->arg_end = current->mm->env_start = (unsigned long) p;
- while (envc-->0) {
- char c;
- put_user(p,envp++);
- do {
- get_user(c,p++);
- } while (c);
- }
- put_user(NULL,envp);
- current->mm->env_end = (unsigned long) p;
- return sp;
-}
-
-/*
- * These are the functions used to load a.out style executables and shared
- * libraries. There is no binary dependent code anywhere else.
- */
-
-static int load_aout_binary(struct linux_binprm * bprm)
-{
- struct pt_regs *regs = current_pt_regs();
- struct exec ex;
- unsigned long error;
- unsigned long fd_offset;
- unsigned long rlim;
- int retval;
-
- ex = *((struct exec *) bprm->buf); /* exec-header */
- if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
- N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
- N_TRSIZE(ex) || N_DRSIZE(ex) ||
- i_size_read(file_inode(bprm->file)) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
- return -ENOEXEC;
- }
-
- /*
- * Requires a mmap handler. This prevents people from using a.out
- * as part of an exploit attack against /proc-related vulnerabilities.
- */
- if (!bprm->file->f_op->mmap)
- return -ENOEXEC;
-
- fd_offset = N_TXTOFF(ex);
-
- /* Check initial limits. This avoids letting people circumvent
- * size limits imposed on them by creating programs with large
- * arrays in the data or bss.
- */
- rlim = rlimit(RLIMIT_DATA);
- if (rlim >= RLIM_INFINITY)
- rlim = ~0;
- if (ex.a_data + ex.a_bss > rlim)
- return -ENOMEM;
-
- /* Flush all traces of the currently running executable */
- retval = begin_new_exec(bprm);
- if (retval)
- return retval;
-
- /* OK, This is the point of no return */
-#ifdef __alpha__
- SET_AOUT_PERSONALITY(bprm, ex);
-#else
- set_personality(PER_LINUX);
-#endif
- setup_new_exec(bprm);
-
- current->mm->end_code = ex.a_text +
- (current->mm->start_code = N_TXTADDR(ex));
- current->mm->end_data = ex.a_data +
- (current->mm->start_data = N_DATADDR(ex));
- current->mm->brk = ex.a_bss +
- (current->mm->start_brk = N_BSSADDR(ex));
-
- retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
- if (retval < 0)
- return retval;
-
-
- if (N_MAGIC(ex) == OMAGIC) {
- unsigned long text_addr, map_size;
- loff_t pos;
-
- text_addr = N_TXTADDR(ex);
-
-#ifdef __alpha__
- pos = fd_offset;
- map_size = ex.a_text+ex.a_data + PAGE_SIZE - 1;
-#else
- pos = 32;
- map_size = ex.a_text+ex.a_data;
-#endif
- error = vm_brk(text_addr & PAGE_MASK, map_size);
- if (error)
- return error;
-
- error = read_code(bprm->file, text_addr, pos,
- ex.a_text+ex.a_data);
- if ((signed long)error < 0)
- return error;
- } else {
- if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
- (N_MAGIC(ex) != NMAGIC) && printk_ratelimit())
- {
- printk(KERN_NOTICE "executable not page aligned\n");
- }
-
- if ((fd_offset & ~PAGE_MASK) != 0 && printk_ratelimit())
- {
- printk(KERN_WARNING
- "fd_offset is not page aligned. Please convert program: %pD\n",
- bprm->file);
- }
-
- if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
- error = vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
- if (error)
- return error;
-
- read_code(bprm->file, N_TXTADDR(ex), fd_offset,
- ex.a_text + ex.a_data);
- goto beyond_if;
- }
-
- error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
- PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE,
- fd_offset);
-
- if (error != N_TXTADDR(ex))
- return error;
-
- error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED | MAP_PRIVATE,
- fd_offset + ex.a_text);
- if (error != N_DATADDR(ex))
- return error;
- }
-beyond_if:
- set_binfmt(&aout_format);
-
- retval = set_brk(current->mm->start_brk, current->mm->brk);
- if (retval < 0)
- return retval;
-
- current->mm->start_stack =
- (unsigned long) create_aout_tables((char __user *) bprm->p, bprm);
-#ifdef __alpha__
- regs->gp = ex.a_gpvalue;
-#endif
- finalize_exec(bprm);
- start_thread(regs, ex.a_entry, current->mm->start_stack);
- return 0;
-}
-
-static int load_aout_library(struct file *file)
-{
- struct inode * inode;
- unsigned long bss, start_addr, len;
- unsigned long error;
- int retval;
- struct exec ex;
- loff_t pos = 0;
-
- inode = file_inode(file);
-
- retval = -ENOEXEC;
- error = kernel_read(file, &ex, sizeof(ex), &pos);
- if (error != sizeof(ex))
- goto out;
-
- /* We come in here for the regular a.out style of shared libraries */
- if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) ||
- N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) ||
- i_size_read(inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
- goto out;
- }
-
- /*
- * Requires a mmap handler. This prevents people from using a.out
- * as part of an exploit attack against /proc-related vulnerabilities.
- */
- if (!file->f_op->mmap)
- goto out;
-
- if (N_FLAGS(ex))
- goto out;
-
- /* For QMAGIC, the starting address is 0x20 into the page. We mask
- this off to get the starting address for the page */
-
- start_addr = ex.a_entry & 0xfffff000;
-
- if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
- if (printk_ratelimit())
- {
- printk(KERN_WARNING
- "N_TXTOFF is not page aligned. Please convert library: %pD\n",
- file);
- }
- retval = vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
- if (retval)
- goto out;
-
- read_code(file, start_addr, N_TXTOFF(ex),
- ex.a_text + ex.a_data);
- retval = 0;
- goto out;
- }
- /* Now use mmap to map the library into memory. */
- error = vm_mmap(file, start_addr, ex.a_text + ex.a_data,
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED | MAP_PRIVATE,
- N_TXTOFF(ex));
- retval = error;
- if (error != start_addr)
- goto out;
-
- len = PAGE_ALIGN(ex.a_text + ex.a_data);
- bss = ex.a_text + ex.a_data + ex.a_bss;
- if (bss > len) {
- retval = vm_brk(start_addr + len, bss - len);
- if (retval)
- goto out;
- }
- retval = 0;
-out:
- return retval;
-}
-
-static int __init init_aout_binfmt(void)
-{
- register_binfmt(&aout_format);
- return 0;
-}
-
-static void __exit exit_aout_binfmt(void)
-{
- unregister_binfmt(&aout_format);
-}
-
-core_initcall(init_aout_binfmt);
-module_exit(exit_aout_binfmt);
-MODULE_LICENSE("GPL");
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index c3aecfb0a71d..e0375ba9d0fe 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -440,39 +440,26 @@ void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
btrfs_put_caching_control(caching_ctl);
}
-int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache)
+static int btrfs_caching_ctl_wait_done(struct btrfs_block_group *cache,
+ struct btrfs_caching_control *caching_ctl)
+{
+ wait_event(caching_ctl->wait, btrfs_block_group_done(cache));
+ return cache->cached == BTRFS_CACHE_ERROR ? -EIO : 0;
+}
+
+static int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache)
{
struct btrfs_caching_control *caching_ctl;
- int ret = 0;
+ int ret;
caching_ctl = btrfs_get_caching_control(cache);
if (!caching_ctl)
return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
-
- wait_event(caching_ctl->wait, btrfs_block_group_done(cache));
- if (cache->cached == BTRFS_CACHE_ERROR)
- ret = -EIO;
+ ret = btrfs_caching_ctl_wait_done(cache, caching_ctl);
btrfs_put_caching_control(caching_ctl);
return ret;
}
-static bool space_cache_v1_done(struct btrfs_block_group *cache)
-{
- bool ret;
-
- spin_lock(&cache->lock);
- ret = cache->cached != BTRFS_CACHE_FAST;
- spin_unlock(&cache->lock);
-
- return ret;
-}
-
-void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache,
- struct btrfs_caching_control *caching_ctl)
-{
- wait_event(caching_ctl->wait, space_cache_v1_done(cache));
-}
-
#ifdef CONFIG_BTRFS_DEBUG
static void fragment_free_space(struct btrfs_block_group *block_group)
{
@@ -750,9 +737,8 @@ done:
btrfs_put_block_group(block_group);
}
-int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only)
+int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
{
- DEFINE_WAIT(wait);
struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_caching_control *caching_ctl = NULL;
int ret = 0;
@@ -785,10 +771,7 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only
}
WARN_ON(cache->caching_ctl);
cache->caching_ctl = caching_ctl;
- if (btrfs_test_opt(fs_info, SPACE_CACHE))
- cache->cached = BTRFS_CACHE_FAST;
- else
- cache->cached = BTRFS_CACHE_STARTED;
+ cache->cached = BTRFS_CACHE_STARTED;
cache->has_caching_ctl = 1;
spin_unlock(&cache->lock);
@@ -801,8 +784,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only
btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
out:
- if (load_cache_only && caching_ctl)
- btrfs_wait_space_cache_v1_finished(cache, caching_ctl);
+ if (wait && caching_ctl)
+ ret = btrfs_caching_ctl_wait_done(cache, caching_ctl);
if (caching_ctl)
btrfs_put_caching_control(caching_ctl);
@@ -1640,9 +1623,11 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
div64_u64(zone_unusable * 100, bg->length));
trace_btrfs_reclaim_block_group(bg);
ret = btrfs_relocate_chunk(fs_info, bg->start);
- if (ret)
+ if (ret) {
+ btrfs_dec_block_group_ro(bg);
btrfs_err(fs_info, "error relocating chunk %llu",
bg->start);
+ }
next:
btrfs_put_block_group(bg);
@@ -3310,7 +3295,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
* space back to the block group, otherwise we will leak space.
*/
if (!alloc && !btrfs_block_group_done(cache))
- btrfs_cache_block_group(cache, 1);
+ btrfs_cache_block_group(cache, true);
byte_in_group = bytenr - cache->start;
WARN_ON(byte_in_group > cache->length);
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 35e0e860cc0b..6b3cdc4cbc41 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -263,9 +263,7 @@ void btrfs_dec_nocow_writers(struct btrfs_block_group *bg);
void btrfs_wait_nocow_writers(struct btrfs_block_group *bg);
void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
u64 num_bytes);
-int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache);
-int btrfs_cache_block_group(struct btrfs_block_group *cache,
- int load_cache_only);
+int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait);
void btrfs_put_caching_control(struct btrfs_caching_control *ctl);
struct btrfs_caching_control *btrfs_get_caching_control(
struct btrfs_block_group *cache);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 6e556031a8f3..ebfa35fe1c38 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2075,6 +2075,9 @@ cow_done:
if (!p->skip_locking) {
level = btrfs_header_level(b);
+
+ btrfs_maybe_reset_lockdep_class(root, b);
+
if (level <= write_lock_level) {
btrfs_tree_lock(b);
p->locks[level] = BTRFS_WRITE_LOCK;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4db85b9dc7ed..df8c99c99df9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -505,7 +505,6 @@ struct btrfs_free_cluster {
enum btrfs_caching_type {
BTRFS_CACHE_NO,
BTRFS_CACHE_STARTED,
- BTRFS_CACHE_FAST,
BTRFS_CACHE_FINISHED,
BTRFS_CACHE_ERROR,
};
@@ -1089,8 +1088,6 @@ struct btrfs_fs_info {
spinlock_t zone_active_bgs_lock;
struct list_head zone_active_bgs;
- /* Waiters when BTRFS_FS_NEED_ZONE_FINISH is set */
- wait_queue_head_t zone_finish_wait;
/* Updates are not protected by any lock */
struct btrfs_commit_stats commit_stats;
@@ -1173,6 +1170,8 @@ enum {
BTRFS_ROOT_ORPHAN_CLEANUP,
/* This root has a drop operation that was started previously. */
BTRFS_ROOT_UNFINISHED_DROP,
+ /* This reloc root needs to have its buffers lockdep class reset. */
+ BTRFS_ROOT_RESET_LOCKDEP_CLASS,
};
static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index f43196a893ca..41cddd3ff059 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -165,7 +165,7 @@ no_valid_dev_replace_entry_found:
*/
if (btrfs_find_device(fs_info->fs_devices, &args)) {
btrfs_err(fs_info,
- "replace devid present without an active replace item");
+"replace without active item, run 'device scan --forget' on the target device");
ret = -EUCLEAN;
} else {
dev_replace->srcdev = NULL;
@@ -1129,8 +1129,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
up_write(&dev_replace->rwsem);
/* Scrub for replace must not be running in suspended state */
- ret = btrfs_scrub_cancel(fs_info);
- ASSERT(ret != -ENOTCONN);
+ btrfs_scrub_cancel(fs_info);
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4c3166f3c725..2633137c3e9f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -87,88 +87,6 @@ struct async_submit_bio {
};
/*
- * Lockdep class keys for extent_buffer->lock's in this root. For a given
- * eb, the lockdep key is determined by the btrfs_root it belongs to and
- * the level the eb occupies in the tree.
- *
- * Different roots are used for different purposes and may nest inside each
- * other and they require separate keysets. As lockdep keys should be
- * static, assign keysets according to the purpose of the root as indicated
- * by btrfs_root->root_key.objectid. This ensures that all special purpose
- * roots have separate keysets.
- *
- * Lock-nesting across peer nodes is always done with the immediate parent
- * node locked thus preventing deadlock. As lockdep doesn't know this, use
- * subclass to avoid triggering lockdep warning in such cases.
- *
- * The key is set by the readpage_end_io_hook after the buffer has passed
- * csum validation but before the pages are unlocked. It is also set by
- * btrfs_init_new_buffer on freshly allocated blocks.
- *
- * We also add a check to make sure the highest level of the tree is the
- * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code
- * needs update as well.
- */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# if BTRFS_MAX_LEVEL != 8
-# error
-# endif
-
-#define DEFINE_LEVEL(stem, level) \
- .names[level] = "btrfs-" stem "-0" #level,
-
-#define DEFINE_NAME(stem) \
- DEFINE_LEVEL(stem, 0) \
- DEFINE_LEVEL(stem, 1) \
- DEFINE_LEVEL(stem, 2) \
- DEFINE_LEVEL(stem, 3) \
- DEFINE_LEVEL(stem, 4) \
- DEFINE_LEVEL(stem, 5) \
- DEFINE_LEVEL(stem, 6) \
- DEFINE_LEVEL(stem, 7)
-
-static struct btrfs_lockdep_keyset {
- u64 id; /* root objectid */
- /* Longest entry: btrfs-free-space-00 */
- char names[BTRFS_MAX_LEVEL][20];
- struct lock_class_key keys[BTRFS_MAX_LEVEL];
-} btrfs_lockdep_keysets[] = {
- { .id = BTRFS_ROOT_TREE_OBJECTID, DEFINE_NAME("root") },
- { .id = BTRFS_EXTENT_TREE_OBJECTID, DEFINE_NAME("extent") },
- { .id = BTRFS_CHUNK_TREE_OBJECTID, DEFINE_NAME("chunk") },
- { .id = BTRFS_DEV_TREE_OBJECTID, DEFINE_NAME("dev") },
- { .id = BTRFS_CSUM_TREE_OBJECTID, DEFINE_NAME("csum") },
- { .id = BTRFS_QUOTA_TREE_OBJECTID, DEFINE_NAME("quota") },
- { .id = BTRFS_TREE_LOG_OBJECTID, DEFINE_NAME("log") },
- { .id = BTRFS_TREE_RELOC_OBJECTID, DEFINE_NAME("treloc") },
- { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, DEFINE_NAME("dreloc") },
- { .id = BTRFS_UUID_TREE_OBJECTID, DEFINE_NAME("uuid") },
- { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, DEFINE_NAME("free-space") },
- { .id = 0, DEFINE_NAME("tree") },
-};
-
-#undef DEFINE_LEVEL
-#undef DEFINE_NAME
-
-void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
- int level)
-{
- struct btrfs_lockdep_keyset *ks;
-
- BUG_ON(level >= ARRAY_SIZE(ks->keys));
-
- /* find the matching keyset, id 0 is the default entry */
- for (ks = btrfs_lockdep_keysets; ks->id; ks++)
- if (ks->id == objectid)
- break;
-
- lockdep_set_class_and_name(&eb->lock,
- &ks->keys[level], ks->names[level]);
-}
-
-#endif
-
-/*
* Compute the csum of a btree block and store the result to provided buffer.
*/
static void csum_tree_block(struct extent_buffer *buf, u8 *result)
@@ -3150,7 +3068,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
init_waitqueue_head(&fs_info->transaction_blocked_wait);
init_waitqueue_head(&fs_info->async_submit_wait);
init_waitqueue_head(&fs_info->delayed_iputs_wait);
- init_waitqueue_head(&fs_info->zone_finish_wait);
/* Usable values until the real ones are cached from the superblock */
fs_info->nodesize = 4096;
@@ -4558,6 +4475,17 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
/*
+ * If we had UNFINISHED_DROPS we could still be processing them, so
+ * clear that bit and wake up relocation so it can stop.
+ * We must do this before stopping the block group reclaim task, because
+ * at btrfs_relocate_block_group() we wait for this bit, and after the
+ * wait we stop with -EINTR if btrfs_fs_closing() returns non-zero - we
+ * have just set BTRFS_FS_CLOSING_START, so btrfs_fs_closing() will
+ * return 1.
+ */
+ btrfs_wake_unfinished_drop(fs_info);
+
+ /*
* We may have the reclaim task running and relocating a data block group,
* in which case it may create delayed iputs. So stop it before we park
* the cleaner kthread otherwise we can get new delayed iputs after
@@ -4575,12 +4503,6 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
*/
kthread_park(fs_info->cleaner_kthread);
- /*
- * If we had UNFINISHED_DROPS we could still be processing them, so
- * clear that bit and wake up relocation so it can stop.
- */
- btrfs_wake_unfinished_drop(fs_info);
-
/* wait for the qgroup rescan worker to stop */
btrfs_qgroup_wait_for_completion(fs_info, false);
@@ -4603,6 +4525,31 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
/* clear out the rbtree of defraggable inodes */
btrfs_cleanup_defrag_inodes(fs_info);
+ /*
+ * After we parked the cleaner kthread, ordered extents may have
+ * completed and created new delayed iputs. If one of the async reclaim
+ * tasks is running and in the RUN_DELAYED_IPUTS flush state, then we
+ * can hang forever trying to stop it, because if a delayed iput is
+ * added after it ran btrfs_run_delayed_iputs() and before it called
+ * btrfs_wait_on_delayed_iputs(), it will hang forever since there is
+ * no one else to run iputs.
+ *
+ * So wait for all ongoing ordered extents to complete and then run
+ * delayed iputs. This works because once we reach this point no one
+ * can either create new ordered extents nor create delayed iputs
+ * through some other means.
+ *
+ * Also note that btrfs_wait_ordered_roots() is not safe here, because
+ * it waits for BTRFS_ORDERED_COMPLETE to be set on an ordered extent,
+ * but the delayed iput for the respective inode is made only when doing
+ * the final btrfs_put_ordered_extent() (which must happen at
+ * btrfs_finish_ordered_io() when we are unmounting).
+ */
+ btrfs_flush_workqueue(fs_info->endio_write_workers);
+ /* Ordered extents for free space inodes. */
+ btrfs_flush_workqueue(fs_info->endio_freespace_worker);
+ btrfs_run_delayed_iputs(fs_info);
+
cancel_work_sync(&fs_info->async_reclaim_work);
cancel_work_sync(&fs_info->async_data_reclaim_work);
cancel_work_sync(&fs_info->preempt_reclaim_work);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 8993b428e09c..47ad8e0a2d33 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -137,14 +137,4 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid);
int btrfs_init_root_free_objectid(struct btrfs_root *root);
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-void btrfs_set_buffer_lockdep_class(u64 objectid,
- struct extent_buffer *eb, int level);
-#else
-static inline void btrfs_set_buffer_lockdep_class(u64 objectid,
- struct extent_buffer *eb, int level)
-{
-}
-#endif
-
#endif
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index ea3ec1e761e8..6914cd8024ba 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2551,17 +2551,10 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
return -EINVAL;
/*
- * pull in the free space cache (if any) so that our pin
- * removes the free space from the cache. We have load_only set
- * to one because the slow code to read in the free extents does check
- * the pinned extents.
+ * Fully cache the free space first so that our pin removes the free space
+ * from the cache.
*/
- btrfs_cache_block_group(cache, 1);
- /*
- * Make sure we wait until the cache is completely built in case it is
- * missing or is invalid and therefore needs to be rebuilt.
- */
- ret = btrfs_wait_block_group_cache_done(cache);
+ ret = btrfs_cache_block_group(cache, true);
if (ret)
goto out;
@@ -2584,12 +2577,7 @@ static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
if (!block_group)
return -EINVAL;
- btrfs_cache_block_group(block_group, 1);
- /*
- * Make sure we wait until the cache is completely built in case it is
- * missing or is invalid and therefore needs to be rebuilt.
- */
- ret = btrfs_wait_block_group_cache_done(block_group);
+ ret = btrfs_cache_block_group(block_group, true);
if (ret)
goto out;
@@ -4399,7 +4387,7 @@ have_block_group:
ffe_ctl->cached = btrfs_block_group_done(block_group);
if (unlikely(!ffe_ctl->cached)) {
ffe_ctl->have_caching_bg = true;
- ret = btrfs_cache_block_group(block_group, 0);
+ ret = btrfs_cache_block_group(block_group, false);
/*
* If we get ENOMEM here or something else we want to
@@ -4867,6 +4855,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *buf;
+ u64 lockdep_owner = owner;
buf = btrfs_find_create_tree_block(fs_info, bytenr, owner, level);
if (IS_ERR(buf))
@@ -4886,11 +4875,26 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
}
/*
+ * The reloc trees are just snapshots, so we need them to appear to be
+ * just like any other fs tree WRT lockdep.
+ *
+ * The exception however is in replace_path() in relocation, where we
+ * hold the lock on the original fs root and then search for the reloc
+ * root. At that point we need to make sure any reloc root buffers are
+ * set to the BTRFS_TREE_RELOC_OBJECTID lockdep class in order to make
+ * lockdep happy.
+ */
+ if (lockdep_owner == BTRFS_TREE_RELOC_OBJECTID &&
+ !test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
+ lockdep_owner = BTRFS_FS_TREE_OBJECTID;
+
+ /*
* This needs to stay, because we could allocate a freed block from an
* old tree into a new tree, so we need to make sure this new block is
* set to the appropriate level and owner.
*/
- btrfs_set_buffer_lockdep_class(owner, buf, level);
+ btrfs_set_buffer_lockdep_class(lockdep_owner, buf, level);
+
__btrfs_tree_lock(buf, nest);
btrfs_clean_tree_block(buf);
clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
@@ -6153,13 +6157,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
if (end - start >= range->minlen) {
if (!btrfs_block_group_done(cache)) {
- ret = btrfs_cache_block_group(cache, 0);
- if (ret) {
- bg_failed++;
- bg_ret = ret;
- continue;
- }
- ret = btrfs_wait_block_group_cache_done(cache);
+ ret = btrfs_cache_block_group(cache, true);
if (ret) {
bg_failed++;
bg_ret = ret;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index bfae67c593c5..cf4f19e80e2f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3233,7 +3233,7 @@ static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
u32 bio_size = bio->bi_iter.bi_size;
u32 real_size;
const sector_t sector = disk_bytenr >> SECTOR_SHIFT;
- bool contig;
+ bool contig = false;
int ret;
ASSERT(bio);
@@ -3242,10 +3242,35 @@ static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
if (bio_ctrl->compress_type != compress_type)
return 0;
- if (bio_ctrl->compress_type != BTRFS_COMPRESS_NONE)
+
+ if (bio->bi_iter.bi_size == 0) {
+ /* We can always add a page into an empty bio. */
+ contig = true;
+ } else if (bio_ctrl->compress_type == BTRFS_COMPRESS_NONE) {
+ struct bio_vec *bvec = bio_last_bvec_all(bio);
+
+ /*
+ * The contig check requires the following conditions to be met:
+ * 1) The pages are belonging to the same inode
+ * This is implied by the call chain.
+ *
+ * 2) The range has adjacent logical bytenr
+ *
+ * 3) The range has adjacent file offset
+ * This is required for the usage of btrfs_bio->file_offset.
+ */
+ if (bio_end_sector(bio) == sector &&
+ page_offset(bvec->bv_page) + bvec->bv_offset +
+ bvec->bv_len == page_offset(page) + pg_offset)
+ contig = true;
+ } else {
+ /*
+ * For compression, all IO should have its logical bytenr
+ * set to the starting bytenr of the compressed extent.
+ */
contig = bio->bi_iter.bi_sector == sector;
- else
- contig = bio_end_sector(bio) == sector;
+ }
+
if (!contig)
return 0;
@@ -6140,6 +6165,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
struct extent_buffer *exists = NULL;
struct page *p;
struct address_space *mapping = fs_info->btree_inode->i_mapping;
+ u64 lockdep_owner = owner_root;
int uptodate = 1;
int ret;
@@ -6164,7 +6190,15 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
eb = __alloc_extent_buffer(fs_info, start, len);
if (!eb)
return ERR_PTR(-ENOMEM);
- btrfs_set_buffer_lockdep_class(owner_root, eb, level);
+
+ /*
+ * The reloc trees are just snapshots, so we need them to appear to be
+ * just like any other fs tree WRT lockdep.
+ */
+ if (lockdep_owner == BTRFS_TREE_RELOC_OBJECTID)
+ lockdep_owner = BTRFS_FS_TREE_OBJECTID;
+
+ btrfs_set_buffer_lockdep_class(lockdep_owner, eb, level);
num_pages = num_extent_pages(eb);
for (i = 0; i < num_pages; i++, index++) {
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 66c822182ecc..5a3f6e0d9688 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2482,6 +2482,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
btrfs_set_file_extent_offset(leaf, fi, 0);
+ btrfs_set_file_extent_generation(leaf, fi, trans->transid);
btrfs_mark_buffer_dirty(leaf);
goto out;
}
@@ -2498,6 +2499,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
btrfs_set_file_extent_offset(leaf, fi, 0);
+ btrfs_set_file_extent_generation(leaf, fi, trans->transid);
btrfs_mark_buffer_dirty(leaf);
goto out;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f0c97d25b4a0..1372210869b1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1644,10 +1644,9 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
done_offset = end;
if (done_offset == start) {
- struct btrfs_fs_info *info = inode->root->fs_info;
-
- wait_var_event(&info->zone_finish_wait,
- !test_bit(BTRFS_FS_NEED_ZONE_FINISH, &info->flags));
+ wait_on_bit_io(&inode->root->fs_info->flags,
+ BTRFS_FS_NEED_ZONE_FINISH,
+ TASK_UNINTERRUPTIBLE);
continue;
}
@@ -7694,6 +7693,20 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
bool unlock_extents = false;
/*
+ * We could potentially fault if we have a buffer > PAGE_SIZE, and if
+ * we're NOWAIT we may submit a bio for a partial range and return
+ * EIOCBQUEUED, which would result in an errant short read.
+ *
+ * The best way to handle this would be to allow for partial completions
+ * of iocb's, so we could submit the partial bio, return and fault in
+ * the rest of the pages, and then submit the io for the rest of the
+ * range. However we don't have that currently, so simply return
+ * -EAGAIN at this point so that the normal path is used.
+ */
+ if (!write && (flags & IOMAP_NOWAIT) && length > PAGE_SIZE)
+ return -EAGAIN;
+
+ /*
* Cap the size of reads to that usually seen in buffered I/O as we need
* to allocate a contiguous array for the checksums.
*/
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 33461b4f9c8b..9063072b399b 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -14,6 +14,93 @@
#include "locking.h"
/*
+ * Lockdep class keys for extent_buffer->lock's in this root. For a given
+ * eb, the lockdep key is determined by the btrfs_root it belongs to and
+ * the level the eb occupies in the tree.
+ *
+ * Different roots are used for different purposes and may nest inside each
+ * other and they require separate keysets. As lockdep keys should be
+ * static, assign keysets according to the purpose of the root as indicated
+ * by btrfs_root->root_key.objectid. This ensures that all special purpose
+ * roots have separate keysets.
+ *
+ * Lock-nesting across peer nodes is always done with the immediate parent
+ * node locked thus preventing deadlock. As lockdep doesn't know this, use
+ * subclass to avoid triggering lockdep warning in such cases.
+ *
+ * The key is set by the readpage_end_io_hook after the buffer has passed
+ * csum validation but before the pages are unlocked. It is also set by
+ * btrfs_init_new_buffer on freshly allocated blocks.
+ *
+ * We also add a check to make sure the highest level of the tree is the
+ * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code
+ * needs update as well.
+ */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#if BTRFS_MAX_LEVEL != 8
+#error
+#endif
+
+#define DEFINE_LEVEL(stem, level) \
+ .names[level] = "btrfs-" stem "-0" #level,
+
+#define DEFINE_NAME(stem) \
+ DEFINE_LEVEL(stem, 0) \
+ DEFINE_LEVEL(stem, 1) \
+ DEFINE_LEVEL(stem, 2) \
+ DEFINE_LEVEL(stem, 3) \
+ DEFINE_LEVEL(stem, 4) \
+ DEFINE_LEVEL(stem, 5) \
+ DEFINE_LEVEL(stem, 6) \
+ DEFINE_LEVEL(stem, 7)
+
+static struct btrfs_lockdep_keyset {
+ u64 id; /* root objectid */
+ /* Longest entry: btrfs-free-space-00 */
+ char names[BTRFS_MAX_LEVEL][20];
+ struct lock_class_key keys[BTRFS_MAX_LEVEL];
+} btrfs_lockdep_keysets[] = {
+ { .id = BTRFS_ROOT_TREE_OBJECTID, DEFINE_NAME("root") },
+ { .id = BTRFS_EXTENT_TREE_OBJECTID, DEFINE_NAME("extent") },
+ { .id = BTRFS_CHUNK_TREE_OBJECTID, DEFINE_NAME("chunk") },
+ { .id = BTRFS_DEV_TREE_OBJECTID, DEFINE_NAME("dev") },
+ { .id = BTRFS_CSUM_TREE_OBJECTID, DEFINE_NAME("csum") },
+ { .id = BTRFS_QUOTA_TREE_OBJECTID, DEFINE_NAME("quota") },
+ { .id = BTRFS_TREE_LOG_OBJECTID, DEFINE_NAME("log") },
+ { .id = BTRFS_TREE_RELOC_OBJECTID, DEFINE_NAME("treloc") },
+ { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, DEFINE_NAME("dreloc") },
+ { .id = BTRFS_UUID_TREE_OBJECTID, DEFINE_NAME("uuid") },
+ { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, DEFINE_NAME("free-space") },
+ { .id = 0, DEFINE_NAME("tree") },
+};
+
+#undef DEFINE_LEVEL
+#undef DEFINE_NAME
+
+void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level)
+{
+ struct btrfs_lockdep_keyset *ks;
+
+ BUG_ON(level >= ARRAY_SIZE(ks->keys));
+
+ /* Find the matching keyset, id 0 is the default entry */
+ for (ks = btrfs_lockdep_keysets; ks->id; ks++)
+ if (ks->id == objectid)
+ break;
+
+ lockdep_set_class_and_name(&eb->lock, &ks->keys[level], ks->names[level]);
+}
+
+void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, struct extent_buffer *eb)
+{
+ if (test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
+ btrfs_set_buffer_lockdep_class(root->root_key.objectid,
+ eb, btrfs_header_level(eb));
+}
+
+#endif
+
+/*
* Extent buffer locking
* =====================
*
@@ -164,6 +251,8 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
while (1) {
eb = btrfs_root_node(root);
+
+ btrfs_maybe_reset_lockdep_class(root, eb);
btrfs_tree_lock(eb);
if (eb == root->node)
break;
@@ -185,6 +274,8 @@ struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
while (1) {
eb = btrfs_root_node(root);
+
+ btrfs_maybe_reset_lockdep_class(root, eb);
btrfs_tree_read_lock(eb);
if (eb == root->node)
break;
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index bbc45534ae9a..ab268be09bb5 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -131,4 +131,18 @@ void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock);
void btrfs_drew_read_lock(struct btrfs_drew_lock *lock);
void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level);
+void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, struct extent_buffer *eb);
+#else
+static inline void btrfs_set_buffer_lockdep_class(u64 objectid,
+ struct extent_buffer *eb, int level)
+{
+}
+static inline void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root,
+ struct extent_buffer *eb)
+{
+}
+#endif
+
#endif
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index a6dc827e75af..45c02aba2492 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1326,7 +1326,9 @@ again:
btrfs_release_path(path);
path->lowest_level = level;
+ set_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &src->state);
ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
+ clear_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &src->state);
path->lowest_level = 0;
if (ret) {
if (ret > 0)
@@ -3573,7 +3575,12 @@ int prepare_to_relocate(struct reloc_control *rc)
*/
return PTR_ERR(trans);
}
- return btrfs_commit_transaction(trans);
+
+ ret = btrfs_commit_transaction(trans);
+ if (ret)
+ unset_reloc_control(rc);
+
+ return ret;
}
static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index a64b26b16904..d647cb2938c0 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -349,9 +349,10 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
key.offset = ref_id;
again:
ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
- if (ret < 0)
+ if (ret < 0) {
+ err = ret;
goto out;
- if (ret == 0) {
+ } else if (ret == 0) {
leaf = path->nodes[0];
ref = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_root_ref);
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index d0cbeb7ae81c..435559ba94fa 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -199,7 +199,7 @@ static u64 calc_chunk_size(const struct btrfs_fs_info *fs_info, u64 flags)
ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
if (flags & BTRFS_BLOCK_GROUP_DATA)
- return SZ_1G;
+ return BTRFS_MAX_DATA_CHUNK_SIZE;
else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
return SZ_32M;
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 9e0e0ae2288c..43f905ab0a18 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -1233,7 +1233,8 @@ static void extent_err(const struct extent_buffer *eb, int slot,
}
static int check_extent_item(struct extent_buffer *leaf,
- struct btrfs_key *key, int slot)
+ struct btrfs_key *key, int slot,
+ struct btrfs_key *prev_key)
{
struct btrfs_fs_info *fs_info = leaf->fs_info;
struct btrfs_extent_item *ei;
@@ -1453,6 +1454,26 @@ static int check_extent_item(struct extent_buffer *leaf,
total_refs, inline_refs);
return -EUCLEAN;
}
+
+ if ((prev_key->type == BTRFS_EXTENT_ITEM_KEY) ||
+ (prev_key->type == BTRFS_METADATA_ITEM_KEY)) {
+ u64 prev_end = prev_key->objectid;
+
+ if (prev_key->type == BTRFS_METADATA_ITEM_KEY)
+ prev_end += fs_info->nodesize;
+ else
+ prev_end += prev_key->offset;
+
+ if (unlikely(prev_end > key->objectid)) {
+ extent_err(leaf, slot,
+ "previous extent [%llu %u %llu] overlaps current extent [%llu %u %llu]",
+ prev_key->objectid, prev_key->type,
+ prev_key->offset, key->objectid, key->type,
+ key->offset);
+ return -EUCLEAN;
+ }
+ }
+
return 0;
}
@@ -1621,7 +1642,7 @@ static int check_leaf_item(struct extent_buffer *leaf,
break;
case BTRFS_EXTENT_ITEM_KEY:
case BTRFS_METADATA_ITEM_KEY:
- ret = check_extent_item(leaf, key, slot);
+ ret = check_extent_item(leaf, key, slot, prev_key);
break;
case BTRFS_TREE_BLOCK_REF_KEY:
case BTRFS_SHARED_DATA_REF_KEY:
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index dcf75a8daa20..9205c4a5ca81 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1146,7 +1146,9 @@ again:
extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen,
inode_objectid, parent_objectid, 0,
0);
- if (!IS_ERR_OR_NULL(extref)) {
+ if (IS_ERR(extref)) {
+ return PTR_ERR(extref);
+ } else if (extref) {
u32 item_size;
u32 cur_offset = 0;
unsigned long base;
@@ -1457,7 +1459,7 @@ static int add_link(struct btrfs_trans_handle *trans,
* on the inode will not free it. We will fixup the link count later.
*/
if (other_inode->i_nlink == 0)
- inc_nlink(other_inode);
+ set_nlink(other_inode, 1);
add_link:
ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
name, namelen, 0, ref_index);
@@ -1600,7 +1602,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
* free it. We will fixup the link count later.
*/
if (!ret && inode->i_nlink == 0)
- inc_nlink(inode);
+ set_nlink(inode, 1);
}
if (ret < 0)
goto out;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 272901514b0c..f63ff91e2883 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2345,8 +2345,11 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info,
ret = btrfs_get_bdev_and_sb(path, FMODE_READ, fs_info->bdev_holder, 0,
&bdev, &disk_super);
- if (ret)
+ if (ret) {
+ btrfs_put_dev_args_from_path(args);
return ret;
+ }
+
args->devid = btrfs_stack_device_id(&disk_super->dev_item);
memcpy(args->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE);
if (btrfs_fs_incompat(fs_info, METADATA_UUID))
@@ -5264,6 +5267,9 @@ static int decide_stripe_size_regular(struct alloc_chunk_ctl *ctl,
ctl->stripe_size);
}
+ /* Stripe size should not go beyond 1G. */
+ ctl->stripe_size = min_t(u64, ctl->stripe_size, SZ_1G);
+
/* Align to BTRFS_STRIPE_LEN */
ctl->stripe_size = round_down(ctl->stripe_size, BTRFS_STRIPE_LEN);
ctl->chunk_size = ctl->stripe_size * data_stripes;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 7421abcf325a..5bb8d8c86311 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -371,6 +371,9 @@ static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
const char *name, const void *buffer,
size_t size, int flags)
{
+ if (btrfs_root_readonly(BTRFS_I(inode)->root))
+ return -EROFS;
+
name = xattr_full_name(handler, name);
return btrfs_setxattr_trans(inode, name, buffer, size, flags);
}
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index b150b07ba1a7..73c6929f7be6 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -421,10 +421,19 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
* since btrfs adds the pages one by one to a bio, and btrfs cannot
* increase the metadata reservation even if it increases the number of
* extents, it is safe to stick with the limit.
+ *
+ * With the zoned emulation, we can have non-zoned device on the zoned
+ * mode. In this case, we don't have a valid max zone append size. So,
+ * use max_segments * PAGE_SIZE as the pseudo max_zone_append_size.
*/
- zone_info->max_zone_append_size =
- min_t(u64, (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
- (u64)bdev_max_segments(bdev) << PAGE_SHIFT);
+ if (bdev_is_zoned(bdev)) {
+ zone_info->max_zone_append_size = min_t(u64,
+ (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
+ (u64)bdev_max_segments(bdev) << PAGE_SHIFT);
+ } else {
+ zone_info->max_zone_append_size =
+ (u64)bdev_max_segments(bdev) << PAGE_SHIFT;
+ }
if (!IS_ALIGNED(nr_sectors, zone_sectors))
zone_info->nr_zones++;
@@ -1178,7 +1187,7 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
* offset.
*/
static int calculate_alloc_pointer(struct btrfs_block_group *cache,
- u64 *offset_ret)
+ u64 *offset_ret, bool new)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_root *root;
@@ -1188,6 +1197,21 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache,
int ret;
u64 length;
+ /*
+ * Avoid tree lookups for a new block group, there's no use for it.
+ * It must always be 0.
+ *
+ * Also, we have a lock chain of extent buffer lock -> chunk mutex.
+ * For new a block group, this function is called from
+ * btrfs_make_block_group() which is already taking the chunk mutex.
+ * Thus, we cannot call calculate_alloc_pointer() which takes extent
+ * buffer locks to avoid deadlock.
+ */
+ if (new) {
+ *offset_ret = 0;
+ return 0;
+ }
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -1323,6 +1347,13 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
else
num_conventional++;
+ /*
+ * Consider a zone as active if we can allow any number of
+ * active zones.
+ */
+ if (!device->zone_info->max_active_zones)
+ __set_bit(i, active);
+
if (!is_sequential) {
alloc_offsets[i] = WP_CONVENTIONAL;
continue;
@@ -1389,45 +1420,23 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
__set_bit(i, active);
break;
}
-
- /*
- * Consider a zone as active if we can allow any number of
- * active zones.
- */
- if (!device->zone_info->max_active_zones)
- __set_bit(i, active);
}
if (num_sequential > 0)
cache->seq_zone = true;
if (num_conventional > 0) {
- /*
- * Avoid calling calculate_alloc_pointer() for new BG. It
- * is no use for new BG. It must be always 0.
- *
- * Also, we have a lock chain of extent buffer lock ->
- * chunk mutex. For new BG, this function is called from
- * btrfs_make_block_group() which is already taking the
- * chunk mutex. Thus, we cannot call
- * calculate_alloc_pointer() which takes extent buffer
- * locks to avoid deadlock.
- */
-
/* Zone capacity is always zone size in emulation */
cache->zone_capacity = cache->length;
- if (new) {
- cache->alloc_offset = 0;
- goto out;
- }
- ret = calculate_alloc_pointer(cache, &last_alloc);
- if (ret || map->num_stripes == num_conventional) {
- if (!ret)
- cache->alloc_offset = last_alloc;
- else
- btrfs_err(fs_info,
+ ret = calculate_alloc_pointer(cache, &last_alloc, new);
+ if (ret) {
+ btrfs_err(fs_info,
"zoned: failed to determine allocation offset of bg %llu",
- cache->start);
+ cache->start);
+ goto out;
+ } else if (map->num_stripes == num_conventional) {
+ cache->alloc_offset = last_alloc;
+ cache->zone_is_active = 1;
goto out;
}
}
@@ -1495,13 +1504,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
goto out;
}
- if (cache->zone_is_active) {
- btrfs_get_block_group(cache);
- spin_lock(&fs_info->zone_active_bgs_lock);
- list_add_tail(&cache->active_bg_list, &fs_info->zone_active_bgs);
- spin_unlock(&fs_info->zone_active_bgs_lock);
- }
-
out:
if (cache->alloc_offset > fs_info->zone_size) {
btrfs_err(fs_info,
@@ -1526,10 +1528,16 @@ out:
ret = -EIO;
}
- if (!ret)
+ if (!ret) {
cache->meta_write_pointer = cache->alloc_offset + cache->start;
-
- if (ret) {
+ if (cache->zone_is_active) {
+ btrfs_get_block_group(cache);
+ spin_lock(&fs_info->zone_active_bgs_lock);
+ list_add_tail(&cache->active_bg_list,
+ &fs_info->zone_active_bgs);
+ spin_unlock(&fs_info->zone_active_bgs_lock);
+ }
+ } else {
kfree(cache->physical_map);
cache->physical_map = NULL;
}
@@ -1910,10 +1918,44 @@ out_unlock:
return ret;
}
+static void wait_eb_writebacks(struct btrfs_block_group *block_group)
+{
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
+ const u64 end = block_group->start + block_group->length;
+ struct radix_tree_iter iter;
+ struct extent_buffer *eb;
+ void __rcu **slot;
+
+ rcu_read_lock();
+ radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter,
+ block_group->start >> fs_info->sectorsize_bits) {
+ eb = radix_tree_deref_slot(slot);
+ if (!eb)
+ continue;
+ if (radix_tree_deref_retry(eb)) {
+ slot = radix_tree_iter_retry(&iter);
+ continue;
+ }
+
+ if (eb->start < block_group->start)
+ continue;
+ if (eb->start >= end)
+ break;
+
+ slot = radix_tree_iter_resume(slot, &iter);
+ rcu_read_unlock();
+ wait_on_extent_buffer_writeback(eb);
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+}
+
static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct map_lookup *map;
+ const bool is_metadata = (block_group->flags &
+ (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM));
int ret = 0;
int i;
@@ -1924,8 +1966,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
}
/* Check if we have unwritten allocated space */
- if ((block_group->flags &
- (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) &&
+ if (is_metadata &&
block_group->start + block_group->alloc_offset > block_group->meta_write_pointer) {
spin_unlock(&block_group->lock);
return -EAGAIN;
@@ -1950,6 +1991,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
/* No need to wait for NOCOW writers. Zoned mode does not allow that */
btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start,
block_group->length);
+ /* Wait for extent buffers to be written. */
+ if (is_metadata)
+ wait_eb_writebacks(block_group);
spin_lock(&block_group->lock);
@@ -2007,8 +2051,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
/* For active_bg_list */
btrfs_put_block_group(block_group);
- clear_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags);
- wake_up_all(&fs_info->zone_finish_wait);
+ clear_and_wake_up_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags);
return 0;
}
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 6cba2c6de2f9..2ad58c465208 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -111,6 +111,7 @@ struct cachefiles_cache {
char *tag; /* cache binding tag */
refcount_t unbind_pincount;/* refcount to do daemon unbind */
struct xarray reqs; /* xarray of pending on-demand requests */
+ unsigned long req_id_next;
struct xarray ondemand_ids; /* xarray for ondemand_id allocation */
u32 ondemand_id_next;
};
diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c
index 1fee702d5529..0254ed39f68c 100644
--- a/fs/cachefiles/ondemand.c
+++ b/fs/cachefiles/ondemand.c
@@ -158,9 +158,13 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args)
/* fail OPEN request if daemon reports an error */
if (size < 0) {
- if (!IS_ERR_VALUE(size))
- size = -EINVAL;
- req->error = size;
+ if (!IS_ERR_VALUE(size)) {
+ req->error = -EINVAL;
+ ret = -EINVAL;
+ } else {
+ req->error = size;
+ ret = 0;
+ }
goto out;
}
@@ -238,14 +242,19 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache,
unsigned long id = 0;
size_t n;
int ret = 0;
- XA_STATE(xas, &cache->reqs, 0);
+ XA_STATE(xas, &cache->reqs, cache->req_id_next);
/*
- * Search for a request that has not ever been processed, to prevent
- * requests from being processed repeatedly.
+ * Cyclically search for a request that has not ever been processed,
+ * to prevent requests from being processed repeatedly, and make
+ * request distribution fair.
*/
xa_lock(&cache->reqs);
req = xas_find_marked(&xas, UINT_MAX, CACHEFILES_REQ_NEW);
+ if (!req && cache->req_id_next > 0) {
+ xas_set(&xas, 0);
+ req = xas_find_marked(&xas, cache->req_id_next - 1, CACHEFILES_REQ_NEW);
+ }
if (!req) {
xa_unlock(&cache->reqs);
return 0;
@@ -260,6 +269,7 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache,
}
xas_clear_mark(&xas, CACHEFILES_REQ_NEW);
+ cache->req_id_next = xas.xa_index + 1;
xa_unlock(&cache->reqs);
id = xas.xa_index;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 11fd85de7217..c05477e28cff 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -42,7 +42,7 @@ void cifs_dump_detail(void *buf, struct TCP_Server_Info *server)
smb->Command, smb->Status.CifsError,
smb->Flags, smb->Flags2, smb->Mid, smb->Pid);
cifs_dbg(VFS, "smb buf %p len %u\n", smb,
- server->ops->calc_smb_size(smb, server));
+ server->ops->calc_smb_size(smb));
#endif /* CONFIG_CIFS_DEBUG2 */
}
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 8f7835ccbca1..46f5718754f9 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -32,10 +32,9 @@ int __cifs_calc_signature(struct smb_rqst *rqst,
int rc;
struct kvec *iov = rqst->rq_iov;
int n_vec = rqst->rq_nvec;
- int is_smb2 = server->vals->header_preamble_size == 0;
/* iov[0] is actual data and not the rfc1002 length for SMB2+ */
- if (is_smb2) {
+ if (!is_smb1(server)) {
if (iov[0].iov_len <= 4)
return -EIO;
i = 0;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index f54d8bf2732a..8042d7280dec 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1248,6 +1248,12 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
lock_two_nondirectories(target_inode, src_inode);
cifs_dbg(FYI, "about to flush pages\n");
+
+ rc = filemap_write_and_wait_range(src_inode->i_mapping, off,
+ off + len - 1);
+ if (rc)
+ goto out;
+
/* should we flush first and last page first */
truncate_inode_pages(&target_inode->i_data, 0);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 81f4c15936d0..5b4a7a32bdc5 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -153,6 +153,6 @@ extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
/* when changing internal version - update following two lines at same time */
-#define SMB3_PRODUCT_BUILD 38
-#define CIFS_VERSION "2.38"
+#define SMB3_PRODUCT_BUILD 39
+#define CIFS_VERSION "2.39"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index bc0ee2d4b47b..ae7f571a7dba 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -417,7 +417,7 @@ struct smb_version_operations {
int (*close_dir)(const unsigned int, struct cifs_tcon *,
struct cifs_fid *);
/* calculate a size of SMB message */
- unsigned int (*calc_smb_size)(void *buf, struct TCP_Server_Info *ptcpi);
+ unsigned int (*calc_smb_size)(void *buf);
/* check for STATUS_PENDING and process the response if yes */
bool (*is_status_pending)(char *buf, struct TCP_Server_Info *server);
/* check for STATUS_NETWORK_SESSION_EXPIRED */
@@ -557,6 +557,8 @@ struct smb_version_values {
#define HEADER_SIZE(server) (server->vals->header_size)
#define MAX_HEADER_SIZE(server) (server->vals->max_header_size)
+#define HEADER_PREAMBLE_SIZE(server) (server->vals->header_preamble_size)
+#define MID_HEADER_SIZE(server) (HEADER_SIZE(server) - 1 - HEADER_PREAMBLE_SIZE(server))
/**
* CIFS superblock mount flags (mnt_cifs_flags) to consider when
@@ -750,6 +752,11 @@ struct TCP_Server_Info {
#endif
};
+static inline bool is_smb1(struct TCP_Server_Info *server)
+{
+ return HEADER_PREAMBLE_SIZE(server) != 0;
+}
+
static inline void cifs_server_lock(struct TCP_Server_Info *server)
{
unsigned int nofs_flag = memalloc_nofs_save();
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 87a77a684339..3bc94bcc7177 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -151,7 +151,7 @@ extern int cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
extern int cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
struct cifsFileInfo **ret_file);
-extern unsigned int smbCalcSize(void *buf, struct TCP_Server_Info *server);
+extern unsigned int smbCalcSize(void *buf);
extern int decode_negTokenInit(unsigned char *security_blob, int length,
struct TCP_Server_Info *server);
extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
diff --git a/fs/cifs/cifsroot.c b/fs/cifs/cifsroot.c
index 9e91a5a40aae..56ec1b233f52 100644
--- a/fs/cifs/cifsroot.c
+++ b/fs/cifs/cifsroot.c
@@ -59,7 +59,7 @@ static int __init cifs_root_setup(char *line)
pr_err("Root-CIFS: UNC path too long\n");
return 1;
}
- strlcpy(root_dev, line, len);
+ strscpy(root_dev, line, len);
srvaddr = parse_srvaddr(&line[2], s);
if (*s) {
int n = snprintf(root_opts,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 9111c025bcb8..7ae6f2c08153 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -702,9 +702,6 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg)
int length = 0;
int total_read;
- smb_msg->msg_control = NULL;
- smb_msg->msg_controllen = 0;
-
for (total_read = 0; msg_data_left(smb_msg); total_read += length) {
try_to_freeze();
@@ -760,7 +757,7 @@ int
cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
unsigned int to_read)
{
- struct msghdr smb_msg;
+ struct msghdr smb_msg = {};
struct kvec iov = {.iov_base = buf, .iov_len = to_read};
iov_iter_kvec(&smb_msg.msg_iter, READ, &iov, 1, to_read);
@@ -770,15 +767,13 @@ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
ssize_t
cifs_discard_from_socket(struct TCP_Server_Info *server, size_t to_read)
{
- struct msghdr smb_msg;
+ struct msghdr smb_msg = {};
/*
* iov_iter_discard already sets smb_msg.type and count and iov_offset
* and cifs_readv_from_socket sets msg_control and msg_controllen
* so little to initialize in struct msghdr
*/
- smb_msg.msg_name = NULL;
- smb_msg.msg_namelen = 0;
iov_iter_discard(&smb_msg.msg_iter, READ, to_read);
return cifs_readv_from_socket(server, &smb_msg);
@@ -788,7 +783,7 @@ int
cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page,
unsigned int page_offset, unsigned int to_read)
{
- struct msghdr smb_msg;
+ struct msghdr smb_msg = {};
struct bio_vec bv = {
.bv_page = page, .bv_len = to_read, .bv_offset = page_offset};
iov_iter_bvec(&smb_msg.msg_iter, READ, &bv, 1, to_read);
@@ -871,7 +866,7 @@ smb2_get_credits_from_hdr(char *buffer, struct TCP_Server_Info *server)
/*
* SMB1 does not use credits.
*/
- if (server->vals->header_preamble_size)
+ if (is_smb1(server))
return 0;
return le16_to_cpu(shdr->CreditRequest);
@@ -1050,7 +1045,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
/* make sure this will fit in a large buffer */
if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server) -
- server->vals->header_preamble_size) {
+ HEADER_PREAMBLE_SIZE(server)) {
cifs_server_dbg(VFS, "SMB response too long (%u bytes)\n", pdu_length);
cifs_reconnect(server, true);
return -ECONNABORTED;
@@ -1065,8 +1060,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
/* now read the rest */
length = cifs_read_from_socket(server, buf + HEADER_SIZE(server) - 1,
- pdu_length - HEADER_SIZE(server) + 1
- + server->vals->header_preamble_size);
+ pdu_length - MID_HEADER_SIZE(server));
if (length < 0)
return length;
@@ -1122,7 +1116,7 @@ smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server)
/*
* SMB1 does not use credits.
*/
- if (server->vals->header_preamble_size)
+ if (is_smb1(server))
return;
if (shdr->CreditRequest) {
@@ -1180,10 +1174,10 @@ cifs_demultiplex_thread(void *p)
if (length < 0)
continue;
- if (server->vals->header_preamble_size == 0)
- server->total_read = 0;
- else
+ if (is_smb1(server))
server->total_read = length;
+ else
+ server->total_read = 0;
/*
* The right amount was read from socket - 4 bytes,
@@ -1198,8 +1192,7 @@ next_pdu:
server->pdu_size = pdu_length;
/* make sure we have enough to get to the MID */
- if (server->pdu_size < HEADER_SIZE(server) - 1 -
- server->vals->header_preamble_size) {
+ if (server->pdu_size < MID_HEADER_SIZE(server)) {
cifs_server_dbg(VFS, "SMB response too short (%u bytes)\n",
server->pdu_size);
cifs_reconnect(server, true);
@@ -1208,9 +1201,8 @@ next_pdu:
/* read down to the MID */
length = cifs_read_from_socket(server,
- buf + server->vals->header_preamble_size,
- HEADER_SIZE(server) - 1
- - server->vals->header_preamble_size);
+ buf + HEADER_PREAMBLE_SIZE(server),
+ MID_HEADER_SIZE(server));
if (length < 0)
continue;
server->total_read += length;
@@ -2353,7 +2345,9 @@ cifs_put_tcon(struct cifs_tcon *tcon)
ses = tcon->ses;
cifs_dbg(FYI, "%s: tc_count=%d\n", __func__, tcon->tc_count);
spin_lock(&cifs_tcp_ses_lock);
+ spin_lock(&tcon->tc_lock);
if (--tcon->tc_count > 0) {
+ spin_unlock(&tcon->tc_lock);
spin_unlock(&cifs_tcp_ses_lock);
return;
}
@@ -2362,6 +2356,7 @@ cifs_put_tcon(struct cifs_tcon *tcon)
WARN_ON(tcon->tc_count < 0);
list_del_init(&tcon->tcon_list);
+ spin_unlock(&tcon->tc_lock);
spin_unlock(&cifs_tcp_ses_lock);
/* cancel polling of interfaces */
@@ -3994,7 +3989,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
}
bcc_ptr += length + 1;
bytes_left -= (length + 1);
- strlcpy(tcon->treeName, tree, sizeof(tcon->treeName));
+ strscpy(tcon->treeName, tree, sizeof(tcon->treeName));
/* mostly informational -- no need to fail on error here */
kfree(tcon->nativeFileSystem);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index fa738adc031f..6f38b134a346 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3575,6 +3575,9 @@ static ssize_t __cifs_writev(
ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
{
+ struct file *file = iocb->ki_filp;
+
+ cifs_revalidate_mapping(file->f_inode);
return __cifs_writev(iocb, from, true);
}
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 34d990f06fd6..87f60f736731 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -354,7 +354,7 @@ checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server)
/* otherwise, there is enough to get to the BCC */
if (check_smb_hdr(smb))
return -EIO;
- clc_len = smbCalcSize(smb, server);
+ clc_len = smbCalcSize(smb);
if (4 + rfclen != total_read) {
cifs_dbg(VFS, "Length read does not match RFC1001 length %d\n",
@@ -737,6 +737,8 @@ cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode)
list_for_each_entry(cfile, &cifs_inode->openFileList, flist) {
if (delayed_work_pending(&cfile->deferred)) {
if (cancel_delayed_work(&cfile->deferred)) {
+ cifs_del_deferred_close(cfile);
+
tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC);
if (tmp_list == NULL)
break;
@@ -766,6 +768,8 @@ cifs_close_all_deferred_files(struct cifs_tcon *tcon)
list_for_each_entry(cfile, &tcon->openFileList, tlist) {
if (delayed_work_pending(&cfile->deferred)) {
if (cancel_delayed_work(&cfile->deferred)) {
+ cifs_del_deferred_close(cfile);
+
tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC);
if (tmp_list == NULL)
break;
@@ -799,6 +803,8 @@ cifs_close_deferred_file_under_dentry(struct cifs_tcon *tcon, const char *path)
if (strstr(full_path, path)) {
if (delayed_work_pending(&cfile->deferred)) {
if (cancel_delayed_work(&cfile->deferred)) {
+ cifs_del_deferred_close(cfile);
+
tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC);
if (tmp_list == NULL)
break;
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 28caae7aed1b..1b52e6ac431c 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -909,7 +909,7 @@ map_and_check_smb_error(struct mid_q_entry *mid, bool logErr)
* portion, the number of word parameters and the data portion of the message
*/
unsigned int
-smbCalcSize(void *buf, struct TCP_Server_Info *server)
+smbCalcSize(void *buf)
{
struct smb_hdr *ptr = buf;
return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) +
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 2eece8a07c11..8e060c00c969 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -806,8 +806,7 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
end_of_smb = cfile->srch_inf.ntwrk_buf_start +
server->ops->calc_smb_size(
- cfile->srch_inf.ntwrk_buf_start,
- server);
+ cfile->srch_inf.ntwrk_buf_start);
cur_ent = cfile->srch_inf.srch_entries_start;
first_entry_in_buffer = cfile->srch_inf.index_of_last_entry
@@ -1161,8 +1160,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
cifs_dbg(FYI, "loop through %d times filling dir for net buf %p\n",
num_to_fill, cifsFile->srch_inf.ntwrk_buf_start);
max_len = tcon->ses->server->ops->calc_smb_size(
- cifsFile->srch_inf.ntwrk_buf_start,
- tcon->ses->server);
+ cifsFile->srch_inf.ntwrk_buf_start);
end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len;
tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL);
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index f5dcc4940b6d..9dfd2dd612c2 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -61,7 +61,6 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
nr_ioctl_req.Reserved = 0;
rc = SMB2_ioctl(xid, oparms->tcon, fid->persistent_fid,
fid->volatile_fid, FSCTL_LMR_REQUEST_RESILIENCY,
- true /* is_fsctl */,
(char *)&nr_ioctl_req, sizeof(nr_ioctl_req),
CIFSMaxBufSize, NULL, NULL /* no return info */);
if (rc == -EOPNOTSUPP) {
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 6a6ec6efb45a..d73e5672aac4 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -222,7 +222,7 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server)
}
}
- calc_len = smb2_calc_size(buf, server);
+ calc_len = smb2_calc_size(buf);
/* For SMB2_IOCTL, OutputOffset and OutputLength are optional, so might
* be 0, and not a real miscalculation */
@@ -410,7 +410,7 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *shdr)
* portion, the number of word parameters and the data portion of the message.
*/
unsigned int
-smb2_calc_size(void *buf, struct TCP_Server_Info *srvr)
+smb2_calc_size(void *buf)
{
struct smb2_pdu *pdu = buf;
struct smb2_hdr *shdr = &pdu->hdr;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index f406af596887..421be43af425 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -387,7 +387,7 @@ smb2_dump_detail(void *buf, struct TCP_Server_Info *server)
shdr->Command, shdr->Status, shdr->Flags, shdr->MessageId,
shdr->Id.SyncId.ProcessId);
cifs_server_dbg(VFS, "smb buf %p len %u\n", buf,
- server->ops->calc_smb_size(buf, server));
+ server->ops->calc_smb_size(buf));
#endif
}
@@ -681,7 +681,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
struct cifs_ses *ses = tcon->ses;
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
- FSCTL_QUERY_NETWORK_INTERFACE_INFO, true /* is_fsctl */,
+ FSCTL_QUERY_NETWORK_INTERFACE_INFO,
NULL /* no data input */, 0 /* no data input */,
CIFSMaxBufSize, (char **)&out_buf, &ret_data_len);
if (rc == -EOPNOTSUPP) {
@@ -1323,9 +1323,8 @@ SMB2_request_res_key(const unsigned int xid, struct cifs_tcon *tcon,
struct resume_key_req *res_key;
rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
- FSCTL_SRV_REQUEST_RESUME_KEY, true /* is_fsctl */,
- NULL, 0 /* no input */, CIFSMaxBufSize,
- (char **)&res_key, &ret_data_len);
+ FSCTL_SRV_REQUEST_RESUME_KEY, NULL, 0 /* no input */,
+ CIFSMaxBufSize, (char **)&res_key, &ret_data_len);
if (rc == -EOPNOTSUPP) {
pr_warn_once("Server share %s does not support copy range\n", tcon->treeName);
@@ -1467,7 +1466,7 @@ smb2_ioctl_query_info(const unsigned int xid,
rqst[1].rq_nvec = SMB2_IOCTL_IOV_SIZE;
rc = SMB2_ioctl_init(tcon, server, &rqst[1], COMPOUND_FID, COMPOUND_FID,
- qi.info_type, true, buffer, qi.output_buffer_length,
+ qi.info_type, buffer, qi.output_buffer_length,
CIFSMaxBufSize - MAX_SMB2_CREATE_RESPONSE_SIZE -
MAX_SMB2_CLOSE_RESPONSE_SIZE);
free_req1_func = SMB2_ioctl_free;
@@ -1601,17 +1600,8 @@ smb2_copychunk_range(const unsigned int xid,
int chunks_copied = 0;
bool chunk_sizes_updated = false;
ssize_t bytes_written, total_bytes_written = 0;
- struct inode *inode;
pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL);
-
- /*
- * We need to flush all unwritten data before we can send the
- * copychunk ioctl to the server.
- */
- inode = d_inode(trgtfile->dentry);
- filemap_write_and_wait(inode->i_mapping);
-
if (pcchunk == NULL)
return -ENOMEM;
@@ -1643,9 +1633,8 @@ smb2_copychunk_range(const unsigned int xid,
retbuf = NULL;
rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE,
- true /* is_fsctl */, (char *)pcchunk,
- sizeof(struct copychunk_ioctl), CIFSMaxBufSize,
- (char **)&retbuf, &ret_data_len);
+ (char *)pcchunk, sizeof(struct copychunk_ioctl),
+ CIFSMaxBufSize, (char **)&retbuf, &ret_data_len);
if (rc == 0) {
if (ret_data_len !=
sizeof(struct copychunk_ioctl_rsp)) {
@@ -1805,7 +1794,6 @@ static bool smb2_set_sparse(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, FSCTL_SET_SPARSE,
- true /* is_fctl */,
&setsparse, 1, CIFSMaxBufSize, NULL, NULL);
if (rc) {
tcon->broken_sparse_sup = true;
@@ -1888,7 +1876,6 @@ smb2_duplicate_extents(const unsigned int xid,
rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
trgtfile->fid.volatile_fid,
FSCTL_DUPLICATE_EXTENTS_TO_FILE,
- true /* is_fsctl */,
(char *)&dup_ext_buf,
sizeof(struct duplicate_extents_to_file),
CIFSMaxBufSize, NULL,
@@ -1923,7 +1910,6 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon,
return SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid,
FSCTL_SET_INTEGRITY_INFORMATION,
- true /* is_fsctl */,
(char *)&integr_info,
sizeof(struct fsctl_set_integrity_information_req),
CIFSMaxBufSize, NULL,
@@ -1976,7 +1962,6 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid,
FSCTL_SRV_ENUMERATE_SNAPSHOTS,
- true /* is_fsctl */,
NULL, 0 /* no input data */, max_response_size,
(char **)&retbuf,
&ret_data_len);
@@ -2699,7 +2684,6 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
do {
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
FSCTL_DFS_GET_REFERRALS,
- true /* is_fsctl */,
(char *)dfs_req, dfs_req_size, CIFSMaxBufSize,
(char **)&dfs_rsp, &dfs_rsp_size);
if (!is_retryable_error(rc))
@@ -2906,8 +2890,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_ioctl_init(tcon, server,
&rqst[1], fid.persistent_fid,
- fid.volatile_fid, FSCTL_GET_REPARSE_POINT,
- true /* is_fctl */, NULL, 0,
+ fid.volatile_fid, FSCTL_GET_REPARSE_POINT, NULL, 0,
CIFSMaxBufSize -
MAX_SMB2_CREATE_RESPONSE_SIZE -
MAX_SMB2_CLOSE_RESPONSE_SIZE);
@@ -3087,8 +3070,7 @@ smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_ioctl_init(tcon, server,
&rqst[1], COMPOUND_FID,
- COMPOUND_FID, FSCTL_GET_REPARSE_POINT,
- true /* is_fctl */, NULL, 0,
+ COMPOUND_FID, FSCTL_GET_REPARSE_POINT, NULL, 0,
CIFSMaxBufSize -
MAX_SMB2_CREATE_RESPONSE_SIZE -
MAX_SMB2_CLOSE_RESPONSE_SIZE);
@@ -3316,26 +3298,43 @@ get_smb2_acl(struct cifs_sb_info *cifs_sb,
return pntsd;
}
+static long smb3_zero_data(struct file *file, struct cifs_tcon *tcon,
+ loff_t offset, loff_t len, unsigned int xid)
+{
+ struct cifsFileInfo *cfile = file->private_data;
+ struct file_zero_data_information fsctl_buf;
+
+ cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len);
+
+ fsctl_buf.FileOffset = cpu_to_le64(offset);
+ fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len);
+
+ return SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
+ cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA,
+ (char *)&fsctl_buf,
+ sizeof(struct file_zero_data_information),
+ 0, NULL, NULL);
+}
+
static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
loff_t offset, loff_t len, bool keep_size)
{
struct cifs_ses *ses = tcon->ses;
- struct inode *inode;
- struct cifsInodeInfo *cifsi;
+ struct inode *inode = file_inode(file);
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
struct cifsFileInfo *cfile = file->private_data;
- struct file_zero_data_information fsctl_buf;
long rc;
unsigned int xid;
__le64 eof;
xid = get_xid();
- inode = d_inode(cfile->dentry);
- cifsi = CIFS_I(inode);
-
trace_smb3_zero_enter(xid, cfile->fid.persistent_fid, tcon->tid,
ses->Suid, offset, len);
+ inode_lock(inode);
+ filemap_invalidate_lock(inode->i_mapping);
+
/*
* We zero the range through ioctl, so we need remove the page caches
* first, otherwise the data may be inconsistent with the server.
@@ -3343,26 +3342,12 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
truncate_pagecache_range(inode, offset, offset + len - 1);
/* if file not oplocked can't be sure whether asking to extend size */
- if (!CIFS_CACHE_READ(cifsi))
- if (keep_size == false) {
- rc = -EOPNOTSUPP;
- trace_smb3_zero_err(xid, cfile->fid.persistent_fid,
- tcon->tid, ses->Suid, offset, len, rc);
- free_xid(xid);
- return rc;
- }
-
- cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len);
-
- fsctl_buf.FileOffset = cpu_to_le64(offset);
- fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len);
+ rc = -EOPNOTSUPP;
+ if (keep_size == false && !CIFS_CACHE_READ(cifsi))
+ goto zero_range_exit;
- rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
- cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, true,
- (char *)&fsctl_buf,
- sizeof(struct file_zero_data_information),
- 0, NULL, NULL);
- if (rc)
+ rc = smb3_zero_data(file, tcon, offset, len, xid);
+ if (rc < 0)
goto zero_range_exit;
/*
@@ -3375,6 +3360,8 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
}
zero_range_exit:
+ filemap_invalidate_unlock(inode->i_mapping);
+ inode_unlock(inode);
free_xid(xid);
if (rc)
trace_smb3_zero_err(xid, cfile->fid.persistent_fid, tcon->tid,
@@ -3388,7 +3375,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
loff_t offset, loff_t len)
{
- struct inode *inode;
+ struct inode *inode = file_inode(file);
struct cifsFileInfo *cfile = file->private_data;
struct file_zero_data_information fsctl_buf;
long rc;
@@ -3397,14 +3384,12 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
xid = get_xid();
- inode = d_inode(cfile->dentry);
-
+ inode_lock(inode);
/* Need to make file sparse, if not already, before freeing range. */
/* Consider adding equivalent for compressed since it could also work */
if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) {
rc = -EOPNOTSUPP;
- free_xid(xid);
- return rc;
+ goto out;
}
filemap_invalidate_lock(inode->i_mapping);
@@ -3421,11 +3406,13 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA,
- true /* is_fctl */, (char *)&fsctl_buf,
+ (char *)&fsctl_buf,
sizeof(struct file_zero_data_information),
CIFSMaxBufSize, NULL, NULL);
- free_xid(xid);
filemap_invalidate_unlock(inode->i_mapping);
+out:
+ inode_unlock(inode);
+ free_xid(xid);
return rc;
}
@@ -3481,7 +3468,7 @@ static int smb3_simple_fallocate_range(unsigned int xid,
in_data.length = cpu_to_le64(len);
rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid,
- FSCTL_QUERY_ALLOCATED_RANGES, true,
+ FSCTL_QUERY_ALLOCATED_RANGES,
(char *)&in_data, sizeof(in_data),
1024 * sizeof(struct file_allocated_range_buffer),
(char **)&out_data, &out_data_len);
@@ -3682,39 +3669,50 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon,
{
int rc;
unsigned int xid;
- struct inode *inode;
+ struct inode *inode = file_inode(file);
struct cifsFileInfo *cfile = file->private_data;
- struct cifsInodeInfo *cifsi;
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
__le64 eof;
+ loff_t old_eof;
xid = get_xid();
- inode = d_inode(cfile->dentry);
- cifsi = CIFS_I(inode);
+ inode_lock(inode);
- if (off >= i_size_read(inode) ||
- off + len >= i_size_read(inode)) {
+ old_eof = i_size_read(inode);
+ if ((off >= old_eof) ||
+ off + len >= old_eof) {
rc = -EINVAL;
goto out;
}
+ filemap_invalidate_lock(inode->i_mapping);
+ rc = filemap_write_and_wait_range(inode->i_mapping, off, old_eof - 1);
+ if (rc < 0)
+ goto out_2;
+
+ truncate_pagecache_range(inode, off, old_eof);
+
rc = smb2_copychunk_range(xid, cfile, cfile, off + len,
- i_size_read(inode) - off - len, off);
+ old_eof - off - len, off);
if (rc < 0)
- goto out;
+ goto out_2;
- eof = cpu_to_le64(i_size_read(inode) - len);
+ eof = cpu_to_le64(old_eof - len);
rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, cfile->pid, &eof);
if (rc < 0)
- goto out;
+ goto out_2;
rc = 0;
cifsi->server_eof = i_size_read(inode) - len;
truncate_setsize(inode, cifsi->server_eof);
fscache_resize_cookie(cifs_inode_cookie(inode), cifsi->server_eof);
+out_2:
+ filemap_invalidate_unlock(inode->i_mapping);
out:
+ inode_unlock(inode);
free_xid(xid);
return rc;
}
@@ -3725,34 +3723,47 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon,
int rc;
unsigned int xid;
struct cifsFileInfo *cfile = file->private_data;
+ struct inode *inode = file_inode(file);
__le64 eof;
- __u64 count;
+ __u64 count, old_eof;
xid = get_xid();
- if (off >= i_size_read(file->f_inode)) {
+ inode_lock(inode);
+
+ old_eof = i_size_read(inode);
+ if (off >= old_eof) {
rc = -EINVAL;
goto out;
}
- count = i_size_read(file->f_inode) - off;
- eof = cpu_to_le64(i_size_read(file->f_inode) + len);
+ count = old_eof - off;
+ eof = cpu_to_le64(old_eof + len);
+
+ filemap_invalidate_lock(inode->i_mapping);
+ rc = filemap_write_and_wait_range(inode->i_mapping, off, old_eof + len - 1);
+ if (rc < 0)
+ goto out_2;
+ truncate_pagecache_range(inode, off, old_eof);
rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, cfile->pid, &eof);
if (rc < 0)
- goto out;
+ goto out_2;
rc = smb2_copychunk_range(xid, cfile, cfile, off, count, off + len);
if (rc < 0)
- goto out;
+ goto out_2;
- rc = smb3_zero_range(file, tcon, off, len, 1);
+ rc = smb3_zero_data(file, tcon, off, len, xid);
if (rc < 0)
- goto out;
+ goto out_2;
rc = 0;
+out_2:
+ filemap_invalidate_unlock(inode->i_mapping);
out:
+ inode_unlock(inode);
free_xid(xid);
return rc;
}
@@ -3802,7 +3813,7 @@ static loff_t smb3_llseek(struct file *file, struct cifs_tcon *tcon, loff_t offs
rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid,
- FSCTL_QUERY_ALLOCATED_RANGES, true,
+ FSCTL_QUERY_ALLOCATED_RANGES,
(char *)&in_data, sizeof(in_data),
sizeof(struct file_allocated_range_buffer),
(char **)&out_data, &out_data_len);
@@ -3862,7 +3873,7 @@ static int smb3_fiemap(struct cifs_tcon *tcon,
rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid,
- FSCTL_QUERY_ALLOCATED_RANGES, true,
+ FSCTL_QUERY_ALLOCATED_RANGES,
(char *)&in_data, sizeof(in_data),
1024 * sizeof(struct file_allocated_range_buffer),
(char **)&out_data, &out_data_len);
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 9b31ea946d45..6352ab32c7e7 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -965,16 +965,17 @@ SMB2_negotiate(const unsigned int xid,
} else if (rc != 0)
goto neg_exit;
+ rc = -EIO;
if (strcmp(server->vals->version_string,
SMB3ANY_VERSION_STRING) == 0) {
if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) {
cifs_server_dbg(VFS,
"SMB2 dialect returned but not requested\n");
- return -EIO;
+ goto neg_exit;
} else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) {
cifs_server_dbg(VFS,
"SMB2.1 dialect returned but not requested\n");
- return -EIO;
+ goto neg_exit;
} else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) {
/* ops set to 3.0 by default for default so update */
server->ops = &smb311_operations;
@@ -985,7 +986,7 @@ SMB2_negotiate(const unsigned int xid,
if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) {
cifs_server_dbg(VFS,
"SMB2 dialect returned but not requested\n");
- return -EIO;
+ goto neg_exit;
} else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) {
/* ops set to 3.0 by default for default so update */
server->ops = &smb21_operations;
@@ -999,7 +1000,7 @@ SMB2_negotiate(const unsigned int xid,
/* if requested single dialect ensure returned dialect matched */
cifs_server_dbg(VFS, "Invalid 0x%x dialect returned: not requested\n",
le16_to_cpu(rsp->DialectRevision));
- return -EIO;
+ goto neg_exit;
}
cifs_dbg(FYI, "mode 0x%x\n", rsp->SecurityMode);
@@ -1017,9 +1018,10 @@ SMB2_negotiate(const unsigned int xid,
else {
cifs_server_dbg(VFS, "Invalid dialect returned by server 0x%x\n",
le16_to_cpu(rsp->DialectRevision));
- rc = -EIO;
goto neg_exit;
}
+
+ rc = 0;
server->dialect = le16_to_cpu(rsp->DialectRevision);
/*
@@ -1173,7 +1175,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
}
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
- FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */,
+ FSCTL_VALIDATE_NEGOTIATE_INFO,
(char *)pneg_inbuf, inbuflen, CIFSMaxBufSize,
(char **)&pneg_rsp, &rsplen);
if (rc == -EOPNOTSUPP) {
@@ -1928,7 +1930,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
tcon->capabilities = rsp->Capabilities; /* we keep caps little endian */
tcon->maximal_access = le32_to_cpu(rsp->MaximalAccess);
tcon->tid = le32_to_cpu(rsp->hdr.Id.SyncId.TreeId);
- strlcpy(tcon->treeName, tree, sizeof(tcon->treeName));
+ strscpy(tcon->treeName, tree, sizeof(tcon->treeName));
if ((rsp->Capabilities & SMB2_SHARE_CAP_DFS) &&
((tcon->share_flags & SHI1005_FLAGS_DFS) == 0))
@@ -2572,19 +2574,15 @@ alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len,
path_len = UniStrnlen((wchar_t *)path, PATH_MAX);
- /*
- * make room for one path separator between the treename and
- * path
- */
- *out_len = treename_len + 1 + path_len;
+ /* make room for one path separator only if @path isn't empty */
+ *out_len = treename_len + (path[0] ? 1 : 0) + path_len;
/*
- * final path needs to be null-terminated UTF16 with a
- * size aligned to 8
+ * final path needs to be 8-byte aligned as specified in
+ * MS-SMB2 2.2.13 SMB2 CREATE Request.
*/
-
- *out_size = roundup((*out_len+1)*2, 8);
- *out_path = kzalloc(*out_size, GFP_KERNEL);
+ *out_size = roundup(*out_len * sizeof(__le16), 8);
+ *out_path = kzalloc(*out_size + sizeof(__le16) /* null */, GFP_KERNEL);
if (!*out_path)
return -ENOMEM;
@@ -3056,7 +3054,7 @@ int
SMB2_ioctl_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server,
struct smb_rqst *rqst,
u64 persistent_fid, u64 volatile_fid, u32 opcode,
- bool is_fsctl, char *in_data, u32 indatalen,
+ char *in_data, u32 indatalen,
__u32 max_response_size)
{
struct smb2_ioctl_req *req;
@@ -3131,10 +3129,8 @@ SMB2_ioctl_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server,
req->hdr.CreditCharge =
cpu_to_le16(DIV_ROUND_UP(max(indatalen, max_response_size),
SMB2_MAX_BUFFER_SIZE));
- if (is_fsctl)
- req->Flags = cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL);
- else
- req->Flags = 0;
+ /* always an FSCTL (for now) */
+ req->Flags = cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL);
/* validate negotiate request must be signed - see MS-SMB2 3.2.5.5 */
if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO)
@@ -3161,9 +3157,9 @@ SMB2_ioctl_free(struct smb_rqst *rqst)
*/
int
SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
- u64 volatile_fid, u32 opcode, bool is_fsctl,
- char *in_data, u32 indatalen, u32 max_out_data_len,
- char **out_data, u32 *plen /* returned data len */)
+ u64 volatile_fid, u32 opcode, char *in_data, u32 indatalen,
+ u32 max_out_data_len, char **out_data,
+ u32 *plen /* returned data len */)
{
struct smb_rqst rqst;
struct smb2_ioctl_rsp *rsp = NULL;
@@ -3205,7 +3201,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
rc = SMB2_ioctl_init(tcon, server,
&rqst, persistent_fid, volatile_fid, opcode,
- is_fsctl, in_data, indatalen, max_out_data_len);
+ in_data, indatalen, max_out_data_len);
if (rc)
goto ioctl_exit;
@@ -3297,7 +3293,7 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
- FSCTL_SET_COMPRESSION, true /* is_fsctl */,
+ FSCTL_SET_COMPRESSION,
(char *)&fsctl_input /* data input */,
2 /* in data len */, CIFSMaxBufSize /* max out data */,
&ret_data /* out data */, NULL);
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 51c5bf4a338a..3f740f24b96a 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -23,7 +23,7 @@ struct smb_rqst;
extern int map_smb2_to_linux_error(char *buf, bool log_err);
extern int smb2_check_message(char *buf, unsigned int length,
struct TCP_Server_Info *server);
-extern unsigned int smb2_calc_size(void *buf, struct TCP_Server_Info *server);
+extern unsigned int smb2_calc_size(void *buf);
extern char *smb2_get_data_area_len(int *off, int *len,
struct smb2_hdr *shdr);
extern __le16 *cifs_convert_path_to_utf16(const char *from,
@@ -137,13 +137,13 @@ extern int SMB2_open_init(struct cifs_tcon *tcon,
extern void SMB2_open_free(struct smb_rqst *rqst);
extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, u32 opcode,
- bool is_fsctl, char *in_data, u32 indatalen, u32 maxoutlen,
+ char *in_data, u32 indatalen, u32 maxoutlen,
char **out_data, u32 *plen /* returned data len */);
extern int SMB2_ioctl_init(struct cifs_tcon *tcon,
struct TCP_Server_Info *server,
struct smb_rqst *rqst,
u64 persistent_fid, u64 volatile_fid, u32 opcode,
- bool is_fsctl, char *in_data, u32 indatalen,
+ char *in_data, u32 indatalen,
__u32 max_response_size);
extern void SMB2_ioctl_free(struct smb_rqst *rqst);
extern int SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon,
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index de7aeced7e16..9a2753e21170 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -194,10 +194,6 @@ smb_send_kvec(struct TCP_Server_Info *server, struct msghdr *smb_msg,
*sent = 0;
- smb_msg->msg_name = (struct sockaddr *) &server->dstaddr;
- smb_msg->msg_namelen = sizeof(struct sockaddr);
- smb_msg->msg_control = NULL;
- smb_msg->msg_controllen = 0;
if (server->noblocksnd)
smb_msg->msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL;
else
@@ -261,8 +257,8 @@ smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst)
int nvec;
unsigned long buflen = 0;
- if (server->vals->header_preamble_size == 0 &&
- rqst->rq_nvec >= 2 && rqst->rq_iov[0].iov_len == 4) {
+ if (!is_smb1(server) && rqst->rq_nvec >= 2 &&
+ rqst->rq_iov[0].iov_len == 4) {
iov = &rqst->rq_iov[1];
nvec = rqst->rq_nvec - 1;
} else {
@@ -309,7 +305,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
sigset_t mask, oldmask;
size_t total_len = 0, sent, size;
struct socket *ssocket = server->ssocket;
- struct msghdr smb_msg;
+ struct msghdr smb_msg = {};
__be32 rfc1002_marker;
if (cifs_rdma_enabled(server)) {
@@ -346,7 +342,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
sigprocmask(SIG_BLOCK, &mask, &oldmask);
/* Generate a rfc1002 marker for SMB2+ */
- if (server->vals->header_preamble_size == 0) {
+ if (!is_smb1(server)) {
struct kvec hiov = {
.iov_base = &rfc1002_marker,
.iov_len = 4
@@ -1238,7 +1234,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
buf = (char *)midQ[i]->resp_buf;
resp_iov[i].iov_base = buf;
resp_iov[i].iov_len = midQ[i]->resp_buf_size +
- server->vals->header_preamble_size;
+ HEADER_PREAMBLE_SIZE(server);
if (midQ[i]->large_buf)
resp_buf_type[i] = CIFS_LARGE_BUFFER;
@@ -1643,7 +1639,7 @@ int
cifs_discard_remaining_data(struct TCP_Server_Info *server)
{
unsigned int rfclen = server->pdu_size;
- int remaining = rfclen + server->vals->header_preamble_size -
+ int remaining = rfclen + HEADER_PREAMBLE_SIZE(server) -
server->total_read;
while (remaining > 0) {
@@ -1689,8 +1685,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
unsigned int data_offset, data_len;
struct cifs_readdata *rdata = mid->callback_data;
char *buf = server->smallbuf;
- unsigned int buflen = server->pdu_size +
- server->vals->header_preamble_size;
+ unsigned int buflen = server->pdu_size + HEADER_PREAMBLE_SIZE(server);
bool use_rdma_mr = false;
cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%u\n",
@@ -1724,10 +1719,10 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
/* set up first two iov for signature check and to get credits */
rdata->iov[0].iov_base = buf;
- rdata->iov[0].iov_len = server->vals->header_preamble_size;
- rdata->iov[1].iov_base = buf + server->vals->header_preamble_size;
+ rdata->iov[0].iov_len = HEADER_PREAMBLE_SIZE(server);
+ rdata->iov[1].iov_base = buf + HEADER_PREAMBLE_SIZE(server);
rdata->iov[1].iov_len =
- server->total_read - server->vals->header_preamble_size;
+ server->total_read - HEADER_PREAMBLE_SIZE(server);
cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n",
rdata->iov[0].iov_base, rdata->iov[0].iov_len);
cifs_dbg(FYI, "1: iov_base=%p iov_len=%zu\n",
@@ -1752,7 +1747,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
}
data_offset = server->ops->read_data_offset(buf) +
- server->vals->header_preamble_size;
+ HEADER_PREAMBLE_SIZE(server);
if (data_offset < server->total_read) {
/*
* win2k8 sometimes sends an offset of 0 when the read
diff --git a/fs/coredump.c b/fs/coredump.c
index 9f4aae202109..1ab4f5b76a1e 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -832,6 +832,38 @@ static int __dump_skip(struct coredump_params *cprm, size_t nr)
}
}
+static int dump_emit_page(struct coredump_params *cprm, struct page *page)
+{
+ struct bio_vec bvec = {
+ .bv_page = page,
+ .bv_offset = 0,
+ .bv_len = PAGE_SIZE,
+ };
+ struct iov_iter iter;
+ struct file *file = cprm->file;
+ loff_t pos = file->f_pos;
+ ssize_t n;
+
+ if (cprm->to_skip) {
+ if (!__dump_skip(cprm, cprm->to_skip))
+ return 0;
+ cprm->to_skip = 0;
+ }
+ if (cprm->written + PAGE_SIZE > cprm->limit)
+ return 0;
+ if (dump_interrupted())
+ return 0;
+ iov_iter_bvec(&iter, WRITE, &bvec, 1, PAGE_SIZE);
+ n = __kernel_write_iter(cprm->file, &iter, &pos);
+ if (n != PAGE_SIZE)
+ return 0;
+ file->f_pos = pos;
+ cprm->written += PAGE_SIZE;
+ cprm->pos += PAGE_SIZE;
+
+ return 1;
+}
+
int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
{
if (cprm->to_skip) {
@@ -863,7 +895,6 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
for (addr = start; addr < start + len; addr += PAGE_SIZE) {
struct page *page;
- int stop;
/*
* To avoid having to allocate page tables for virtual address
@@ -874,10 +905,7 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
*/
page = get_dump_page(addr);
if (page) {
- void *kaddr = kmap_local_page(page);
-
- stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
- kunmap_local(kaddr);
+ int stop = !dump_emit_page(cprm, page);
put_page(page);
if (stop)
return 0;
diff --git a/fs/dax.c b/fs/dax.c
index c440dcef4b1b..1c6867810cbd 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1445,6 +1445,9 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
loff_t done = 0;
int ret;
+ if (!iomi.len)
+ return 0;
+
if (iov_iter_rw(iter) == WRITE) {
lockdep_assert_held_write(&iomi.inode->i_rwsem);
iomi.flags |= IOMAP_WRITE;
diff --git a/fs/dcache.c b/fs/dcache.c
index c5dc32a59c76..bb0c4d0038db 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2270,6 +2270,48 @@ bool d_same_name(const struct dentry *dentry, const struct dentry *parent,
}
EXPORT_SYMBOL_GPL(d_same_name);
+/*
+ * This is __d_lookup_rcu() when the parent dentry has
+ * DCACHE_OP_COMPARE, which makes things much nastier.
+ */
+static noinline struct dentry *__d_lookup_rcu_op_compare(
+ const struct dentry *parent,
+ const struct qstr *name,
+ unsigned *seqp)
+{
+ u64 hashlen = name->hash_len;
+ struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
+ struct hlist_bl_node *node;
+ struct dentry *dentry;
+
+ hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
+ int tlen;
+ const char *tname;
+ unsigned seq;
+
+seqretry:
+ seq = raw_seqcount_begin(&dentry->d_seq);
+ if (dentry->d_parent != parent)
+ continue;
+ if (d_unhashed(dentry))
+ continue;
+ if (dentry->d_name.hash != hashlen_hash(hashlen))
+ continue;
+ tlen = dentry->d_name.len;
+ tname = dentry->d_name.name;
+ /* we want a consistent (name,len) pair */
+ if (read_seqcount_retry(&dentry->d_seq, seq)) {
+ cpu_relax();
+ goto seqretry;
+ }
+ if (parent->d_op->d_compare(dentry, tlen, tname, name) != 0)
+ continue;
+ *seqp = seq;
+ return dentry;
+ }
+ return NULL;
+}
+
/**
* __d_lookup_rcu - search for a dentry (racy, store-free)
* @parent: parent dentry
@@ -2316,6 +2358,9 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent,
* Keep the two functions in sync.
*/
+ if (unlikely(parent->d_flags & DCACHE_OP_COMPARE))
+ return __d_lookup_rcu_op_compare(parent, name, seqp);
+
/*
* The hash list is protected using RCU.
*
@@ -2332,7 +2377,6 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent,
hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
unsigned seq;
-seqretry:
/*
* The dentry sequence count protects us from concurrent
* renames, and thus protects parent and name fields.
@@ -2355,28 +2399,10 @@ seqretry:
continue;
if (d_unhashed(dentry))
continue;
-
- if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
- int tlen;
- const char *tname;
- if (dentry->d_name.hash != hashlen_hash(hashlen))
- continue;
- tlen = dentry->d_name.len;
- tname = dentry->d_name.name;
- /* we want a consistent (name,len) pair */
- if (read_seqcount_retry(&dentry->d_seq, seq)) {
- cpu_relax();
- goto seqretry;
- }
- if (parent->d_op->d_compare(dentry,
- tlen, tname, name) != 0)
- continue;
- } else {
- if (dentry->d_name.hash_len != hashlen)
- continue;
- if (dentry_cmp(dentry, str, hashlen_len(hashlen)) != 0)
- continue;
- }
+ if (dentry->d_name.hash_len != hashlen)
+ continue;
+ if (dentry_cmp(dentry, str, hashlen_len(hashlen)) != 0)
+ continue;
*seqp = seq;
return dentry;
}
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 3dcf0b8b4e93..232cfdf095ae 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -745,6 +745,28 @@ void debugfs_remove(struct dentry *dentry)
EXPORT_SYMBOL_GPL(debugfs_remove);
/**
+ * debugfs_lookup_and_remove - lookup a directory or file and recursively remove it
+ * @name: a pointer to a string containing the name of the item to look up.
+ * @parent: a pointer to the parent dentry of the item.
+ *
+ * This is the equlivant of doing something like
+ * debugfs_remove(debugfs_lookup(..)) but with the proper reference counting
+ * handled for the directory being looked up.
+ */
+void debugfs_lookup_and_remove(const char *name, struct dentry *parent)
+{
+ struct dentry *dentry;
+
+ dentry = debugfs_lookup(name, parent);
+ if (!dentry)
+ return;
+
+ debugfs_remove(dentry);
+ dput(dentry);
+}
+EXPORT_SYMBOL_GPL(debugfs_lookup_and_remove);
+
+/**
* debugfs_rename - rename a file/directory in the debugfs filesystem
* @old_dir: a pointer to the parent dentry for the renamed object. This
* should be a directory dentry.
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index 8e01d89c3319..b5fd9d71e67f 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -222,8 +222,10 @@ static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
rreq = erofs_fscache_alloc_request(folio_mapping(folio),
folio_pos(folio), folio_size(folio));
- if (IS_ERR(rreq))
+ if (IS_ERR(rreq)) {
+ ret = PTR_ERR(rreq);
goto out;
+ }
return erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
rreq, mdev.m_pa);
@@ -301,8 +303,10 @@ static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
rreq = erofs_fscache_alloc_request(folio_mapping(folio),
folio_pos(folio), folio_size(folio));
- if (IS_ERR(rreq))
+ if (IS_ERR(rreq)) {
+ ret = PTR_ERR(rreq);
goto out_unlock;
+ }
pstart = mdev.m_pa + (pos - map.m_la);
return erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index cfee49d33b95..a01cc82795a2 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -195,7 +195,6 @@ struct erofs_workgroup {
atomic_t refcount;
};
-#if defined(CONFIG_SMP)
static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp,
int val)
{
@@ -224,34 +223,6 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
return atomic_cond_read_relaxed(&grp->refcount,
VAL != EROFS_LOCKED_MAGIC);
}
-#else
-static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp,
- int val)
-{
- preempt_disable();
- /* no need to spin on UP platforms, let's just disable preemption. */
- if (val != atomic_read(&grp->refcount)) {
- preempt_enable();
- return false;
- }
- return true;
-}
-
-static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp,
- int orig_val)
-{
- preempt_enable();
-}
-
-static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
-{
- int v = atomic_read(&grp->refcount);
-
- /* workgroup is never freezed on uniprocessor systems */
- DBG_BUGON(v == EROFS_LOCKED_MAGIC);
- return v;
-}
-#endif /* !CONFIG_SMP */
#endif /* !CONFIG_EROFS_FS_ZIP */
/* we strictly follow PAGE_SIZE and no buffer head yet */
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 572f0b8151ba..d58549ca1df9 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -141,7 +141,7 @@ struct z_erofs_maprecorder {
u8 type, headtype;
u16 clusterofs;
u16 delta[2];
- erofs_blk_t pblk, compressedlcs;
+ erofs_blk_t pblk, compressedblks;
erofs_off_t nextpackoff;
};
@@ -192,7 +192,7 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
DBG_BUGON(1);
return -EFSCORRUPTED;
}
- m->compressedlcs = m->delta[0] &
+ m->compressedblks = m->delta[0] &
~Z_EROFS_VLE_DI_D0_CBLKCNT;
m->delta[0] = 1;
}
@@ -293,7 +293,7 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
DBG_BUGON(1);
return -EFSCORRUPTED;
}
- m->compressedlcs = lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT;
+ m->compressedblks = lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT;
m->delta[0] = 1;
return 0;
} else if (i + 1 != (int)vcnt) {
@@ -497,7 +497,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
return 0;
}
lcn = m->lcn + 1;
- if (m->compressedlcs)
+ if (m->compressedblks)
goto out;
err = z_erofs_load_cluster_from_disk(m, lcn, false);
@@ -506,7 +506,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
/*
* If the 1st NONHEAD lcluster has already been handled initially w/o
- * valid compressedlcs, which means at least it mustn't be CBLKCNT, or
+ * valid compressedblks, which means at least it mustn't be CBLKCNT, or
* an internal implemenatation error is detected.
*
* The following code can also handle it properly anyway, but let's
@@ -523,12 +523,12 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
* if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
* rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
*/
- m->compressedlcs = 1;
+ m->compressedblks = 1 << (lclusterbits - LOG_BLOCK_SIZE);
break;
case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
if (m->delta[0] != 1)
goto err_bonus_cblkcnt;
- if (m->compressedlcs)
+ if (m->compressedblks)
break;
fallthrough;
default:
@@ -539,7 +539,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
return -EFSCORRUPTED;
}
out:
- map->m_plen = (u64)m->compressedlcs << lclusterbits;
+ map->m_plen = (u64)m->compressedblks << LOG_BLOCK_SIZE;
return 0;
err_bonus_cblkcnt:
erofs_err(m->inode->i_sb,
diff --git a/fs/exec.c b/fs/exec.c
index f793221f4eb6..69a572fc57db 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -65,7 +65,6 @@
#include <linux/io_uring.h>
#include <linux/syscall_user_dispatch.h>
#include <linux/coredump.h>
-#include <linux/time_namespace.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -584,11 +583,11 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
if (kmapped_page) {
flush_dcache_page(kmapped_page);
- kunmap(kmapped_page);
+ kunmap_local(kaddr);
put_arg_page(kmapped_page);
}
kmapped_page = page;
- kaddr = kmap(kmapped_page);
+ kaddr = kmap_local_page(kmapped_page);
kpos = pos & PAGE_MASK;
flush_arg_page(bprm, kpos, kmapped_page);
}
@@ -602,7 +601,7 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
out:
if (kmapped_page) {
flush_dcache_page(kmapped_page);
- kunmap(kmapped_page);
+ kunmap_local(kaddr);
put_arg_page(kmapped_page);
}
return ret;
@@ -880,11 +879,11 @@ int transfer_args_to_stack(struct linux_binprm *bprm,
for (index = MAX_ARG_PAGES - 1; index >= stop; index--) {
unsigned int offset = index == stop ? bprm->p & ~PAGE_MASK : 0;
- char *src = kmap(bprm->page[index]) + offset;
+ char *src = kmap_local_page(bprm->page[index]) + offset;
sp -= PAGE_SIZE - offset;
if (copy_to_user((void *) sp, src, PAGE_SIZE - offset) != 0)
ret = -EFAULT;
- kunmap(bprm->page[index]);
+ kunmap_local(src);
if (ret)
goto out;
}
@@ -958,8 +957,7 @@ struct file *open_exec(const char *name)
}
EXPORT_SYMBOL(open_exec);
-#if defined(CONFIG_HAVE_AOUT) || defined(CONFIG_BINFMT_FLAT) || \
- defined(CONFIG_BINFMT_ELF_FDPIC)
+#if defined(CONFIG_BINFMT_FLAT) || defined(CONFIG_BINFMT_ELF_FDPIC)
ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
{
ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
@@ -979,12 +977,10 @@ static int exec_mmap(struct mm_struct *mm)
{
struct task_struct *tsk;
struct mm_struct *old_mm, *active_mm;
- bool vfork;
int ret;
/* Notify parent that we're no longer interested in the old VM */
tsk = current;
- vfork = !!tsk->vfork_done;
old_mm = current->mm;
exec_mm_release(tsk, old_mm);
if (old_mm)
@@ -1029,10 +1025,6 @@ static int exec_mmap(struct mm_struct *mm)
tsk->mm->vmacache_seqnum = 0;
vmacache_flush(tsk);
task_unlock(tsk);
-
- if (vfork)
- timens_on_fork(tsk->nsproxy, tsk);
-
if (old_mm) {
mmap_read_unlock(old_mm);
BUG_ON(active_mm != old_mm);
@@ -1686,13 +1678,13 @@ int remove_arg_zero(struct linux_binprm *bprm)
ret = -EFAULT;
goto out;
}
- kaddr = kmap_atomic(page);
+ kaddr = kmap_local_page(page);
for (; offset < PAGE_SIZE && kaddr[offset];
offset++, bprm->p++)
;
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
put_arg_page(page);
} while (offset == PAGE_SIZE);
diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c
index ee0b7cf51157..41ae4cce1f42 100644
--- a/fs/exfat/fatent.c
+++ b/fs/exfat/fatent.c
@@ -270,8 +270,7 @@ int exfat_zeroed_cluster(struct inode *dir, unsigned int clu)
struct super_block *sb = dir->i_sb;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
struct buffer_head *bh;
- sector_t blknr, last_blknr;
- int i;
+ sector_t blknr, last_blknr, i;
blknr = exfat_cluster_to_sector(sbi, clu);
last_blknr = blknr + sbi->sect_per_clus;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 9bca5565547b..3bf9a6926798 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -167,8 +167,6 @@ enum SHIFT_DIRECTION {
#define EXT4_MB_CR0_OPTIMIZED 0x8000
/* Avg fragment size rb tree lookup succeeded at least once for cr = 1 */
#define EXT4_MB_CR1_OPTIMIZED 0x00010000
-/* Perform linear traversal for one group */
-#define EXT4_MB_SEARCH_NEXT_LINEAR 0x00020000
struct ext4_allocation_request {
/* target inode for block we're allocating */
struct inode *inode;
@@ -1600,8 +1598,8 @@ struct ext4_sb_info {
struct list_head s_discard_list;
struct work_struct s_discard_work;
atomic_t s_retry_alloc_pending;
- struct rb_root s_mb_avg_fragment_size_root;
- rwlock_t s_mb_rb_lock;
+ struct list_head *s_mb_avg_fragment_size;
+ rwlock_t *s_mb_avg_fragment_size_locks;
struct list_head *s_mb_largest_free_orders;
rwlock_t *s_mb_largest_free_orders_locks;
@@ -3413,6 +3411,8 @@ struct ext4_group_info {
ext4_grpblk_t bb_first_free; /* first free block */
ext4_grpblk_t bb_free; /* total free blocks */
ext4_grpblk_t bb_fragments; /* nr of freespace fragments */
+ int bb_avg_fragment_size_order; /* order of average
+ fragment in BG */
ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */
ext4_group_t bb_group; /* Group number */
struct list_head bb_prealloc_list;
@@ -3420,7 +3420,7 @@ struct ext4_group_info {
void *bb_bitmap;
#endif
struct rw_semaphore alloc_sem;
- struct rb_node bb_avg_fragment_size_rb;
+ struct list_head bb_avg_fragment_size_node;
struct list_head bb_largest_free_order_node;
ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block
* regions, index is order.
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index c148bb97b527..5235974126bd 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -460,6 +460,10 @@ static int __ext4_ext_check(const char *function, unsigned int line,
error_msg = "invalid eh_entries";
goto corrupted;
}
+ if (unlikely((eh->eh_entries == 0) && (depth > 0))) {
+ error_msg = "eh_entries is 0 but eh_depth is > 0";
+ goto corrupted;
+ }
if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) {
error_msg = "invalid extent entries";
goto corrupted;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f73e5eb43eae..208b87ce8858 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -510,7 +510,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
goto fallback;
}
- max_dirs = ndirs / ngroups + inodes_per_group / 16;
+ max_dirs = ndirs / ngroups + inodes_per_group*flex_size / 16;
min_inodes = avefreei - inodes_per_group*flex_size / 4;
if (min_inodes < 1)
min_inodes = 1;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index bd8f8b5c3d30..9dad93059945 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -140,13 +140,15 @@
* number of buddy bitmap orders possible) number of lists. Group-infos are
* placed in appropriate lists.
*
- * 2) Average fragment size rb tree (sbi->s_mb_avg_fragment_size_root)
+ * 2) Average fragment size lists (sbi->s_mb_avg_fragment_size)
*
- * Locking: sbi->s_mb_rb_lock (rwlock)
+ * Locking: sbi->s_mb_avg_fragment_size_locks(array of rw locks)
*
- * This is a red black tree consisting of group infos and the tree is sorted
- * by average fragment sizes (which is calculated as ext4_group_info->bb_free
- * / ext4_group_info->bb_fragments).
+ * This is an array of lists where in the i-th list there are groups with
+ * average fragment size >= 2^i and < 2^(i+1). The average fragment size
+ * is computed as ext4_group_info->bb_free / ext4_group_info->bb_fragments.
+ * Note that we don't bother with a special list for completely empty groups
+ * so we only have MB_NUM_ORDERS(sb) lists.
*
* When "mb_optimize_scan" mount option is set, mballoc consults the above data
* structures to decide the order in which groups are to be traversed for
@@ -160,7 +162,8 @@
*
* At CR = 1, we only consider groups where average fragment size > request
* size. So, we lookup a group which has average fragment size just above or
- * equal to request size using our rb tree (data structure 2) in O(log N) time.
+ * equal to request size using our average fragment size group lists (data
+ * structure 2) in O(1) time.
*
* If "mb_optimize_scan" mount option is not set, mballoc traverses groups in
* linear order which requires O(N) search time for each CR 0 and CR 1 phase.
@@ -802,65 +805,51 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
}
}
-static void ext4_mb_rb_insert(struct rb_root *root, struct rb_node *new,
- int (*cmp)(struct rb_node *, struct rb_node *))
+static int mb_avg_fragment_size_order(struct super_block *sb, ext4_grpblk_t len)
{
- struct rb_node **iter = &root->rb_node, *parent = NULL;
+ int order;
- while (*iter) {
- parent = *iter;
- if (cmp(new, *iter) > 0)
- iter = &((*iter)->rb_left);
- else
- iter = &((*iter)->rb_right);
- }
-
- rb_link_node(new, parent, iter);
- rb_insert_color(new, root);
-}
-
-static int
-ext4_mb_avg_fragment_size_cmp(struct rb_node *rb1, struct rb_node *rb2)
-{
- struct ext4_group_info *grp1 = rb_entry(rb1,
- struct ext4_group_info,
- bb_avg_fragment_size_rb);
- struct ext4_group_info *grp2 = rb_entry(rb2,
- struct ext4_group_info,
- bb_avg_fragment_size_rb);
- int num_frags_1, num_frags_2;
-
- num_frags_1 = grp1->bb_fragments ?
- grp1->bb_free / grp1->bb_fragments : 0;
- num_frags_2 = grp2->bb_fragments ?
- grp2->bb_free / grp2->bb_fragments : 0;
-
- return (num_frags_2 - num_frags_1);
+ /*
+ * We don't bother with a special lists groups with only 1 block free
+ * extents and for completely empty groups.
+ */
+ order = fls(len) - 2;
+ if (order < 0)
+ return 0;
+ if (order == MB_NUM_ORDERS(sb))
+ order--;
+ return order;
}
-/*
- * Reinsert grpinfo into the avg_fragment_size tree with new average
- * fragment size.
- */
+/* Move group to appropriate avg_fragment_size list */
static void
mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
+ int new_order;
if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0)
return;
- write_lock(&sbi->s_mb_rb_lock);
- if (!RB_EMPTY_NODE(&grp->bb_avg_fragment_size_rb)) {
- rb_erase(&grp->bb_avg_fragment_size_rb,
- &sbi->s_mb_avg_fragment_size_root);
- RB_CLEAR_NODE(&grp->bb_avg_fragment_size_rb);
- }
+ new_order = mb_avg_fragment_size_order(sb,
+ grp->bb_free / grp->bb_fragments);
+ if (new_order == grp->bb_avg_fragment_size_order)
+ return;
- ext4_mb_rb_insert(&sbi->s_mb_avg_fragment_size_root,
- &grp->bb_avg_fragment_size_rb,
- ext4_mb_avg_fragment_size_cmp);
- write_unlock(&sbi->s_mb_rb_lock);
+ if (grp->bb_avg_fragment_size_order != -1) {
+ write_lock(&sbi->s_mb_avg_fragment_size_locks[
+ grp->bb_avg_fragment_size_order]);
+ list_del(&grp->bb_avg_fragment_size_node);
+ write_unlock(&sbi->s_mb_avg_fragment_size_locks[
+ grp->bb_avg_fragment_size_order]);
+ }
+ grp->bb_avg_fragment_size_order = new_order;
+ write_lock(&sbi->s_mb_avg_fragment_size_locks[
+ grp->bb_avg_fragment_size_order]);
+ list_add_tail(&grp->bb_avg_fragment_size_node,
+ &sbi->s_mb_avg_fragment_size[grp->bb_avg_fragment_size_order]);
+ write_unlock(&sbi->s_mb_avg_fragment_size_locks[
+ grp->bb_avg_fragment_size_order]);
}
/*
@@ -909,86 +898,55 @@ static void ext4_mb_choose_next_group_cr0(struct ext4_allocation_context *ac,
*new_cr = 1;
} else {
*group = grp->bb_group;
- ac->ac_last_optimal_group = *group;
ac->ac_flags |= EXT4_MB_CR0_OPTIMIZED;
}
}
/*
- * Choose next group by traversing average fragment size tree. Updates *new_cr
- * if cr lvel needs an update. Sets EXT4_MB_SEARCH_NEXT_LINEAR to indicate that
- * the linear search should continue for one iteration since there's lock
- * contention on the rb tree lock.
+ * Choose next group by traversing average fragment size list of suitable
+ * order. Updates *new_cr if cr level needs an update.
*/
static void ext4_mb_choose_next_group_cr1(struct ext4_allocation_context *ac,
int *new_cr, ext4_group_t *group, ext4_group_t ngroups)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
- int avg_fragment_size, best_so_far;
- struct rb_node *node, *found;
- struct ext4_group_info *grp;
-
- /*
- * If there is contention on the lock, instead of waiting for the lock
- * to become available, just continue searching lineraly. We'll resume
- * our rb tree search later starting at ac->ac_last_optimal_group.
- */
- if (!read_trylock(&sbi->s_mb_rb_lock)) {
- ac->ac_flags |= EXT4_MB_SEARCH_NEXT_LINEAR;
- return;
- }
+ struct ext4_group_info *grp = NULL, *iter;
+ int i;
if (unlikely(ac->ac_flags & EXT4_MB_CR1_OPTIMIZED)) {
if (sbi->s_mb_stats)
atomic_inc(&sbi->s_bal_cr1_bad_suggestions);
- /* We have found something at CR 1 in the past */
- grp = ext4_get_group_info(ac->ac_sb, ac->ac_last_optimal_group);
- for (found = rb_next(&grp->bb_avg_fragment_size_rb); found != NULL;
- found = rb_next(found)) {
- grp = rb_entry(found, struct ext4_group_info,
- bb_avg_fragment_size_rb);
+ }
+
+ for (i = mb_avg_fragment_size_order(ac->ac_sb, ac->ac_g_ex.fe_len);
+ i < MB_NUM_ORDERS(ac->ac_sb); i++) {
+ if (list_empty(&sbi->s_mb_avg_fragment_size[i]))
+ continue;
+ read_lock(&sbi->s_mb_avg_fragment_size_locks[i]);
+ if (list_empty(&sbi->s_mb_avg_fragment_size[i])) {
+ read_unlock(&sbi->s_mb_avg_fragment_size_locks[i]);
+ continue;
+ }
+ list_for_each_entry(iter, &sbi->s_mb_avg_fragment_size[i],
+ bb_avg_fragment_size_node) {
if (sbi->s_mb_stats)
atomic64_inc(&sbi->s_bal_cX_groups_considered[1]);
- if (likely(ext4_mb_good_group(ac, grp->bb_group, 1)))
+ if (likely(ext4_mb_good_group(ac, iter->bb_group, 1))) {
+ grp = iter;
break;
- }
- goto done;
- }
-
- node = sbi->s_mb_avg_fragment_size_root.rb_node;
- best_so_far = 0;
- found = NULL;
-
- while (node) {
- grp = rb_entry(node, struct ext4_group_info,
- bb_avg_fragment_size_rb);
- avg_fragment_size = 0;
- if (ext4_mb_good_group(ac, grp->bb_group, 1)) {
- avg_fragment_size = grp->bb_fragments ?
- grp->bb_free / grp->bb_fragments : 0;
- if (!best_so_far || avg_fragment_size < best_so_far) {
- best_so_far = avg_fragment_size;
- found = node;
}
}
- if (avg_fragment_size > ac->ac_g_ex.fe_len)
- node = node->rb_right;
- else
- node = node->rb_left;
+ read_unlock(&sbi->s_mb_avg_fragment_size_locks[i]);
+ if (grp)
+ break;
}
-done:
- if (found) {
- grp = rb_entry(found, struct ext4_group_info,
- bb_avg_fragment_size_rb);
+ if (grp) {
*group = grp->bb_group;
ac->ac_flags |= EXT4_MB_CR1_OPTIMIZED;
} else {
*new_cr = 2;
}
-
- read_unlock(&sbi->s_mb_rb_lock);
- ac->ac_last_optimal_group = *group;
}
static inline int should_optimize_scan(struct ext4_allocation_context *ac)
@@ -1017,11 +975,6 @@ next_linear_group(struct ext4_allocation_context *ac, int group, int ngroups)
goto inc_and_return;
}
- if (ac->ac_flags & EXT4_MB_SEARCH_NEXT_LINEAR) {
- ac->ac_flags &= ~EXT4_MB_SEARCH_NEXT_LINEAR;
- goto inc_and_return;
- }
-
return group;
inc_and_return:
/*
@@ -1049,8 +1002,10 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
{
*new_cr = ac->ac_criteria;
- if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining)
+ if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining) {
+ *group = next_linear_group(ac, *group, ngroups);
return;
+ }
if (*new_cr == 0) {
ext4_mb_choose_next_group_cr0(ac, new_cr, group, ngroups);
@@ -1075,23 +1030,25 @@ mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
struct ext4_sb_info *sbi = EXT4_SB(sb);
int i;
- if (test_opt2(sb, MB_OPTIMIZE_SCAN) && grp->bb_largest_free_order >= 0) {
+ for (i = MB_NUM_ORDERS(sb) - 1; i >= 0; i--)
+ if (grp->bb_counters[i] > 0)
+ break;
+ /* No need to move between order lists? */
+ if (!test_opt2(sb, MB_OPTIMIZE_SCAN) ||
+ i == grp->bb_largest_free_order) {
+ grp->bb_largest_free_order = i;
+ return;
+ }
+
+ if (grp->bb_largest_free_order >= 0) {
write_lock(&sbi->s_mb_largest_free_orders_locks[
grp->bb_largest_free_order]);
list_del_init(&grp->bb_largest_free_order_node);
write_unlock(&sbi->s_mb_largest_free_orders_locks[
grp->bb_largest_free_order]);
}
- grp->bb_largest_free_order = -1; /* uninit */
-
- for (i = MB_NUM_ORDERS(sb) - 1; i >= 0; i--) {
- if (grp->bb_counters[i] > 0) {
- grp->bb_largest_free_order = i;
- break;
- }
- }
- if (test_opt2(sb, MB_OPTIMIZE_SCAN) &&
- grp->bb_largest_free_order >= 0 && grp->bb_free) {
+ grp->bb_largest_free_order = i;
+ if (grp->bb_largest_free_order >= 0 && grp->bb_free) {
write_lock(&sbi->s_mb_largest_free_orders_locks[
grp->bb_largest_free_order]);
list_add_tail(&grp->bb_largest_free_order_node,
@@ -1148,13 +1105,13 @@ void ext4_mb_generate_buddy(struct super_block *sb,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
}
mb_set_largest_free_order(sb, grp);
+ mb_update_avg_fragment_size(sb, grp);
clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
period = get_cycles() - period;
atomic_inc(&sbi->s_mb_buddies_generated);
atomic64_add(period, &sbi->s_mb_generation_time);
- mb_update_avg_fragment_size(sb, grp);
}
/* The buddy information is attached the buddy cache inode
@@ -2636,7 +2593,7 @@ static noinline_for_stack int
ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
ext4_group_t prefetch_grp = 0, ngroups, group, i;
- int cr = -1;
+ int cr = -1, new_cr;
int err = 0, first_err = 0;
unsigned int nr = 0, prefetch_ios = 0;
struct ext4_sb_info *sbi;
@@ -2707,17 +2664,14 @@ repeat:
* from the goal value specified
*/
group = ac->ac_g_ex.fe_group;
- ac->ac_last_optimal_group = group;
ac->ac_groups_linear_remaining = sbi->s_mb_max_linear_groups;
prefetch_grp = group;
- for (i = 0; i < ngroups; group = next_linear_group(ac, group, ngroups),
- i++) {
- int ret = 0, new_cr;
+ for (i = 0, new_cr = cr; i < ngroups; i++,
+ ext4_mb_choose_next_group(ac, &new_cr, &group, ngroups)) {
+ int ret = 0;
cond_resched();
-
- ext4_mb_choose_next_group(ac, &new_cr, &group, ngroups);
if (new_cr != cr) {
cr = new_cr;
goto repeat;
@@ -2991,9 +2945,7 @@ __acquires(&EXT4_SB(sb)->s_mb_rb_lock)
struct super_block *sb = pde_data(file_inode(seq->file));
unsigned long position;
- read_lock(&EXT4_SB(sb)->s_mb_rb_lock);
-
- if (*pos < 0 || *pos >= MB_NUM_ORDERS(sb) + 1)
+ if (*pos < 0 || *pos >= 2*MB_NUM_ORDERS(sb))
return NULL;
position = *pos + 1;
return (void *) ((unsigned long) position);
@@ -3005,7 +2957,7 @@ static void *ext4_mb_seq_structs_summary_next(struct seq_file *seq, void *v, lof
unsigned long position;
++*pos;
- if (*pos < 0 || *pos >= MB_NUM_ORDERS(sb) + 1)
+ if (*pos < 0 || *pos >= 2*MB_NUM_ORDERS(sb))
return NULL;
position = *pos + 1;
return (void *) ((unsigned long) position);
@@ -3017,29 +2969,22 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
struct ext4_sb_info *sbi = EXT4_SB(sb);
unsigned long position = ((unsigned long) v);
struct ext4_group_info *grp;
- struct rb_node *n;
- unsigned int count, min, max;
+ unsigned int count;
position--;
if (position >= MB_NUM_ORDERS(sb)) {
- seq_puts(seq, "fragment_size_tree:\n");
- n = rb_first(&sbi->s_mb_avg_fragment_size_root);
- if (!n) {
- seq_puts(seq, "\ttree_min: 0\n\ttree_max: 0\n\ttree_nodes: 0\n");
- return 0;
- }
- grp = rb_entry(n, struct ext4_group_info, bb_avg_fragment_size_rb);
- min = grp->bb_fragments ? grp->bb_free / grp->bb_fragments : 0;
- count = 1;
- while (rb_next(n)) {
- count++;
- n = rb_next(n);
- }
- grp = rb_entry(n, struct ext4_group_info, bb_avg_fragment_size_rb);
- max = grp->bb_fragments ? grp->bb_free / grp->bb_fragments : 0;
+ position -= MB_NUM_ORDERS(sb);
+ if (position == 0)
+ seq_puts(seq, "avg_fragment_size_lists:\n");
- seq_printf(seq, "\ttree_min: %u\n\ttree_max: %u\n\ttree_nodes: %u\n",
- min, max, count);
+ count = 0;
+ read_lock(&sbi->s_mb_avg_fragment_size_locks[position]);
+ list_for_each_entry(grp, &sbi->s_mb_avg_fragment_size[position],
+ bb_avg_fragment_size_node)
+ count++;
+ read_unlock(&sbi->s_mb_avg_fragment_size_locks[position]);
+ seq_printf(seq, "\tlist_order_%u_groups: %u\n",
+ (unsigned int)position, count);
return 0;
}
@@ -3049,9 +2994,11 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
seq_puts(seq, "max_free_order_lists:\n");
}
count = 0;
+ read_lock(&sbi->s_mb_largest_free_orders_locks[position]);
list_for_each_entry(grp, &sbi->s_mb_largest_free_orders[position],
bb_largest_free_order_node)
count++;
+ read_unlock(&sbi->s_mb_largest_free_orders_locks[position]);
seq_printf(seq, "\tlist_order_%u_groups: %u\n",
(unsigned int)position, count);
@@ -3059,11 +3006,7 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
}
static void ext4_mb_seq_structs_summary_stop(struct seq_file *seq, void *v)
-__releases(&EXT4_SB(sb)->s_mb_rb_lock)
{
- struct super_block *sb = pde_data(file_inode(seq->file));
-
- read_unlock(&EXT4_SB(sb)->s_mb_rb_lock);
}
const struct seq_operations ext4_mb_seq_structs_summary_ops = {
@@ -3176,8 +3119,9 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
init_rwsem(&meta_group_info[i]->alloc_sem);
meta_group_info[i]->bb_free_root = RB_ROOT;
INIT_LIST_HEAD(&meta_group_info[i]->bb_largest_free_order_node);
- RB_CLEAR_NODE(&meta_group_info[i]->bb_avg_fragment_size_rb);
+ INIT_LIST_HEAD(&meta_group_info[i]->bb_avg_fragment_size_node);
meta_group_info[i]->bb_largest_free_order = -1; /* uninit */
+ meta_group_info[i]->bb_avg_fragment_size_order = -1; /* uninit */
meta_group_info[i]->bb_group = group;
mb_group_bb_bitmap_alloc(sb, meta_group_info[i], group);
@@ -3426,7 +3370,24 @@ int ext4_mb_init(struct super_block *sb)
i++;
} while (i < MB_NUM_ORDERS(sb));
- sbi->s_mb_avg_fragment_size_root = RB_ROOT;
+ sbi->s_mb_avg_fragment_size =
+ kmalloc_array(MB_NUM_ORDERS(sb), sizeof(struct list_head),
+ GFP_KERNEL);
+ if (!sbi->s_mb_avg_fragment_size) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ sbi->s_mb_avg_fragment_size_locks =
+ kmalloc_array(MB_NUM_ORDERS(sb), sizeof(rwlock_t),
+ GFP_KERNEL);
+ if (!sbi->s_mb_avg_fragment_size_locks) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < MB_NUM_ORDERS(sb); i++) {
+ INIT_LIST_HEAD(&sbi->s_mb_avg_fragment_size[i]);
+ rwlock_init(&sbi->s_mb_avg_fragment_size_locks[i]);
+ }
sbi->s_mb_largest_free_orders =
kmalloc_array(MB_NUM_ORDERS(sb), sizeof(struct list_head),
GFP_KERNEL);
@@ -3445,7 +3406,6 @@ int ext4_mb_init(struct super_block *sb)
INIT_LIST_HEAD(&sbi->s_mb_largest_free_orders[i]);
rwlock_init(&sbi->s_mb_largest_free_orders_locks[i]);
}
- rwlock_init(&sbi->s_mb_rb_lock);
spin_lock_init(&sbi->s_md_lock);
sbi->s_mb_free_pending = 0;
@@ -3516,6 +3476,8 @@ out_free_locality_groups:
free_percpu(sbi->s_locality_groups);
sbi->s_locality_groups = NULL;
out:
+ kfree(sbi->s_mb_avg_fragment_size);
+ kfree(sbi->s_mb_avg_fragment_size_locks);
kfree(sbi->s_mb_largest_free_orders);
kfree(sbi->s_mb_largest_free_orders_locks);
kfree(sbi->s_mb_offsets);
@@ -3582,6 +3544,8 @@ int ext4_mb_release(struct super_block *sb)
kvfree(group_info);
rcu_read_unlock();
}
+ kfree(sbi->s_mb_avg_fragment_size);
+ kfree(sbi->s_mb_avg_fragment_size_locks);
kfree(sbi->s_mb_largest_free_orders);
kfree(sbi->s_mb_largest_free_orders_locks);
kfree(sbi->s_mb_offsets);
@@ -5193,6 +5157,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
int bsbits = ac->ac_sb->s_blocksize_bits;
loff_t size, isize;
+ bool inode_pa_eligible, group_pa_eligible;
if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
return;
@@ -5200,25 +5165,27 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
return;
+ group_pa_eligible = sbi->s_mb_group_prealloc > 0;
+ inode_pa_eligible = true;
size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
>> bsbits;
+ /* No point in using inode preallocation for closed files */
if ((size == isize) && !ext4_fs_is_busy(sbi) &&
- !inode_is_open_for_write(ac->ac_inode)) {
- ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
- return;
- }
+ !inode_is_open_for_write(ac->ac_inode))
+ inode_pa_eligible = false;
- if (sbi->s_mb_group_prealloc <= 0) {
- ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
- return;
- }
-
- /* don't use group allocation for large files */
size = max(size, isize);
- if (size > sbi->s_mb_stream_request) {
- ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
+ /* Don't use group allocation for large files */
+ if (size > sbi->s_mb_stream_request)
+ group_pa_eligible = false;
+
+ if (!group_pa_eligible) {
+ if (inode_pa_eligible)
+ ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
+ else
+ ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
return;
}
@@ -5565,6 +5532,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
ext4_fsblk_t block = 0;
unsigned int inquota = 0;
unsigned int reserv_clstrs = 0;
+ int retries = 0;
u64 seq;
might_sleep();
@@ -5667,7 +5635,8 @@ repeat:
ar->len = ac->ac_b_ex.fe_len;
}
} else {
- if (ext4_mb_discard_preallocations_should_retry(sb, ac, &seq))
+ if (++retries < 3 &&
+ ext4_mb_discard_preallocations_should_retry(sb, ac, &seq))
goto repeat;
/*
* If block allocation fails then the pa allocated above
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 39da92ceabf8..dcda2a943cee 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -178,7 +178,6 @@ struct ext4_allocation_context {
/* copy of the best found extent taken before preallocation efforts */
struct ext4_free_extent ac_f_ex;
- ext4_group_t ac_last_optimal_group;
__u32 ac_groups_considered;
__u32 ac_flags; /* allocation hints */
__u16 ac_groups_scanned;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 05221366a16d..08a1993ab7fd 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -134,10 +134,10 @@ static bool inode_io_list_move_locked(struct inode *inode,
static void wb_wakeup(struct bdi_writeback *wb)
{
- spin_lock_bh(&wb->work_lock);
+ spin_lock_irq(&wb->work_lock);
if (test_bit(WB_registered, &wb->state))
mod_delayed_work(bdi_wq, &wb->dwork, 0);
- spin_unlock_bh(&wb->work_lock);
+ spin_unlock_irq(&wb->work_lock);
}
static void finish_writeback_work(struct bdi_writeback *wb,
@@ -164,7 +164,7 @@ static void wb_queue_work(struct bdi_writeback *wb,
if (work->done)
atomic_inc(&work->done->cnt);
- spin_lock_bh(&wb->work_lock);
+ spin_lock_irq(&wb->work_lock);
if (test_bit(WB_registered, &wb->state)) {
list_add_tail(&work->list, &wb->work_list);
@@ -172,7 +172,7 @@ static void wb_queue_work(struct bdi_writeback *wb,
} else
finish_writeback_work(wb, work);
- spin_unlock_bh(&wb->work_lock);
+ spin_unlock_irq(&wb->work_lock);
}
/**
@@ -2082,13 +2082,13 @@ static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb)
{
struct wb_writeback_work *work = NULL;
- spin_lock_bh(&wb->work_lock);
+ spin_lock_irq(&wb->work_lock);
if (!list_empty(&wb->work_list)) {
work = list_entry(wb->work_list.next,
struct wb_writeback_work, list);
list_del_init(&work->list);
}
- spin_unlock_bh(&wb->work_lock);
+ spin_unlock_irq(&wb->work_lock);
return work;
}
diff --git a/fs/inode.c b/fs/inode.c
index 6462276dfdf0..ba1de23c13c1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2018,23 +2018,25 @@ static int __file_remove_privs(struct file *file, unsigned int flags)
{
struct dentry *dentry = file_dentry(file);
struct inode *inode = file_inode(file);
- int error;
+ int error = 0;
int kill;
if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode))
return 0;
kill = dentry_needs_remove_privs(dentry);
- if (kill <= 0)
+ if (kill < 0)
return kill;
- if (flags & IOCB_NOWAIT)
- return -EAGAIN;
+ if (kill) {
+ if (flags & IOCB_NOWAIT)
+ return -EAGAIN;
+
+ error = __remove_privs(file_mnt_user_ns(file), dentry, kill);
+ }
- error = __remove_privs(file_mnt_user_ns(file), dentry, kill);
if (!error)
inode_has_no_xattr(inode);
-
return error;
}
diff --git a/fs/internal.h b/fs/internal.h
index 87e96b9024ce..3e206d3e317c 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -16,6 +16,7 @@ struct shrink_control;
struct fs_context;
struct user_namespace;
struct pipe_inode_info;
+struct iov_iter;
/*
* block/bdev.c
@@ -221,3 +222,5 @@ ssize_t do_getxattr(struct user_namespace *mnt_userns,
int setxattr_copy(const char __user *name, struct xattr_ctx *ctx);
int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
struct xattr_ctx *ctx);
+
+ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos);
diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h
index 52aa0adeb951..e0cbcfa98c7e 100644
--- a/fs/ksmbd/ksmbd_netlink.h
+++ b/fs/ksmbd/ksmbd_netlink.h
@@ -349,6 +349,7 @@ enum KSMBD_TREE_CONN_STATUS {
#define KSMBD_SHARE_FLAG_STREAMS BIT(11)
#define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS BIT(12)
#define KSMBD_SHARE_FLAG_ACL_XATTR BIT(13)
+#define KSMBD_SHARE_FLAG_UPDATE BIT(14)
/*
* Tree connect request flags.
@@ -364,6 +365,7 @@ enum KSMBD_TREE_CONN_STATUS {
#define KSMBD_TREE_CONN_FLAG_READ_ONLY BIT(1)
#define KSMBD_TREE_CONN_FLAG_WRITABLE BIT(2)
#define KSMBD_TREE_CONN_FLAG_ADMIN_ACCOUNT BIT(3)
+#define KSMBD_TREE_CONN_FLAG_UPDATE BIT(4)
/*
* RPC over IPC.
diff --git a/fs/ksmbd/mgmt/share_config.c b/fs/ksmbd/mgmt/share_config.c
index 70655af93b44..c9bca1c2c834 100644
--- a/fs/ksmbd/mgmt/share_config.c
+++ b/fs/ksmbd/mgmt/share_config.c
@@ -51,12 +51,16 @@ static void kill_share(struct ksmbd_share_config *share)
kfree(share);
}
-void __ksmbd_share_config_put(struct ksmbd_share_config *share)
+void ksmbd_share_config_del(struct ksmbd_share_config *share)
{
down_write(&shares_table_lock);
hash_del(&share->hlist);
up_write(&shares_table_lock);
+}
+void __ksmbd_share_config_put(struct ksmbd_share_config *share)
+{
+ ksmbd_share_config_del(share);
kill_share(share);
}
diff --git a/fs/ksmbd/mgmt/share_config.h b/fs/ksmbd/mgmt/share_config.h
index 28bf3511763f..902f2cb1963a 100644
--- a/fs/ksmbd/mgmt/share_config.h
+++ b/fs/ksmbd/mgmt/share_config.h
@@ -64,6 +64,7 @@ static inline int test_share_config_flag(struct ksmbd_share_config *share,
return share->flags & flag;
}
+void ksmbd_share_config_del(struct ksmbd_share_config *share);
void __ksmbd_share_config_put(struct ksmbd_share_config *share);
static inline void ksmbd_share_config_put(struct ksmbd_share_config *share)
diff --git a/fs/ksmbd/mgmt/tree_connect.c b/fs/ksmbd/mgmt/tree_connect.c
index b35ea6a6abc5..97ab7987df6e 100644
--- a/fs/ksmbd/mgmt/tree_connect.c
+++ b/fs/ksmbd/mgmt/tree_connect.c
@@ -19,7 +19,7 @@ struct ksmbd_tree_conn_status
ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess,
char *share_name)
{
- struct ksmbd_tree_conn_status status = {-EINVAL, NULL};
+ struct ksmbd_tree_conn_status status = {-ENOENT, NULL};
struct ksmbd_tree_connect_response *resp = NULL;
struct ksmbd_share_config *sc;
struct ksmbd_tree_connect *tree_conn = NULL;
@@ -57,6 +57,20 @@ ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess,
goto out_error;
tree_conn->flags = resp->connection_flags;
+ if (test_tree_conn_flag(tree_conn, KSMBD_TREE_CONN_FLAG_UPDATE)) {
+ struct ksmbd_share_config *new_sc;
+
+ ksmbd_share_config_del(sc);
+ new_sc = ksmbd_share_config_get(share_name);
+ if (!new_sc) {
+ pr_err("Failed to update stale share config\n");
+ status.ret = -ESTALE;
+ goto out_error;
+ }
+ ksmbd_share_config_put(sc);
+ sc = new_sc;
+ }
+
tree_conn->user = sess->user;
tree_conn->share_conf = sc;
status.tree_conn = tree_conn;
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index 9751cc92c111..19412ac701a6 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -1944,8 +1944,10 @@ out_err1:
rsp->hdr.Status = STATUS_SUCCESS;
rc = 0;
break;
+ case -ESTALE:
+ case -ENOENT:
case KSMBD_TREE_CONN_STATUS_NO_SHARE:
- rsp->hdr.Status = STATUS_BAD_NETWORK_PATH;
+ rsp->hdr.Status = STATUS_BAD_NETWORK_NAME;
break;
case -ENOMEM:
case KSMBD_TREE_CONN_STATUS_NOMEM:
@@ -2328,15 +2330,15 @@ static int smb2_remove_smb_xattrs(struct path *path)
name += strlen(name) + 1) {
ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name));
- if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
- strncmp(&name[XATTR_USER_PREFIX_LEN], DOS_ATTRIBUTE_PREFIX,
- DOS_ATTRIBUTE_PREFIX_LEN) &&
- strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, STREAM_PREFIX_LEN))
- continue;
-
- err = ksmbd_vfs_remove_xattr(user_ns, path->dentry, name);
- if (err)
- ksmbd_debug(SMB, "remove xattr failed : %s\n", name);
+ if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
+ !strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX,
+ STREAM_PREFIX_LEN)) {
+ err = ksmbd_vfs_remove_xattr(user_ns, path->dentry,
+ name);
+ if (err)
+ ksmbd_debug(SMB, "remove xattr failed : %s\n",
+ name);
+ }
}
out:
kvfree(xattr_list);
@@ -3042,12 +3044,6 @@ int smb2_open(struct ksmbd_work *work)
list_add(&fp->node, &fp->f_ci->m_fp_list);
write_unlock(&fp->f_ci->m_lock);
- rc = ksmbd_vfs_getattr(&path, &stat);
- if (rc) {
- generic_fillattr(user_ns, d_inode(path.dentry), &stat);
- rc = 0;
- }
-
/* Check delete pending among previous fp before oplock break */
if (ksmbd_inode_pending_delete(fp)) {
rc = -EBUSY;
@@ -3134,6 +3130,10 @@ int smb2_open(struct ksmbd_work *work)
}
}
+ rc = ksmbd_vfs_getattr(&path, &stat);
+ if (rc)
+ goto err_out;
+
if (stat.result_mask & STATX_BTIME)
fp->create_time = ksmbd_UnixTimeToNT(stat.btime);
else
@@ -3149,9 +3149,6 @@ int smb2_open(struct ksmbd_work *work)
memcpy(fp->client_guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE);
- generic_fillattr(user_ns, file_inode(fp->filp),
- &stat);
-
rsp->StructureSize = cpu_to_le16(89);
rcu_read_lock();
opinfo = rcu_dereference(fp->f_opinfo);
diff --git a/fs/locks.c b/fs/locks.c
index c266cfdc3291..607f94a0e789 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2129,6 +2129,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
else
error = locks_lock_file_wait(f.file, &fl);
+ locks_release_private(&fl);
out_putf:
fdput(f);
diff --git a/fs/namespace.c b/fs/namespace.c
index 68789f896f08..df137ba19d37 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4238,6 +4238,13 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
err = -EPERM;
goto out_fput;
}
+
+ /* We're not controlling the target namespace. */
+ if (!ns_capable(mnt_userns, CAP_SYS_ADMIN)) {
+ err = -EPERM;
+ goto out_fput;
+ }
+
kattr->mnt_userns = get_user_ns(mnt_userns);
out_fput:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index dbab3caa15ed..5d6c2ddc7ea6 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2382,7 +2382,8 @@ static void nfs_dentry_remove_handle_error(struct inode *dir,
{
switch (error) {
case -ENOENT:
- d_delete(dentry);
+ if (d_really_is_positive(dentry))
+ d_delete(dentry);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
break;
case 0:
@@ -2484,8 +2485,10 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
*/
error = -ETXTBSY;
if (WARN_ON(dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
- WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED))
+ WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED)) {
+ spin_unlock(&dentry->d_lock);
goto out;
+ }
if (dentry->d_fsdata)
/* old devname */
kfree(dentry->d_fsdata);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index d2bcd4834c0e..e032fe201a36 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -221,8 +221,10 @@ nfs_file_fsync_commit(struct file *file, int datasync)
int
nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
- struct nfs_open_context *ctx = nfs_file_open_context(file);
struct inode *inode = file_inode(file);
+ struct nfs_inode *nfsi = NFS_I(inode);
+ long save_nredirtied = atomic_long_read(&nfsi->redirtied_pages);
+ long nredirtied;
int ret;
trace_nfs_fsync_enter(inode);
@@ -237,15 +239,10 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
ret = pnfs_sync_inode(inode, !!datasync);
if (ret != 0)
break;
- if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags))
+ nredirtied = atomic_long_read(&nfsi->redirtied_pages);
+ if (nredirtied == save_nredirtied)
break;
- /*
- * If nfs_file_fsync_commit detected a server reboot, then
- * resend all dirty pages that might have been covered by
- * the NFS_CONTEXT_RESEND_WRITES flag
- */
- start = 0;
- end = LLONG_MAX;
+ save_nredirtied = nredirtied;
}
trace_nfs_fsync_exit(inode, ret);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b4e46b0ffa2d..bea7c005119c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -426,6 +426,7 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh)
static void nfs_inode_init_regular(struct nfs_inode *nfsi)
{
atomic_long_set(&nfsi->nrequests, 0);
+ atomic_long_set(&nfsi->redirtied_pages, 0);
INIT_LIST_HEAD(&nfsi->commit_info.list);
atomic_long_set(&nfsi->commit_info.ncommit, 0);
atomic_set(&nfsi->commit_info.rpcs_out, 0);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 27c720d71b4e..898dd95bc7a7 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -606,6 +606,31 @@ static inline gfp_t nfs_io_gfp_mask(void)
return GFP_KERNEL;
}
+/*
+ * Special version of should_remove_suid() that ignores capabilities.
+ */
+static inline int nfs_should_remove_suid(const struct inode *inode)
+{
+ umode_t mode = inode->i_mode;
+ int kill = 0;
+
+ /* suid always must be killed */
+ if (unlikely(mode & S_ISUID))
+ kill = ATTR_KILL_SUID;
+
+ /*
+ * sgid without any exec bits is just a mandatory locking mark; leave
+ * it alone. If some exec bits are set, it's a real sgid; kill it.
+ */
+ if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
+ kill |= ATTR_KILL_SGID;
+
+ if (unlikely(kill && S_ISREG(mode)))
+ return kill;
+
+ return 0;
+}
+
/* unlink.c */
extern struct rpc_task *
nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 068c45b3bc1a..6dab9e408372 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -78,10 +78,15 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
status = nfs4_call_sync(server->client, server, msg,
&args.seq_args, &res.seq_res, 0);
- if (status == 0)
+ if (status == 0) {
+ if (nfs_should_remove_suid(inode)) {
+ spin_lock(&inode->i_lock);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE);
+ spin_unlock(&inode->i_lock);
+ }
status = nfs_post_op_update_inode_force_wcc(inode,
res.falloc_fattr);
-
+ }
if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_ALLOCATE])
trace_nfs4_fallocate(inode, &args, status);
else
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index e88f6b18445e..9eb181287879 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -340,6 +340,11 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
goto out;
}
+ if (!S_ISREG(fattr->mode)) {
+ res = ERR_PTR(-EBADF);
+ goto out;
+ }
+
res = ERR_PTR(-ENOMEM);
len = strlen(SSC_READ_NAME_BODY) + 16;
read_name = kzalloc(len, GFP_KERNEL);
@@ -357,6 +362,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
r_ino->i_fop);
if (IS_ERR(filep)) {
res = ERR_CAST(filep);
+ iput(r_ino);
goto out_free_name;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 41a9b6b58fb9..2613b7e36eb9 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2817,7 +2817,6 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
/* Resend all requests through the MDS */
nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
hdr->completion_ops);
- set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags);
return nfs_pageio_resend(&pgio, hdr);
}
EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 82944e14fcea..ee66ffdb985e 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1051,22 +1051,31 @@ static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx)
if (ctx->bsize)
sb->s_blocksize = nfs_block_size(ctx->bsize, &sb->s_blocksize_bits);
- if (server->nfs_client->rpc_ops->version != 2) {
- /* The VFS shouldn't apply the umask to mode bits. We will do
- * so ourselves when necessary.
+ switch (server->nfs_client->rpc_ops->version) {
+ case 2:
+ sb->s_time_gran = 1000;
+ sb->s_time_min = 0;
+ sb->s_time_max = U32_MAX;
+ break;
+ case 3:
+ /*
+ * The VFS shouldn't apply the umask to mode bits.
+ * We will do so ourselves when necessary.
*/
sb->s_flags |= SB_POSIXACL;
sb->s_time_gran = 1;
- sb->s_export_op = &nfs_export_ops;
- } else
- sb->s_time_gran = 1000;
-
- if (server->nfs_client->rpc_ops->version != 4) {
sb->s_time_min = 0;
sb->s_time_max = U32_MAX;
- } else {
+ sb->s_export_op = &nfs_export_ops;
+ break;
+ case 4:
+ sb->s_flags |= SB_POSIXACL;
+ sb->s_time_gran = 1;
sb->s_time_min = S64_MIN;
sb->s_time_max = S64_MAX;
+ if (server->caps & NFS_CAP_ATOMIC_OPEN_V1)
+ sb->s_export_op = &nfs_export_ops;
+ break;
}
sb->s_magic = NFS_SUPER_MAGIC;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 51a7e202d6e5..f41d24b54fd1 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1420,10 +1420,12 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
*/
static void nfs_redirty_request(struct nfs_page *req)
{
+ struct nfs_inode *nfsi = NFS_I(page_file_mapping(req->wb_page)->host);
+
/* Bump the transmission count */
req->wb_nio++;
nfs_mark_request_dirty(req);
- set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
+ atomic_long_inc(&nfsi->redirtied_pages);
nfs_end_page_writeback(req);
nfs_release_request(req);
}
@@ -1494,31 +1496,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
}
-/*
- * Special version of should_remove_suid() that ignores capabilities.
- */
-static int nfs_should_remove_suid(const struct inode *inode)
-{
- umode_t mode = inode->i_mode;
- int kill = 0;
-
- /* suid always must be killed */
- if (unlikely(mode & S_ISUID))
- kill = ATTR_KILL_SUID;
-
- /*
- * sgid without any exec bits is just a mandatory locking mark; leave
- * it alone. If some exec bits are set, it's a real sgid; kill it.
- */
- if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
- kill |= ATTR_KILL_SGID;
-
- if (unlikely(kill && S_ISREG(mode)))
- return kill;
-
- return 0;
-}
-
static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr,
struct nfs_fattr *fattr)
{
@@ -1904,7 +1881,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
/* We have a mismatch. Write the page again */
dprintk_cont(" mismatch\n");
nfs_mark_request_dirty(req);
- set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
+ atomic_long_inc(&NFS_I(data->inode)->redirtied_pages);
next:
nfs_unlock_and_release_request(req);
/* Latency breaker */
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 9f486b788ed0..fc17b0ac8729 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -300,6 +300,10 @@ commit_metadata(struct svc_fh *fhp)
static void
nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
{
+ /* Ignore mode updates on symlinks */
+ if (S_ISLNK(inode->i_mode))
+ iap->ia_valid &= ~ATTR_MODE;
+
/* sanitize the mode change */
if (iap->ia_valid & ATTR_MODE) {
iap->ia_mode &= S_IALLUGO;
@@ -353,7 +357,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
int accmode = NFSD_MAY_SATTR;
umode_t ftype = 0;
__be32 err;
- int host_err;
+ int host_err = 0;
bool get_write_count;
bool size_change = (iap->ia_valid & ATTR_SIZE);
@@ -391,13 +395,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
dentry = fhp->fh_dentry;
inode = d_inode(dentry);
- /* Ignore any mode updates on symlinks */
- if (S_ISLNK(inode->i_mode))
- iap->ia_valid &= ~ATTR_MODE;
-
- if (!iap->ia_valid)
- return 0;
-
nfsd_sanitize_attrs(inode, iap);
if (check_guard && guardtime != inode->i_ctime.tv_sec)
@@ -448,8 +445,10 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out_unlock;
}
- iap->ia_valid |= ATTR_CTIME;
- host_err = notify_change(&init_user_ns, dentry, iap, NULL);
+ if (iap->ia_valid) {
+ iap->ia_valid |= ATTR_CTIME;
+ host_err = notify_change(&init_user_ns, dentry, iap, NULL);
+ }
out_unlock:
if (attr->na_seclabel && attr->na_seclabel->len)
@@ -846,10 +845,14 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
struct splice_desc *sd)
{
struct svc_rqst *rqstp = sd->u.data;
-
- svc_rqst_replace_page(rqstp, buf->page);
- if (rqstp->rq_res.page_len == 0)
- rqstp->rq_res.page_base = buf->offset;
+ struct page *page = buf->page; // may be a compound one
+ unsigned offset = buf->offset;
+
+ page += offset / PAGE_SIZE;
+ for (int i = sd->len; i > 0; i -= PAGE_SIZE)
+ svc_rqst_replace_page(rqstp, page++);
+ if (rqstp->rq_res.page_len == 0) // first call
+ rqstp->rq_res.page_base = offset % PAGE_SIZE;
rqstp->rq_res.page_len += sd->len;
return sd->len;
}
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 5ae8de09b271..001f4e053c85 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2092,7 +2092,8 @@ get_ctx_vol_failed:
// TODO: Initialize security.
/* Get the extended system files' directory inode. */
vol->extend_ino = ntfs_iget(sb, FILE_Extend);
- if (IS_ERR(vol->extend_ino) || is_bad_inode(vol->extend_ino)) {
+ if (IS_ERR(vol->extend_ino) || is_bad_inode(vol->extend_ino) ||
+ !S_ISDIR(vol->extend_ino->i_mode)) {
if (!IS_ERR(vol->extend_ino))
iput(vol->extend_ino);
ntfs_error(sb, "Failed to load $Extend.");
diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c
index e8c00dda42ad..71f870d497ae 100644
--- a/fs/ntfs3/attrib.c
+++ b/fs/ntfs3/attrib.c
@@ -84,8 +84,8 @@ static inline bool attr_must_be_resident(struct ntfs_sb_info *sbi,
/*
* attr_load_runs - Load all runs stored in @attr.
*/
-int attr_load_runs(struct ATTRIB *attr, struct ntfs_inode *ni,
- struct runs_tree *run, const CLST *vcn)
+static int attr_load_runs(struct ATTRIB *attr, struct ntfs_inode *ni,
+ struct runs_tree *run, const CLST *vcn)
{
int err;
CLST svcn = le64_to_cpu(attr->nres.svcn);
@@ -140,7 +140,10 @@ failed:
}
if (lcn != SPARSE_LCN) {
- mark_as_free_ex(sbi, lcn, clen, trim);
+ if (sbi) {
+ /* mark bitmap range [lcn + clen) as free and trim clusters. */
+ mark_as_free_ex(sbi, lcn, clen, trim);
+ }
dn += clen;
}
@@ -173,7 +176,6 @@ int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run,
{
int err;
CLST flen, vcn0 = vcn, pre = pre_alloc ? *pre_alloc : 0;
- struct wnd_bitmap *wnd = &sbi->used.bitmap;
size_t cnt = run->count;
for (;;) {
@@ -196,9 +198,7 @@ int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run,
/* Add new fragment into run storage. */
if (!run_add_entry(run, vcn, lcn, flen, opt == ALLOCATE_MFT)) {
/* Undo last 'ntfs_look_for_free_space' */
- down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
- wnd_set_free(wnd, lcn, flen);
- up_write(&wnd->rw_lock);
+ mark_as_free_ex(sbi, lcn, len, false);
err = -ENOMEM;
goto out;
}
@@ -320,7 +320,7 @@ int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr,
err = ni_insert_nonresident(ni, attr_s->type, attr_name(attr_s),
attr_s->name_len, run, 0, alen,
- attr_s->flags, &attr, NULL);
+ attr_s->flags, &attr, NULL, NULL);
if (err)
goto out3;
@@ -419,40 +419,44 @@ int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type,
struct mft_inode *mi, *mi_b;
CLST alen, vcn, lcn, new_alen, old_alen, svcn, evcn;
CLST next_svcn, pre_alloc = -1, done = 0;
- bool is_ext;
+ bool is_ext, is_bad = false;
u32 align;
struct MFT_REC *rec;
again:
+ alen = 0;
le_b = NULL;
attr_b = ni_find_attr(ni, NULL, &le_b, type, name, name_len, NULL,
&mi_b);
if (!attr_b) {
err = -ENOENT;
- goto out;
+ goto bad_inode;
}
if (!attr_b->non_res) {
err = attr_set_size_res(ni, attr_b, le_b, mi_b, new_size, run,
&attr_b);
- if (err || !attr_b->non_res)
- goto out;
+ if (err)
+ return err;
+
+ /* Return if file is still resident. */
+ if (!attr_b->non_res)
+ goto ok1;
/* Layout of records may be changed, so do a full search. */
goto again;
}
is_ext = is_attr_ext(attr_b);
-
-again_1:
align = sbi->cluster_size;
-
if (is_ext)
align <<= attr_b->nres.c_unit;
old_valid = le64_to_cpu(attr_b->nres.valid_size);
old_size = le64_to_cpu(attr_b->nres.data_size);
old_alloc = le64_to_cpu(attr_b->nres.alloc_size);
+
+again_1:
old_alen = old_alloc >> cluster_bits;
new_alloc = (new_size + align - 1) & ~(u64)(align - 1);
@@ -475,24 +479,27 @@ again_1:
mi = mi_b;
} else if (!le_b) {
err = -EINVAL;
- goto out;
+ goto bad_inode;
} else {
le = le_b;
attr = ni_find_attr(ni, attr_b, &le, type, name, name_len, &vcn,
&mi);
if (!attr) {
err = -EINVAL;
- goto out;
+ goto bad_inode;
}
next_le_1:
svcn = le64_to_cpu(attr->nres.svcn);
evcn = le64_to_cpu(attr->nres.evcn);
}
-
+ /*
+ * Here we have:
+ * attr,mi,le - last attribute segment (containing 'vcn').
+ * attr_b,mi_b,le_b - base (primary) attribute segment.
+ */
next_le:
rec = mi->mrec;
-
err = attr_load_runs(attr, ni, run, NULL);
if (err)
goto out;
@@ -507,6 +514,13 @@ next_le:
goto ok;
}
+ /*
+ * Add clusters. In simple case we have to:
+ * - allocate space (vcn, lcn, len)
+ * - update packed run in 'mi'
+ * - update attr->nres.evcn
+ * - update attr_b->nres.data_size/attr_b->nres.alloc_size
+ */
to_allocate = new_alen - old_alen;
add_alloc_in_same_attr_seg:
lcn = 0;
@@ -520,9 +534,11 @@ add_alloc_in_same_attr_seg:
pre_alloc = 0;
if (type == ATTR_DATA && !name_len &&
sbi->options->prealloc) {
- CLST new_alen2 = bytes_to_cluster(
- sbi, get_pre_allocated(new_size));
- pre_alloc = new_alen2 - new_alen;
+ pre_alloc =
+ bytes_to_cluster(
+ sbi,
+ get_pre_allocated(new_size)) -
+ new_alen;
}
/* Get the last LCN to allocate from. */
@@ -580,7 +596,7 @@ add_alloc_in_same_attr_seg:
pack_runs:
err = mi_pack_runs(mi, attr, run, vcn - svcn);
if (err)
- goto out;
+ goto undo_1;
next_svcn = le64_to_cpu(attr->nres.evcn) + 1;
new_alloc_tmp = (u64)next_svcn << cluster_bits;
@@ -614,7 +630,7 @@ pack_runs:
if (type == ATTR_LIST) {
err = ni_expand_list(ni);
if (err)
- goto out;
+ goto undo_2;
if (next_svcn < vcn)
goto pack_runs;
@@ -624,8 +640,9 @@ pack_runs:
if (!ni->attr_list.size) {
err = ni_create_attr_list(ni);
+ /* In case of error layout of records is not changed. */
if (err)
- goto out;
+ goto undo_2;
/* Layout of records is changed. */
}
@@ -637,48 +654,57 @@ pack_runs:
/* Insert new attribute segment. */
err = ni_insert_nonresident(ni, type, name, name_len, run,
next_svcn, vcn - next_svcn,
- attr_b->flags, &attr, &mi);
- if (err)
- goto out;
-
- if (!is_mft)
- run_truncate_head(run, evcn + 1);
-
- svcn = le64_to_cpu(attr->nres.svcn);
- evcn = le64_to_cpu(attr->nres.evcn);
+ attr_b->flags, &attr, &mi, NULL);
- le_b = NULL;
/*
* Layout of records maybe changed.
* Find base attribute to update.
*/
+ le_b = NULL;
attr_b = ni_find_attr(ni, NULL, &le_b, type, name, name_len,
NULL, &mi_b);
if (!attr_b) {
- err = -ENOENT;
- goto out;
+ err = -EINVAL;
+ goto bad_inode;
}
- attr_b->nres.alloc_size = cpu_to_le64((u64)vcn << cluster_bits);
- attr_b->nres.data_size = attr_b->nres.alloc_size;
- attr_b->nres.valid_size = attr_b->nres.alloc_size;
+ if (err) {
+ /* ni_insert_nonresident failed. */
+ attr = NULL;
+ goto undo_2;
+ }
+
+ if (!is_mft)
+ run_truncate_head(run, evcn + 1);
+
+ svcn = le64_to_cpu(attr->nres.svcn);
+ evcn = le64_to_cpu(attr->nres.evcn);
+
+ /*
+ * Attribute is in consistency state.
+ * Save this point to restore to if next steps fail.
+ */
+ old_valid = old_size = old_alloc = (u64)vcn << cluster_bits;
+ attr_b->nres.valid_size = attr_b->nres.data_size =
+ attr_b->nres.alloc_size = cpu_to_le64(old_size);
mi_b->dirty = true;
goto again_1;
}
if (new_size != old_size ||
(new_alloc != old_alloc && !keep_prealloc)) {
+ /*
+ * Truncate clusters. In simple case we have to:
+ * - update packed run in 'mi'
+ * - update attr->nres.evcn
+ * - update attr_b->nres.data_size/attr_b->nres.alloc_size
+ * - mark and trim clusters as free (vcn, lcn, len)
+ */
+ CLST dlen = 0;
+
vcn = max(svcn, new_alen);
new_alloc_tmp = (u64)vcn << cluster_bits;
- alen = 0;
- err = run_deallocate_ex(sbi, run, vcn, evcn - vcn + 1, &alen,
- true);
- if (err)
- goto out;
-
- run_truncate(run, vcn);
-
if (vcn > svcn) {
err = mi_pack_runs(mi, attr, run, vcn - svcn);
if (err)
@@ -697,7 +723,7 @@ pack_runs:
if (!al_remove_le(ni, le)) {
err = -EINVAL;
- goto out;
+ goto bad_inode;
}
le = (struct ATTR_LIST_ENTRY *)((u8 *)le - le_sz);
@@ -723,12 +749,20 @@ pack_runs:
attr_b->nres.valid_size =
attr_b->nres.alloc_size;
}
+ mi_b->dirty = true;
- if (is_ext)
+ err = run_deallocate_ex(sbi, run, vcn, evcn - vcn + 1, &dlen,
+ true);
+ if (err)
+ goto out;
+
+ if (is_ext) {
+ /* dlen - really deallocated clusters. */
le64_sub_cpu(&attr_b->nres.total_size,
- ((u64)alen << cluster_bits));
+ ((u64)dlen << cluster_bits));
+ }
- mi_b->dirty = true;
+ run_truncate(run, vcn);
if (new_alloc_tmp <= new_alloc)
goto ok;
@@ -747,7 +781,7 @@ pack_runs:
if (le->type != type || le->name_len != name_len ||
memcmp(le_name(le), name, name_len * sizeof(short))) {
err = -EINVAL;
- goto out;
+ goto bad_inode;
}
err = ni_load_mi(ni, le, &mi);
@@ -757,7 +791,7 @@ pack_runs:
attr = mi_find_attr(mi, NULL, type, name, name_len, &le->id);
if (!attr) {
err = -EINVAL;
- goto out;
+ goto bad_inode;
}
goto next_le_1;
}
@@ -772,13 +806,13 @@ ok:
}
}
-out:
- if (!err && attr_b && ret)
+ok1:
+ if (ret)
*ret = attr_b;
/* Update inode_set_bytes. */
- if (!err && ((type == ATTR_DATA && !name_len) ||
- (type == ATTR_ALLOC && name == I30_NAME))) {
+ if (((type == ATTR_DATA && !name_len) ||
+ (type == ATTR_ALLOC && name == I30_NAME))) {
bool dirty = false;
if (ni->vfs_inode.i_size != new_size) {
@@ -786,7 +820,7 @@ out:
dirty = true;
}
- if (attr_b && attr_b->non_res) {
+ if (attr_b->non_res) {
new_alloc = le64_to_cpu(attr_b->nres.alloc_size);
if (inode_get_bytes(&ni->vfs_inode) != new_alloc) {
inode_set_bytes(&ni->vfs_inode, new_alloc);
@@ -800,6 +834,47 @@ out:
}
}
+ return 0;
+
+undo_2:
+ vcn -= alen;
+ attr_b->nres.data_size = cpu_to_le64(old_size);
+ attr_b->nres.valid_size = cpu_to_le64(old_valid);
+ attr_b->nres.alloc_size = cpu_to_le64(old_alloc);
+
+ /* Restore 'attr' and 'mi'. */
+ if (attr)
+ goto restore_run;
+
+ if (le64_to_cpu(attr_b->nres.svcn) <= svcn &&
+ svcn <= le64_to_cpu(attr_b->nres.evcn)) {
+ attr = attr_b;
+ le = le_b;
+ mi = mi_b;
+ } else if (!le_b) {
+ err = -EINVAL;
+ goto bad_inode;
+ } else {
+ le = le_b;
+ attr = ni_find_attr(ni, attr_b, &le, type, name, name_len,
+ &svcn, &mi);
+ if (!attr)
+ goto bad_inode;
+ }
+
+restore_run:
+ if (mi_pack_runs(mi, attr, run, evcn - svcn + 1))
+ is_bad = true;
+
+undo_1:
+ run_deallocate_ex(sbi, run, vcn, alen, NULL, false);
+
+ run_truncate(run, vcn);
+out:
+ if (is_bad) {
+bad_inode:
+ _ntfs_bad_inode(&ni->vfs_inode);
+ }
return err;
}
@@ -855,7 +930,7 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
goto out;
}
- asize = le64_to_cpu(attr_b->nres.alloc_size) >> sbi->cluster_bits;
+ asize = le64_to_cpu(attr_b->nres.alloc_size) >> cluster_bits;
if (vcn >= asize) {
err = -EINVAL;
goto out;
@@ -1047,7 +1122,7 @@ ins_ext:
if (evcn1 > next_svcn) {
err = ni_insert_nonresident(ni, ATTR_DATA, NULL, 0, run,
next_svcn, evcn1 - next_svcn,
- attr_b->flags, &attr, &mi);
+ attr_b->flags, &attr, &mi, NULL);
if (err)
goto out;
}
@@ -1173,7 +1248,7 @@ int attr_load_runs_range(struct ntfs_inode *ni, enum ATTR_TYPE type,
{
struct ntfs_sb_info *sbi = ni->mi.sbi;
u8 cluster_bits = sbi->cluster_bits;
- CLST vcn = from >> cluster_bits;
+ CLST vcn;
CLST vcn_last = (to - 1) >> cluster_bits;
CLST lcn, clen;
int err;
@@ -1647,7 +1722,7 @@ ins_ext:
if (evcn1 > next_svcn) {
err = ni_insert_nonresident(ni, ATTR_DATA, NULL, 0, run,
next_svcn, evcn1 - next_svcn,
- attr_b->flags, &attr, &mi);
+ attr_b->flags, &attr, &mi, NULL);
if (err)
goto out;
}
@@ -1812,18 +1887,12 @@ int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes)
err = ni_insert_nonresident(
ni, ATTR_DATA, NULL, 0, run, next_svcn,
evcn1 - eat - next_svcn, a_flags, &attr,
- &mi);
+ &mi, &le);
if (err)
goto out;
/* Layout of records maybe changed. */
attr_b = NULL;
- le = al_find_ex(ni, NULL, ATTR_DATA, NULL, 0,
- &next_svcn);
- if (!le) {
- err = -EINVAL;
- goto out;
- }
}
/* Free all allocated memory. */
@@ -1918,7 +1987,7 @@ next_attr:
out:
up_write(&ni->file.run_lock);
if (err)
- make_bad_inode(&ni->vfs_inode);
+ _ntfs_bad_inode(&ni->vfs_inode);
return err;
}
@@ -1936,9 +2005,11 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size)
struct ATTRIB *attr = NULL, *attr_b;
struct ATTR_LIST_ENTRY *le, *le_b;
struct mft_inode *mi, *mi_b;
- CLST svcn, evcn1, vcn, len, end, alen, dealloc;
+ CLST svcn, evcn1, vcn, len, end, alen, hole, next_svcn;
u64 total_size, alloc_size;
u32 mask;
+ __le16 a_flags;
+ struct runs_tree run2;
if (!bytes)
return 0;
@@ -1990,6 +2061,9 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size)
}
down_write(&ni->file.run_lock);
+ run_init(&run2);
+ run_truncate(run, 0);
+
/*
* Enumerate all attribute segments and punch hole where necessary.
*/
@@ -1997,10 +2071,11 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size)
vcn = vbo >> sbi->cluster_bits;
len = bytes >> sbi->cluster_bits;
end = vcn + len;
- dealloc = 0;
+ hole = 0;
svcn = le64_to_cpu(attr_b->nres.svcn);
evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1;
+ a_flags = attr_b->flags;
if (svcn <= vcn && vcn < evcn1) {
attr = attr_b;
@@ -2008,14 +2083,14 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size)
mi = mi_b;
} else if (!le_b) {
err = -EINVAL;
- goto out;
+ goto bad_inode;
} else {
le = le_b;
attr = ni_find_attr(ni, attr_b, &le, ATTR_DATA, NULL, 0, &vcn,
&mi);
if (!attr) {
err = -EINVAL;
- goto out;
+ goto bad_inode;
}
svcn = le64_to_cpu(attr->nres.svcn);
@@ -2023,49 +2098,91 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size)
}
while (svcn < end) {
- CLST vcn1, zero, dealloc2;
+ CLST vcn1, zero, hole2 = hole;
err = attr_load_runs(attr, ni, run, &svcn);
if (err)
- goto out;
+ goto done;
vcn1 = max(vcn, svcn);
zero = min(end, evcn1) - vcn1;
- dealloc2 = dealloc;
- err = run_deallocate_ex(sbi, run, vcn1, zero, &dealloc, true);
+ /*
+ * Check range [vcn1 + zero).
+ * Calculate how many clusters there are.
+ * Don't do any destructive actions.
+ */
+ err = run_deallocate_ex(NULL, run, vcn1, zero, &hole2, false);
if (err)
- goto out;
+ goto done;
- if (dealloc2 == dealloc) {
- /* Looks like the required range is already sparsed. */
- } else {
- if (!run_add_entry(run, vcn1, SPARSE_LCN, zero,
- false)) {
- err = -ENOMEM;
- goto out;
- }
+ /* Check if required range is already hole. */
+ if (hole2 == hole)
+ goto next_attr;
+
+ /* Make a clone of run to undo. */
+ err = run_clone(run, &run2);
+ if (err)
+ goto done;
+
+ /* Make a hole range (sparse) [vcn1 + zero). */
+ if (!run_add_entry(run, vcn1, SPARSE_LCN, zero, false)) {
+ err = -ENOMEM;
+ goto done;
+ }
- err = mi_pack_runs(mi, attr, run, evcn1 - svcn);
+ /* Update run in attribute segment. */
+ err = mi_pack_runs(mi, attr, run, evcn1 - svcn);
+ if (err)
+ goto done;
+ next_svcn = le64_to_cpu(attr->nres.evcn) + 1;
+ if (next_svcn < evcn1) {
+ /* Insert new attribute segment. */
+ err = ni_insert_nonresident(ni, ATTR_DATA, NULL, 0, run,
+ next_svcn,
+ evcn1 - next_svcn, a_flags,
+ &attr, &mi, &le);
if (err)
- goto out;
+ goto undo_punch;
+
+ /* Layout of records maybe changed. */
+ attr_b = NULL;
}
+
+ /* Real deallocate. Should not fail. */
+ run_deallocate_ex(sbi, &run2, vcn1, zero, &hole, true);
+
+next_attr:
/* Free all allocated memory. */
run_truncate(run, 0);
if (evcn1 >= alen)
break;
+ /* Get next attribute segment. */
attr = ni_enum_attr_ex(ni, attr, &le, &mi);
if (!attr) {
err = -EINVAL;
- goto out;
+ goto bad_inode;
}
svcn = le64_to_cpu(attr->nres.svcn);
evcn1 = le64_to_cpu(attr->nres.evcn) + 1;
}
- total_size -= (u64)dealloc << sbi->cluster_bits;
+done:
+ if (!hole)
+ goto out;
+
+ if (!attr_b) {
+ attr_b = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL,
+ &mi_b);
+ if (!attr_b) {
+ err = -EINVAL;
+ goto bad_inode;
+ }
+ }
+
+ total_size -= (u64)hole << sbi->cluster_bits;
attr_b->nres.total_size = cpu_to_le64(total_size);
mi_b->dirty = true;
@@ -2075,9 +2192,263 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size)
mark_inode_dirty(&ni->vfs_inode);
out:
+ run_close(&run2);
up_write(&ni->file.run_lock);
+ return err;
+
+bad_inode:
+ _ntfs_bad_inode(&ni->vfs_inode);
+ goto out;
+
+undo_punch:
+ /*
+ * Restore packed runs.
+ * 'mi_pack_runs' should not fail, cause we restore original.
+ */
+ if (mi_pack_runs(mi, attr, &run2, evcn1 - svcn))
+ goto bad_inode;
+
+ goto done;
+}
+
+/*
+ * attr_insert_range - Insert range (hole) in file.
+ * Not for normal files.
+ */
+int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes)
+{
+ int err = 0;
+ struct runs_tree *run = &ni->file.run;
+ struct ntfs_sb_info *sbi = ni->mi.sbi;
+ struct ATTRIB *attr = NULL, *attr_b;
+ struct ATTR_LIST_ENTRY *le, *le_b;
+ struct mft_inode *mi, *mi_b;
+ CLST vcn, svcn, evcn1, len, next_svcn;
+ u64 data_size, alloc_size;
+ u32 mask;
+ __le16 a_flags;
+
+ if (!bytes)
+ return 0;
+
+ le_b = NULL;
+ attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b);
+ if (!attr_b)
+ return -ENOENT;
+
+ if (!is_attr_ext(attr_b)) {
+ /* It was checked above. See fallocate. */
+ return -EOPNOTSUPP;
+ }
+
+ if (!attr_b->non_res) {
+ data_size = le32_to_cpu(attr_b->res.data_size);
+ alloc_size = data_size;
+ mask = sbi->cluster_mask; /* cluster_size - 1 */
+ } else {
+ data_size = le64_to_cpu(attr_b->nres.data_size);
+ alloc_size = le64_to_cpu(attr_b->nres.alloc_size);
+ mask = (sbi->cluster_size << attr_b->nres.c_unit) - 1;
+ }
+
+ if (vbo > data_size) {
+ /* Insert range after the file size is not allowed. */
+ return -EINVAL;
+ }
+
+ if ((vbo & mask) || (bytes & mask)) {
+ /* Allow to insert only frame aligned ranges. */
+ return -EINVAL;
+ }
+
+ /*
+ * valid_size <= data_size <= alloc_size
+ * Check alloc_size for maximum possible.
+ */
+ if (bytes > sbi->maxbytes_sparse - alloc_size)
+ return -EFBIG;
+
+ vcn = vbo >> sbi->cluster_bits;
+ len = bytes >> sbi->cluster_bits;
+
+ down_write(&ni->file.run_lock);
+
+ if (!attr_b->non_res) {
+ err = attr_set_size(ni, ATTR_DATA, NULL, 0, run,
+ data_size + bytes, NULL, false, NULL);
+
+ le_b = NULL;
+ attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL,
+ &mi_b);
+ if (!attr_b) {
+ err = -EINVAL;
+ goto bad_inode;
+ }
+
+ if (err)
+ goto out;
+
+ if (!attr_b->non_res) {
+ /* Still resident. */
+ char *data = Add2Ptr(attr_b, attr_b->res.data_off);
+
+ memmove(data + bytes, data, bytes);
+ memset(data, 0, bytes);
+ goto done;
+ }
+
+ /* Resident files becomes nonresident. */
+ data_size = le64_to_cpu(attr_b->nres.data_size);
+ alloc_size = le64_to_cpu(attr_b->nres.alloc_size);
+ }
+
+ /*
+ * Enumerate all attribute segments and shift start vcn.
+ */
+ a_flags = attr_b->flags;
+ svcn = le64_to_cpu(attr_b->nres.svcn);
+ evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1;
+
+ if (svcn <= vcn && vcn < evcn1) {
+ attr = attr_b;
+ le = le_b;
+ mi = mi_b;
+ } else if (!le_b) {
+ err = -EINVAL;
+ goto bad_inode;
+ } else {
+ le = le_b;
+ attr = ni_find_attr(ni, attr_b, &le, ATTR_DATA, NULL, 0, &vcn,
+ &mi);
+ if (!attr) {
+ err = -EINVAL;
+ goto bad_inode;
+ }
+
+ svcn = le64_to_cpu(attr->nres.svcn);
+ evcn1 = le64_to_cpu(attr->nres.evcn) + 1;
+ }
+
+ run_truncate(run, 0); /* clear cached values. */
+ err = attr_load_runs(attr, ni, run, NULL);
+ if (err)
+ goto out;
+
+ if (!run_insert_range(run, vcn, len)) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* Try to pack in current record as much as possible. */
+ err = mi_pack_runs(mi, attr, run, evcn1 + len - svcn);
if (err)
- make_bad_inode(&ni->vfs_inode);
+ goto out;
+
+ next_svcn = le64_to_cpu(attr->nres.evcn) + 1;
+
+ while ((attr = ni_enum_attr_ex(ni, attr, &le, &mi)) &&
+ attr->type == ATTR_DATA && !attr->name_len) {
+ le64_add_cpu(&attr->nres.svcn, len);
+ le64_add_cpu(&attr->nres.evcn, len);
+ if (le) {
+ le->vcn = attr->nres.svcn;
+ ni->attr_list.dirty = true;
+ }
+ mi->dirty = true;
+ }
+
+ if (next_svcn < evcn1 + len) {
+ err = ni_insert_nonresident(ni, ATTR_DATA, NULL, 0, run,
+ next_svcn, evcn1 + len - next_svcn,
+ a_flags, NULL, NULL, NULL);
+
+ le_b = NULL;
+ attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL,
+ &mi_b);
+ if (!attr_b) {
+ err = -EINVAL;
+ goto bad_inode;
+ }
+
+ if (err) {
+ /* ni_insert_nonresident failed. Try to undo. */
+ goto undo_insert_range;
+ }
+ }
+
+ /*
+ * Update primary attribute segment.
+ */
+ if (vbo <= ni->i_valid)
+ ni->i_valid += bytes;
+
+ attr_b->nres.data_size = le64_to_cpu(data_size + bytes);
+ attr_b->nres.alloc_size = le64_to_cpu(alloc_size + bytes);
+
+ /* ni->valid may be not equal valid_size (temporary). */
+ if (ni->i_valid > data_size + bytes)
+ attr_b->nres.valid_size = attr_b->nres.data_size;
+ else
+ attr_b->nres.valid_size = cpu_to_le64(ni->i_valid);
+ mi_b->dirty = true;
+
+done:
+ ni->vfs_inode.i_size += bytes;
+ ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
+ mark_inode_dirty(&ni->vfs_inode);
+
+out:
+ run_truncate(run, 0); /* clear cached values. */
+
+ up_write(&ni->file.run_lock);
return err;
+
+bad_inode:
+ _ntfs_bad_inode(&ni->vfs_inode);
+ goto out;
+
+undo_insert_range:
+ svcn = le64_to_cpu(attr_b->nres.svcn);
+ evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1;
+
+ if (svcn <= vcn && vcn < evcn1) {
+ attr = attr_b;
+ le = le_b;
+ mi = mi_b;
+ } else if (!le_b) {
+ goto bad_inode;
+ } else {
+ le = le_b;
+ attr = ni_find_attr(ni, attr_b, &le, ATTR_DATA, NULL, 0, &vcn,
+ &mi);
+ if (!attr) {
+ goto bad_inode;
+ }
+
+ svcn = le64_to_cpu(attr->nres.svcn);
+ evcn1 = le64_to_cpu(attr->nres.evcn) + 1;
+ }
+
+ if (attr_load_runs(attr, ni, run, NULL))
+ goto bad_inode;
+
+ if (!run_collapse_range(run, vcn, len))
+ goto bad_inode;
+
+ if (mi_pack_runs(mi, attr, run, evcn1 + len - svcn))
+ goto bad_inode;
+
+ while ((attr = ni_enum_attr_ex(ni, attr, &le, &mi)) &&
+ attr->type == ATTR_DATA && !attr->name_len) {
+ le64_sub_cpu(&attr->nres.svcn, len);
+ le64_sub_cpu(&attr->nres.evcn, len);
+ if (le) {
+ le->vcn = attr->nres.svcn;
+ ni->attr_list.dirty = true;
+ }
+ mi->dirty = true;
+ }
+
+ goto out;
}
diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c
index aa184407520f..5d44ceac855b 100644
--- a/fs/ntfs3/bitmap.c
+++ b/fs/ntfs3/bitmap.c
@@ -51,11 +51,6 @@ void ntfs3_exit_bitmap(void)
kmem_cache_destroy(ntfs_enode_cachep);
}
-static inline u32 wnd_bits(const struct wnd_bitmap *wnd, size_t i)
-{
- return i + 1 == wnd->nwnd ? wnd->bits_last : wnd->sb->s_blocksize * 8;
-}
-
/*
* wnd_scan
*
@@ -1333,9 +1328,7 @@ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits)
if (!new_free)
return -ENOMEM;
- if (new_free != wnd->free_bits)
- memcpy(new_free, wnd->free_bits,
- wnd->nwnd * sizeof(short));
+ memcpy(new_free, wnd->free_bits, wnd->nwnd * sizeof(short));
memset(new_free + wnd->nwnd, 0,
(new_wnd - wnd->nwnd) * sizeof(short));
kfree(wnd->free_bits);
@@ -1395,9 +1388,8 @@ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits)
void wnd_zone_set(struct wnd_bitmap *wnd, size_t lcn, size_t len)
{
- size_t zlen;
+ size_t zlen = wnd->zone_end - wnd->zone_bit;
- zlen = wnd->zone_end - wnd->zone_bit;
if (zlen)
wnd_add_free_ext(wnd, wnd->zone_bit, zlen, false);
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 4a21745711fe..4f2ffc7ef296 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -530,21 +530,35 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size)
static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
{
struct inode *inode = file->f_mapping->host;
+ struct address_space *mapping = inode->i_mapping;
struct super_block *sb = inode->i_sb;
struct ntfs_sb_info *sbi = sb->s_fs_info;
struct ntfs_inode *ni = ntfs_i(inode);
loff_t end = vbo + len;
loff_t vbo_down = round_down(vbo, PAGE_SIZE);
- loff_t i_size;
+ bool is_supported_holes = is_sparsed(ni) || is_compressed(ni);
+ loff_t i_size, new_size;
+ bool map_locked;
int err;
/* No support for dir. */
if (!S_ISREG(inode->i_mode))
return -EOPNOTSUPP;
- /* Return error if mode is not supported. */
- if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
- FALLOC_FL_COLLAPSE_RANGE)) {
+ /*
+ * vfs_fallocate checks all possible combinations of mode.
+ * Do additional checks here before ntfs_set_state(dirty).
+ */
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
+ if (!is_supported_holes)
+ return -EOPNOTSUPP;
+ } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
+ } else if (mode & FALLOC_FL_INSERT_RANGE) {
+ if (!is_supported_holes)
+ return -EOPNOTSUPP;
+ } else if (mode &
+ ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+ FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)) {
ntfs_inode_warn(inode, "fallocate(0x%x) is not supported",
mode);
return -EOPNOTSUPP;
@@ -554,6 +568,8 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
inode_lock(inode);
i_size = inode->i_size;
+ new_size = max(end, i_size);
+ map_locked = false;
if (WARN_ON(ni->ni_flags & NI_FLAG_COMPRESSED_MASK)) {
/* Should never be here, see ntfs_file_open. */
@@ -561,38 +577,27 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
goto out;
}
+ if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE |
+ FALLOC_FL_INSERT_RANGE)) {
+ inode_dio_wait(inode);
+ filemap_invalidate_lock(mapping);
+ map_locked = true;
+ }
+
if (mode & FALLOC_FL_PUNCH_HOLE) {
u32 frame_size;
loff_t mask, vbo_a, end_a, tmp;
- if (!(mode & FALLOC_FL_KEEP_SIZE)) {
- err = -EINVAL;
- goto out;
- }
-
- err = filemap_write_and_wait_range(inode->i_mapping, vbo,
- end - 1);
+ err = filemap_write_and_wait_range(mapping, vbo, end - 1);
if (err)
goto out;
- err = filemap_write_and_wait_range(inode->i_mapping, end,
- LLONG_MAX);
+ err = filemap_write_and_wait_range(mapping, end, LLONG_MAX);
if (err)
goto out;
- inode_dio_wait(inode);
-
truncate_pagecache(inode, vbo_down);
- if (!is_sparsed(ni) && !is_compressed(ni)) {
- /*
- * Normal file, can't make hole.
- * TODO: Try to find way to save info about hole.
- */
- err = -EOPNOTSUPP;
- goto out;
- }
-
ni_lock(ni);
err = attr_punch_hole(ni, vbo, len, &frame_size);
ni_unlock(ni);
@@ -624,17 +629,11 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
ni_unlock(ni);
}
} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
- if (mode & ~FALLOC_FL_COLLAPSE_RANGE) {
- err = -EINVAL;
- goto out;
- }
-
/*
* Write tail of the last page before removed range since
* it will get removed from the page cache below.
*/
- err = filemap_write_and_wait_range(inode->i_mapping, vbo_down,
- vbo);
+ err = filemap_write_and_wait_range(mapping, vbo_down, vbo);
if (err)
goto out;
@@ -642,34 +641,58 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
* Write data that will be shifted to preserve them
* when discarding page cache below.
*/
- err = filemap_write_and_wait_range(inode->i_mapping, end,
- LLONG_MAX);
+ err = filemap_write_and_wait_range(mapping, end, LLONG_MAX);
if (err)
goto out;
- /* Wait for existing dio to complete. */
- inode_dio_wait(inode);
-
truncate_pagecache(inode, vbo_down);
ni_lock(ni);
err = attr_collapse_range(ni, vbo, len);
ni_unlock(ni);
+ } else if (mode & FALLOC_FL_INSERT_RANGE) {
+ /* Check new size. */
+ err = inode_newsize_ok(inode, new_size);
+ if (err)
+ goto out;
+
+ /* Write out all dirty pages. */
+ err = filemap_write_and_wait_range(mapping, vbo_down,
+ LLONG_MAX);
+ if (err)
+ goto out;
+ truncate_pagecache(inode, vbo_down);
+
+ ni_lock(ni);
+ err = attr_insert_range(ni, vbo, len);
+ ni_unlock(ni);
} else {
- /*
- * Normal file: Allocate clusters, do not change 'valid' size.
- */
- loff_t new_size = max(end, i_size);
+ /* Check new size. */
+
+ /* generic/213: expected -ENOSPC instead of -EFBIG. */
+ if (!is_supported_holes) {
+ loff_t to_alloc = new_size - inode_get_bytes(inode);
+
+ if (to_alloc > 0 &&
+ (to_alloc >> sbi->cluster_bits) >
+ wnd_zeroes(&sbi->used.bitmap)) {
+ err = -ENOSPC;
+ goto out;
+ }
+ }
err = inode_newsize_ok(inode, new_size);
if (err)
goto out;
+ /*
+ * Allocate clusters, do not change 'valid' size.
+ */
err = ntfs_set_size(inode, new_size);
if (err)
goto out;
- if (is_sparsed(ni) || is_compressed(ni)) {
+ if (is_supported_holes) {
CLST vcn_v = ni->i_valid >> sbi->cluster_bits;
CLST vcn = vbo >> sbi->cluster_bits;
CLST cend = bytes_to_cluster(sbi, end);
@@ -717,8 +740,8 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
}
out:
- if (err == -EFBIG)
- err = -ENOSPC;
+ if (map_locked)
+ filemap_invalidate_unlock(mapping);
if (!err) {
inode->i_ctime = inode->i_mtime = current_time(inode);
@@ -989,7 +1012,6 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
if (bytes > count)
bytes = count;
- frame = pos >> frame_bits;
frame_vbo = pos & ~(frame_size - 1);
index = frame_vbo >> PAGE_SHIFT;
diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
index 18842998c8fa..381a38a06ec2 100644
--- a/fs/ntfs3/frecord.c
+++ b/fs/ntfs3/frecord.c
@@ -7,6 +7,7 @@
#include <linux/fiemap.h>
#include <linux/fs.h>
+#include <linux/minmax.h>
#include <linux/vmalloc.h>
#include "debug.h"
@@ -468,7 +469,7 @@ ni_ins_new_attr(struct ntfs_inode *ni, struct mft_inode *mi,
&ref, &le);
if (err) {
/* No memory or no space. */
- return NULL;
+ return ERR_PTR(err);
}
le_added = true;
@@ -649,6 +650,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni)
struct mft_inode *mi;
u32 asize, free;
struct MFT_REF ref;
+ struct MFT_REC *mrec;
__le16 id;
if (!ni->attr_list.dirty)
@@ -692,11 +694,17 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni)
free -= asize;
}
+ /* Make a copy of primary record to restore if error. */
+ mrec = kmemdup(ni->mi.mrec, sbi->record_size, GFP_NOFS);
+ if (!mrec)
+ return 0; /* Not critical. */
+
/* It seems that attribute list can be removed from primary record. */
mi_remove_attr(NULL, &ni->mi, attr_list);
/*
- * Repeat the cycle above and move all attributes to primary record.
+ * Repeat the cycle above and copy all attributes to primary record.
+ * Do not remove original attributes from subrecords!
* It should be success!
*/
le = NULL;
@@ -707,14 +715,14 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni)
mi = ni_find_mi(ni, ino_get(&le->ref));
if (!mi) {
/* Should never happened, 'cause already checked. */
- goto bad;
+ goto out;
}
attr = mi_find_attr(mi, NULL, le->type, le_name(le),
le->name_len, &le->id);
if (!attr) {
/* Should never happened, 'cause already checked. */
- goto bad;
+ goto out;
}
asize = le32_to_cpu(attr->size);
@@ -724,18 +732,33 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni)
le16_to_cpu(attr->name_off));
if (!attr_ins) {
/*
- * Internal error.
- * Either no space in primary record (already checked).
- * Either tried to insert another
- * non indexed attribute (logic error).
+ * No space in primary record (already checked).
*/
- goto bad;
+ goto out;
}
/* Copy all except id. */
id = attr_ins->id;
memcpy(attr_ins, attr, asize);
attr_ins->id = id;
+ }
+
+ /*
+ * Repeat the cycle above and remove all attributes from subrecords.
+ */
+ le = NULL;
+ while ((le = al_enumerate(ni, le))) {
+ if (!memcmp(&le->ref, &ref, sizeof(ref)))
+ continue;
+
+ mi = ni_find_mi(ni, ino_get(&le->ref));
+ if (!mi)
+ continue;
+
+ attr = mi_find_attr(mi, NULL, le->type, le_name(le),
+ le->name_len, &le->id);
+ if (!attr)
+ continue;
/* Remove from original record. */
mi_remove_attr(NULL, mi, attr);
@@ -748,11 +771,13 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni)
ni->attr_list.le = NULL;
ni->attr_list.dirty = false;
+ kfree(mrec);
+ return 0;
+out:
+ /* Restore primary record. */
+ swap(mrec, ni->mi.mrec);
+ kfree(mrec);
return 0;
-bad:
- ntfs_inode_err(&ni->vfs_inode, "Internal error");
- make_bad_inode(&ni->vfs_inode);
- return -EINVAL;
}
/*
@@ -986,6 +1011,8 @@ static int ni_ins_attr_ext(struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le,
name_off, svcn, ins_le);
if (!attr)
continue;
+ if (IS_ERR(attr))
+ return PTR_ERR(attr);
if (ins_attr)
*ins_attr = attr;
@@ -1007,8 +1034,15 @@ insert_ext:
attr = ni_ins_new_attr(ni, mi, le, type, name, name_len, asize,
name_off, svcn, ins_le);
- if (!attr)
+ if (!attr) {
+ err = -EINVAL;
goto out2;
+ }
+
+ if (IS_ERR(attr)) {
+ err = PTR_ERR(attr);
+ goto out2;
+ }
if (ins_attr)
*ins_attr = attr;
@@ -1020,10 +1054,9 @@ insert_ext:
out2:
ni_remove_mi(ni, mi);
mi_put(mi);
- err = -EINVAL;
out1:
- ntfs_mark_rec_free(sbi, rno);
+ ntfs_mark_rec_free(sbi, rno, is_mft);
out:
return err;
@@ -1076,6 +1109,11 @@ static int ni_insert_attr(struct ntfs_inode *ni, enum ATTR_TYPE type,
if (asize <= free) {
attr = ni_ins_new_attr(ni, &ni->mi, NULL, type, name, name_len,
asize, name_off, svcn, ins_le);
+ if (IS_ERR(attr)) {
+ err = PTR_ERR(attr);
+ goto out;
+ }
+
if (attr) {
if (ins_attr)
*ins_attr = attr;
@@ -1173,6 +1211,11 @@ static int ni_insert_attr(struct ntfs_inode *ni, enum ATTR_TYPE type,
goto out;
}
+ if (IS_ERR(attr)) {
+ err = PTR_ERR(attr);
+ goto out;
+ }
+
if (ins_attr)
*ins_attr = attr;
if (ins_mi)
@@ -1218,7 +1261,7 @@ static int ni_expand_mft_list(struct ntfs_inode *ni)
mft_min = mft_new;
mi_min = mi_new;
} else {
- ntfs_mark_rec_free(sbi, mft_new);
+ ntfs_mark_rec_free(sbi, mft_new, true);
mft_new = 0;
ni_remove_mi(ni, mi_new);
}
@@ -1262,7 +1305,7 @@ static int ni_expand_mft_list(struct ntfs_inode *ni)
done = asize - run_size - SIZEOF_NONRESIDENT;
le32_sub_cpu(&ni->mi.mrec->used, done);
- /* Estimate the size of second part: run_buf=NULL. */
+ /* Estimate packed size (run_buf=NULL). */
err = run_pack(run, svcn, evcn + 1 - svcn, NULL, sbi->record_size,
&plen);
if (err < 0)
@@ -1288,10 +1331,16 @@ static int ni_expand_mft_list(struct ntfs_inode *ni)
goto out;
}
+ if (IS_ERR(attr)) {
+ err = PTR_ERR(attr);
+ goto out;
+ }
+
attr->non_res = 1;
attr->name_off = SIZEOF_NONRESIDENT_LE;
attr->flags = 0;
+ /* This function can't fail - cause already checked above. */
run_pack(run, svcn, evcn + 1 - svcn, Add2Ptr(attr, SIZEOF_NONRESIDENT),
run_size, &plen);
@@ -1301,7 +1350,7 @@ static int ni_expand_mft_list(struct ntfs_inode *ni)
out:
if (mft_new) {
- ntfs_mark_rec_free(sbi, mft_new);
+ ntfs_mark_rec_free(sbi, mft_new, true);
ni_remove_mi(ni, mi_new);
}
@@ -1367,8 +1416,6 @@ int ni_expand_list(struct ntfs_inode *ni)
/* Split MFT data as much as possible. */
err = ni_expand_mft_list(ni);
- if (err)
- goto out;
out:
return !err && !done ? -EOPNOTSUPP : err;
@@ -1381,7 +1428,7 @@ int ni_insert_nonresident(struct ntfs_inode *ni, enum ATTR_TYPE type,
const __le16 *name, u8 name_len,
const struct runs_tree *run, CLST svcn, CLST len,
__le16 flags, struct ATTRIB **new_attr,
- struct mft_inode **mi)
+ struct mft_inode **mi, struct ATTR_LIST_ENTRY **le)
{
int err;
CLST plen;
@@ -1394,6 +1441,7 @@ int ni_insert_nonresident(struct ntfs_inode *ni, enum ATTR_TYPE type,
u32 run_size, asize;
struct ntfs_sb_info *sbi = ni->mi.sbi;
+ /* Estimate packed size (run_buf=NULL). */
err = run_pack(run, svcn, len, NULL, sbi->max_bytes_per_attr - run_off,
&plen);
if (err < 0)
@@ -1414,7 +1462,7 @@ int ni_insert_nonresident(struct ntfs_inode *ni, enum ATTR_TYPE type,
}
err = ni_insert_attr(ni, type, name, name_len, asize, name_off, svcn,
- &attr, mi, NULL);
+ &attr, mi, le);
if (err)
goto out;
@@ -1423,12 +1471,12 @@ int ni_insert_nonresident(struct ntfs_inode *ni, enum ATTR_TYPE type,
attr->name_off = cpu_to_le16(name_off);
attr->flags = flags;
+ /* This function can't fail - cause already checked above. */
run_pack(run, svcn, len, Add2Ptr(attr, run_off), run_size, &plen);
attr->nres.svcn = cpu_to_le64(svcn);
attr->nres.evcn = cpu_to_le64((u64)svcn + len - 1);
- err = 0;
if (new_attr)
*new_attr = attr;
@@ -1560,7 +1608,7 @@ int ni_delete_all(struct ntfs_inode *ni)
mi->dirty = true;
mi_write(mi, 0);
- ntfs_mark_rec_free(sbi, mi->rno);
+ ntfs_mark_rec_free(sbi, mi->rno, false);
ni_remove_mi(ni, mi);
mi_put(mi);
node = next;
@@ -1571,7 +1619,7 @@ int ni_delete_all(struct ntfs_inode *ni)
ni->mi.dirty = true;
err = mi_write(&ni->mi, 0);
- ntfs_mark_rec_free(sbi, ni->mi.rno);
+ ntfs_mark_rec_free(sbi, ni->mi.rno, false);
return err;
}
@@ -1589,7 +1637,8 @@ struct ATTR_FILE_NAME *ni_fname_name(struct ntfs_inode *ni,
struct ATTRIB *attr = NULL;
struct ATTR_FILE_NAME *fname;
- *le = NULL;
+ if (le)
+ *le = NULL;
/* Enumerate all names. */
next:
@@ -1605,7 +1654,7 @@ next:
goto next;
if (!uni)
- goto next;
+ return fname;
if (uni->len != fname->name_len)
goto next;
@@ -2302,10 +2351,8 @@ remove_wof:
out:
kfree(pages);
- if (err) {
- make_bad_inode(inode);
- ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
- }
+ if (err)
+ _ntfs_bad_inode(inode);
return err;
}
@@ -2944,7 +2991,7 @@ bool ni_remove_name_undo(struct ntfs_inode *dir_ni, struct ntfs_inode *ni,
}
/*
- * ni_add_name - Add new name in MFT and in directory.
+ * ni_add_name - Add new name into MFT and into directory.
*/
int ni_add_name(struct ntfs_inode *dir_ni, struct ntfs_inode *ni,
struct NTFS_DE *de)
@@ -2953,13 +3000,20 @@ int ni_add_name(struct ntfs_inode *dir_ni, struct ntfs_inode *ni,
struct ATTRIB *attr;
struct ATTR_LIST_ENTRY *le;
struct mft_inode *mi;
+ struct ATTR_FILE_NAME *fname;
struct ATTR_FILE_NAME *de_name = (struct ATTR_FILE_NAME *)(de + 1);
u16 de_key_size = le16_to_cpu(de->key_size);
mi_get_ref(&ni->mi, &de->ref);
mi_get_ref(&dir_ni->mi, &de_name->home);
- /* Insert new name in MFT. */
+ /* Fill duplicate from any ATTR_NAME. */
+ fname = ni_fname_name(ni, NULL, NULL, NULL, NULL);
+ if (fname)
+ memcpy(&de_name->dup, &fname->dup, sizeof(fname->dup));
+ de_name->dup.fa = ni->std_fa;
+
+ /* Insert new name into MFT. */
err = ni_insert_resident(ni, de_key_size, ATTR_NAME, NULL, 0, &attr,
&mi, &le);
if (err)
@@ -2967,7 +3021,7 @@ int ni_add_name(struct ntfs_inode *dir_ni, struct ntfs_inode *ni,
memcpy(Add2Ptr(attr, SIZEOF_RESIDENT), de_name, de_key_size);
- /* Insert new name in directory. */
+ /* Insert new name into directory. */
err = indx_insert_entry(&dir_ni->dir, dir_ni, de, ni->mi.sbi, NULL, 0);
if (err)
ni_remove_attr_le(ni, attr, mi, le);
@@ -2991,7 +3045,7 @@ int ni_rename(struct ntfs_inode *dir_ni, struct ntfs_inode *new_dir_ni,
* 1) Add new name and remove old name.
* 2) Remove old name and add new name.
*
- * In most cases (not all!) adding new name in MFT and in directory can
+ * In most cases (not all!) adding new name into MFT and into directory can
* allocate additional cluster(s).
* Second way may result to bad inode if we can't add new name
* and then can't restore (add) old name.
@@ -3261,7 +3315,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint)
err = err2;
if (is_empty) {
- ntfs_mark_rec_free(sbi, mi->rno);
+ ntfs_mark_rec_free(sbi, mi->rno, false);
rb_erase(node, &ni->mi_tree);
mi_put(mi);
}
diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c
index 49b7df616778..e7c494005122 100644
--- a/fs/ntfs3/fslog.c
+++ b/fs/ntfs3/fslog.c
@@ -3843,6 +3843,8 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
memset(&rst_info2, 0, sizeof(struct restart_info));
err = log_read_rst(log, l_size, false, &rst_info2);
+ if (err)
+ goto out;
/* Determine which restart area to use. */
if (!rst_info2.restart || rst_info2.last_lsn <= rst_info.last_lsn)
@@ -5057,7 +5059,7 @@ undo_action_next:
goto add_allocated_vcns;
vcn = le64_to_cpu(lrh->target_vcn);
- vcn &= ~(log->clst_per_page - 1);
+ vcn &= ~(u64)(log->clst_per_page - 1);
add_allocated_vcns:
for (i = 0, vcn = le64_to_cpu(lrh->target_vcn),
diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c
index 1835e35199c2..4ed15f64b17f 100644
--- a/fs/ntfs3/fsntfs.c
+++ b/fs/ntfs3/fsntfs.c
@@ -703,12 +703,14 @@ out:
/*
* ntfs_mark_rec_free - Mark record as free.
+ * is_mft - true if we are changing MFT
*/
-void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno)
+void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno, bool is_mft)
{
struct wnd_bitmap *wnd = &sbi->mft.bitmap;
- down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT);
+ if (!is_mft)
+ down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT);
if (rno >= wnd->nbits)
goto out;
@@ -727,7 +729,8 @@ void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno)
sbi->mft.next_free = rno;
out:
- up_write(&wnd->rw_lock);
+ if (!is_mft)
+ up_write(&wnd->rw_lock);
}
/*
@@ -780,7 +783,7 @@ out:
*/
int ntfs_refresh_zone(struct ntfs_sb_info *sbi)
{
- CLST zone_limit, zone_max, lcn, vcn, len;
+ CLST lcn, vcn, len;
size_t lcn_s, zlen;
struct wnd_bitmap *wnd = &sbi->used.bitmap;
struct ntfs_inode *ni = sbi->mft.ni;
@@ -789,16 +792,6 @@ int ntfs_refresh_zone(struct ntfs_sb_info *sbi)
if (wnd_zone_len(wnd))
return 0;
- /*
- * Compute the MFT zone at two steps.
- * It would be nice if we are able to allocate 1/8 of
- * total clusters for MFT but not more then 512 MB.
- */
- zone_limit = (512 * 1024 * 1024) >> sbi->cluster_bits;
- zone_max = wnd->nbits >> 3;
- if (zone_max > zone_limit)
- zone_max = zone_limit;
-
vcn = bytes_to_cluster(sbi,
(u64)sbi->mft.bitmap.nbits << sbi->record_bits);
@@ -812,13 +805,7 @@ int ntfs_refresh_zone(struct ntfs_sb_info *sbi)
lcn_s = lcn + 1;
/* Try to allocate clusters after last MFT run. */
- zlen = wnd_find(wnd, zone_max, lcn_s, 0, &lcn_s);
- if (!zlen) {
- ntfs_notice(sbi->sb, "MftZone: unavailable");
- return 0;
- }
-
- /* Truncate too large zone. */
+ zlen = wnd_find(wnd, sbi->zone_max, lcn_s, 0, &lcn_s);
wnd_zone_set(wnd, lcn_s, zlen);
return 0;
@@ -827,16 +814,21 @@ int ntfs_refresh_zone(struct ntfs_sb_info *sbi)
/*
* ntfs_update_mftmirr - Update $MFTMirr data.
*/
-int ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait)
+void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait)
{
int err;
struct super_block *sb = sbi->sb;
- u32 blocksize = sb->s_blocksize;
+ u32 blocksize;
sector_t block1, block2;
u32 bytes;
+ if (!sb)
+ return;
+
+ blocksize = sb->s_blocksize;
+
if (!(sbi->flags & NTFS_FLAGS_MFTMIRR))
- return 0;
+ return;
err = 0;
bytes = sbi->mft.recs_mirr << sbi->record_bits;
@@ -847,16 +839,13 @@ int ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait)
struct buffer_head *bh1, *bh2;
bh1 = sb_bread(sb, block1++);
- if (!bh1) {
- err = -EIO;
- goto out;
- }
+ if (!bh1)
+ return;
bh2 = sb_getblk(sb, block2++);
if (!bh2) {
put_bh(bh1);
- err = -EIO;
- goto out;
+ return;
}
if (buffer_locked(bh2))
@@ -876,13 +865,24 @@ int ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait)
put_bh(bh2);
if (err)
- goto out;
+ return;
}
sbi->flags &= ~NTFS_FLAGS_MFTMIRR;
+}
-out:
- return err;
+/*
+ * ntfs_bad_inode
+ *
+ * Marks inode as bad and marks fs as 'dirty'
+ */
+void ntfs_bad_inode(struct inode *inode, const char *hint)
+{
+ struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info;
+
+ ntfs_inode_err(inode, "%s", hint);
+ make_bad_inode(inode);
+ ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
}
/*
@@ -1395,7 +1395,7 @@ int ntfs_write_bh(struct ntfs_sb_info *sbi, struct NTFS_RECORD_HEADER *rhdr,
if (buffer_locked(bh))
__wait_on_buffer(bh);
- lock_buffer(nb->bh[idx]);
+ lock_buffer(bh);
bh_data = bh->b_data + off;
end_data = Add2Ptr(bh_data, op);
@@ -2424,7 +2424,7 @@ static inline void ntfs_unmap_and_discard(struct ntfs_sb_info *sbi, CLST lcn,
void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim)
{
- CLST end, i;
+ CLST end, i, zone_len, zlen;
struct wnd_bitmap *wnd = &sbi->used.bitmap;
down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
@@ -2459,6 +2459,28 @@ void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim)
ntfs_unmap_and_discard(sbi, lcn, len);
wnd_set_free(wnd, lcn, len);
+ /* append to MFT zone, if possible. */
+ zone_len = wnd_zone_len(wnd);
+ zlen = min(zone_len + len, sbi->zone_max);
+
+ if (zlen == zone_len) {
+ /* MFT zone already has maximum size. */
+ } else if (!zone_len) {
+ /* Create MFT zone only if 'zlen' is large enough. */
+ if (zlen == sbi->zone_max)
+ wnd_zone_set(wnd, lcn, zlen);
+ } else {
+ CLST zone_lcn = wnd_zone_bit(wnd);
+
+ if (lcn + len == zone_lcn) {
+ /* Append into head MFT zone. */
+ wnd_zone_set(wnd, lcn, zlen);
+ } else if (zone_lcn + zone_len == lcn) {
+ /* Append into tail MFT zone. */
+ wnd_zone_set(wnd, zone_lcn, zlen);
+ }
+ }
+
out:
up_write(&wnd->rw_lock);
}
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index 6f81e3a49abf..440328147e7e 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -1042,19 +1042,16 @@ int indx_find(struct ntfs_index *indx, struct ntfs_inode *ni,
{
int err;
struct NTFS_DE *e;
- const struct INDEX_HDR *hdr;
struct indx_node *node;
if (!root)
root = indx_get_root(&ni->dir, ni, NULL, NULL);
if (!root) {
- err = -EINVAL;
- goto out;
+ /* Should not happen. */
+ return -EINVAL;
}
- hdr = &root->ihdr;
-
/* Check cache. */
e = fnd->level ? fnd->de[fnd->level - 1] : fnd->root_de;
if (e && !de_is_last(e) &&
@@ -1068,39 +1065,35 @@ int indx_find(struct ntfs_index *indx, struct ntfs_inode *ni,
fnd_clear(fnd);
/* Lookup entry that is <= to the search value. */
- e = hdr_find_e(indx, hdr, key, key_len, ctx, diff);
+ e = hdr_find_e(indx, &root->ihdr, key, key_len, ctx, diff);
if (!e)
return -EINVAL;
fnd->root_de = e;
- err = 0;
for (;;) {
node = NULL;
- if (*diff >= 0 || !de_has_vcn_ex(e)) {
- *entry = e;
- goto out;
- }
+ if (*diff >= 0 || !de_has_vcn_ex(e))
+ break;
/* Read next level. */
err = indx_read(indx, ni, de_get_vbn(e), &node);
if (err)
- goto out;
+ return err;
/* Lookup entry that is <= to the search value. */
e = hdr_find_e(indx, &node->index->ihdr, key, key_len, ctx,
diff);
if (!e) {
- err = -EINVAL;
put_indx_node(node);
- goto out;
+ return -EINVAL;
}
fnd_push(fnd, node, e);
}
-out:
- return err;
+ *entry = e;
+ return 0;
}
int indx_find_sort(struct ntfs_index *indx, struct ntfs_inode *ni,
@@ -1354,7 +1347,7 @@ static int indx_create_allocate(struct ntfs_index *indx, struct ntfs_inode *ni,
goto out;
err = ni_insert_nonresident(ni, ATTR_ALLOC, in->name, in->name_len,
- &run, 0, len, 0, &alloc, NULL);
+ &run, 0, len, 0, &alloc, NULL, NULL);
if (err)
goto out1;
@@ -1685,8 +1678,8 @@ indx_insert_into_buffer(struct ntfs_index *indx, struct ntfs_inode *ni,
{
int err;
const struct NTFS_DE *sp;
- struct NTFS_DE *e, *de_t, *up_e = NULL;
- struct indx_node *n2 = NULL;
+ struct NTFS_DE *e, *de_t, *up_e;
+ struct indx_node *n2;
struct indx_node *n1 = fnd->nodes[level];
struct INDEX_HDR *hdr1 = &n1->index->ihdr;
struct INDEX_HDR *hdr2;
@@ -1994,7 +1987,7 @@ static int indx_free_children(struct ntfs_index *indx, struct ntfs_inode *ni,
const struct NTFS_DE *e, bool trim)
{
int err;
- struct indx_node *n;
+ struct indx_node *n = NULL;
struct INDEX_HDR *hdr;
CLST vbn = de_get_vbn(e);
size_t i;
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index 80104afeb2cd..51363d4e8636 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -430,6 +430,7 @@ end_enum:
} else if (fname && fname->home.low == cpu_to_le32(MFT_REC_EXTEND) &&
fname->home.seq == cpu_to_le16(MFT_REC_EXTEND)) {
/* Records in $Extend are not a files or general directories. */
+ inode->i_op = &ntfs_file_inode_operations;
} else {
err = -EINVAL;
goto out;
@@ -500,7 +501,7 @@ struct inode *ntfs_iget5(struct super_block *sb, const struct MFT_REF *ref,
inode = ntfs_read_mft(inode, name, ref);
else if (ref->seq != ntfs_i(inode)->mi.mrec->seq) {
/* Inode overlaps? */
- make_bad_inode(inode);
+ _ntfs_bad_inode(inode);
}
return inode;
@@ -1632,7 +1633,7 @@ out4:
ni->mi.dirty = false;
discard_new_inode(inode);
out3:
- ntfs_mark_rec_free(sbi, ino);
+ ntfs_mark_rec_free(sbi, ino, false);
out2:
__putname(new_de);
@@ -1655,7 +1656,6 @@ int ntfs_link_inode(struct inode *inode, struct dentry *dentry)
struct ntfs_inode *ni = ntfs_i(inode);
struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info;
struct NTFS_DE *de;
- struct ATTR_FILE_NAME *de_name;
/* Allocate PATH_MAX bytes. */
de = __getname();
@@ -1670,15 +1670,6 @@ int ntfs_link_inode(struct inode *inode, struct dentry *dentry)
if (err)
goto out;
- de_name = (struct ATTR_FILE_NAME *)(de + 1);
- /* Fill duplicate info. */
- de_name->dup.cr_time = de_name->dup.m_time = de_name->dup.c_time =
- de_name->dup.a_time = kernel2nt(&inode->i_ctime);
- de_name->dup.alloc_size = de_name->dup.data_size =
- cpu_to_le64(inode->i_size);
- de_name->dup.fa = ni->std_fa;
- de_name->dup.ea_size = de_name->dup.reparse = 0;
-
err = ni_add_name(ntfs_i(d_inode(dentry->d_parent)), ni, de);
out:
__putname(de);
@@ -1731,9 +1722,7 @@ int ntfs_unlink_inode(struct inode *dir, const struct dentry *dentry)
if (inode->i_nlink)
mark_inode_dirty(inode);
} else if (!ni_remove_name_undo(dir_ni, ni, de, de2, undo_remove)) {
- make_bad_inode(inode);
- ntfs_inode_err(inode, "failed to undo unlink");
- ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+ _ntfs_bad_inode(inode);
} else {
if (ni_is_dirty(dir))
mark_inode_dirty(dir);
diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c
index bc741213ad84..bc22cc321a74 100644
--- a/fs/ntfs3/namei.c
+++ b/fs/ntfs3/namei.c
@@ -208,7 +208,7 @@ static int ntfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
}
/*
- * ntfs_rmdir - inode_operations::rm_dir
+ * ntfs_rmdir - inode_operations::rmdir
*/
static int ntfs_rmdir(struct inode *dir, struct dentry *dentry)
{
@@ -308,9 +308,7 @@ static int ntfs_rename(struct user_namespace *mnt_userns, struct inode *dir,
err = ni_rename(dir_ni, new_dir_ni, ni, de, new_de, &is_bad);
if (is_bad) {
/* Restore after failed rename failed too. */
- make_bad_inode(inode);
- ntfs_inode_err(inode, "failed to undo rename");
- ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+ _ntfs_bad_inode(inode);
} else if (!err) {
inode->i_ctime = dir->i_ctime = dir->i_mtime =
current_time(dir);
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index 8dbdca03e1af..2c791222c4e2 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -220,6 +220,7 @@ struct ntfs_sb_info {
u32 flags; // See NTFS_FLAGS_XXX.
+ CLST zone_max; // Maximum MFT zone length in clusters
CLST bad_clusters; // The count of marked bad clusters.
u16 max_bytes_per_attr; // Maximum attribute size in record.
@@ -408,8 +409,6 @@ enum REPARSE_SIGN {
};
/* Functions from attrib.c */
-int attr_load_runs(struct ATTRIB *attr, struct ntfs_inode *ni,
- struct runs_tree *run, const CLST *vcn);
int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run,
CLST vcn, CLST lcn, CLST len, CLST *pre_alloc,
enum ALLOCATE_OPT opt, CLST *alen, const size_t fr,
@@ -440,6 +439,7 @@ int attr_is_frame_compressed(struct ntfs_inode *ni, struct ATTRIB *attr,
int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size,
u64 new_valid);
int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes);
+int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes);
int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size);
/* Functions from attrlist.c */
@@ -528,7 +528,7 @@ int ni_insert_nonresident(struct ntfs_inode *ni, enum ATTR_TYPE type,
const __le16 *name, u8 name_len,
const struct runs_tree *run, CLST svcn, CLST len,
__le16 flags, struct ATTRIB **new_attr,
- struct mft_inode **mi);
+ struct mft_inode **mi, struct ATTR_LIST_ENTRY **le);
int ni_insert_resident(struct ntfs_inode *ni, u32 data_size,
enum ATTR_TYPE type, const __le16 *name, u8 name_len,
struct ATTRIB **new_attr, struct mft_inode **mi,
@@ -589,10 +589,12 @@ int ntfs_look_for_free_space(struct ntfs_sb_info *sbi, CLST lcn, CLST len,
enum ALLOCATE_OPT opt);
int ntfs_look_free_mft(struct ntfs_sb_info *sbi, CLST *rno, bool mft,
struct ntfs_inode *ni, struct mft_inode **mi);
-void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno);
+void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno, bool is_mft);
int ntfs_clear_mft_tail(struct ntfs_sb_info *sbi, size_t from, size_t to);
int ntfs_refresh_zone(struct ntfs_sb_info *sbi);
-int ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait);
+void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait);
+void ntfs_bad_inode(struct inode *inode, const char *hint);
+#define _ntfs_bad_inode(i) ntfs_bad_inode(i, __func__)
enum NTFS_DIRTY_FLAGS {
NTFS_DIRTY_CLEAR = 0,
NTFS_DIRTY_DIRTY = 1,
@@ -738,7 +740,6 @@ static inline struct ATTRIB *rec_find_attr_le(struct mft_inode *rec,
int mi_write(struct mft_inode *mi, int wait);
int mi_format_new(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno,
__le16 flags, bool is_mft);
-void mi_mark_free(struct mft_inode *mi);
struct ATTRIB *mi_insert_attr(struct mft_inode *mi, enum ATTR_TYPE type,
const __le16 *name, u8 name_len, u32 asize,
u16 name_off);
@@ -780,10 +781,10 @@ bool run_lookup_entry(const struct runs_tree *run, CLST vcn, CLST *lcn,
void run_truncate(struct runs_tree *run, CLST vcn);
void run_truncate_head(struct runs_tree *run, CLST vcn);
void run_truncate_around(struct runs_tree *run, CLST vcn);
-bool run_lookup(const struct runs_tree *run, CLST vcn, size_t *Index);
bool run_add_entry(struct runs_tree *run, CLST vcn, CLST lcn, CLST len,
bool is_mft);
bool run_collapse_range(struct runs_tree *run, CLST vcn, CLST len);
+bool run_insert_range(struct runs_tree *run, CLST vcn, CLST len);
bool run_get_entry(const struct runs_tree *run, size_t index, CLST *vcn,
CLST *lcn, CLST *len);
bool run_is_mapped_full(const struct runs_tree *run, CLST svcn, CLST evcn);
@@ -802,6 +803,7 @@ int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino,
#define run_unpack_ex run_unpack
#endif
int run_get_highest_vcn(CLST vcn, const u8 *run_buf, u64 *highest_vcn);
+int run_clone(const struct runs_tree *run, struct runs_tree *new_run);
/* Globals from super.c */
void *ntfs_set_shared(void *ptr, u32 bytes);
diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c
index 861e35791506..7d2fac5ee215 100644
--- a/fs/ntfs3/record.c
+++ b/fs/ntfs3/record.c
@@ -395,28 +395,6 @@ int mi_format_new(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno,
}
/*
- * mi_mark_free - Mark record as unused and marks it as free in bitmap.
- */
-void mi_mark_free(struct mft_inode *mi)
-{
- CLST rno = mi->rno;
- struct ntfs_sb_info *sbi = mi->sbi;
-
- if (rno >= MFT_REC_RESERVED && rno < MFT_REC_FREE) {
- ntfs_clear_mft_tail(sbi, rno, rno + 1);
- mi->dirty = false;
- return;
- }
-
- if (mi->mrec) {
- clear_rec_inuse(mi->mrec);
- mi->dirty = true;
- mi_write(mi, 0);
- }
- ntfs_mark_rec_free(sbi, rno);
-}
-
-/*
* mi_insert_attr - Reserve space for new attribute.
*
* Return: Not full constructed attribute or NULL if not possible to create.
@@ -445,12 +423,11 @@ struct ATTRIB *mi_insert_attr(struct mft_inode *mi, enum ATTR_TYPE type,
attr = NULL;
while ((attr = mi_enum_attr(mi, attr))) {
diff = compare_attr(attr, type, name, name_len, upcase);
- if (diff > 0)
- break;
+
if (diff < 0)
continue;
- if (!is_attr_indexed(attr))
+ if (!diff && !is_attr_indexed(attr))
return NULL;
break;
}
diff --git a/fs/ntfs3/run.c b/fs/ntfs3/run.c
index a8fec651f973..aaaa0d3d35a2 100644
--- a/fs/ntfs3/run.c
+++ b/fs/ntfs3/run.c
@@ -31,7 +31,7 @@ struct ntfs_run {
* Case of entry missing from list 'index' will be set to
* point to insertion position for the entry question.
*/
-bool run_lookup(const struct runs_tree *run, CLST vcn, size_t *index)
+static bool run_lookup(const struct runs_tree *run, CLST vcn, size_t *index)
{
size_t min_idx, max_idx, mid_idx;
struct ntfs_run *r;
@@ -547,6 +547,48 @@ bool run_collapse_range(struct runs_tree *run, CLST vcn, CLST len)
return true;
}
+/* run_insert_range
+ *
+ * Helper for attr_insert_range(),
+ * which is helper for fallocate(insert_range).
+ */
+bool run_insert_range(struct runs_tree *run, CLST vcn, CLST len)
+{
+ size_t index;
+ struct ntfs_run *r, *e;
+
+ if (WARN_ON(!run_lookup(run, vcn, &index)))
+ return false; /* Should never be here. */
+
+ e = run->runs + run->count;
+ r = run->runs + index;
+
+ if (vcn > r->vcn)
+ r += 1;
+
+ for (; r < e; r++)
+ r->vcn += len;
+
+ r = run->runs + index;
+
+ if (vcn > r->vcn) {
+ /* split fragment. */
+ CLST len1 = vcn - r->vcn;
+ CLST len2 = r->len - len1;
+ CLST lcn2 = r->lcn == SPARSE_LCN ? SPARSE_LCN : (r->lcn + len1);
+
+ r->len = len1;
+
+ if (!run_add_entry(run, vcn + len, lcn2, len2, false))
+ return false;
+ }
+
+ if (!run_add_entry(run, vcn, SPARSE_LCN, len, false))
+ return false;
+
+ return true;
+}
+
/*
* run_get_entry - Return index-th mapped region.
*/
@@ -778,26 +820,36 @@ int run_pack(const struct runs_tree *run, CLST svcn, CLST len, u8 *run_buf,
CLST next_vcn, vcn, lcn;
CLST prev_lcn = 0;
CLST evcn1 = svcn + len;
+ const struct ntfs_run *r, *r_end;
int packed_size = 0;
size_t i;
- bool ok;
s64 dlcn;
int offset_size, size_size, tmp;
- next_vcn = vcn = svcn;
-
*packed_vcns = 0;
if (!len)
goto out;
- ok = run_lookup_entry(run, vcn, &lcn, &len, &i);
+ /* Check all required entries [svcn, encv1) available. */
+ if (!run_lookup(run, svcn, &i))
+ return -ENOENT;
+
+ r_end = run->runs + run->count;
+ r = run->runs + i;
- if (!ok)
- goto error;
+ for (next_vcn = r->vcn + r->len; next_vcn < evcn1;
+ next_vcn = r->vcn + r->len) {
+ if (++r >= r_end || r->vcn != next_vcn)
+ return -ENOENT;
+ }
- if (next_vcn != vcn)
- goto error;
+ /* Repeat cycle above and pack runs. Assume no errors. */
+ r = run->runs + i;
+ len = svcn - r->vcn;
+ vcn = svcn;
+ lcn = r->lcn == SPARSE_LCN ? SPARSE_LCN : (r->lcn + len);
+ len = r->len - len;
for (;;) {
next_vcn = vcn + len;
@@ -846,12 +898,10 @@ int run_pack(const struct runs_tree *run, CLST svcn, CLST len, u8 *run_buf,
if (packed_size + 1 >= run_buf_size || next_vcn >= evcn1)
goto out;
- ok = run_get_entry(run, ++i, &vcn, &lcn, &len);
- if (!ok)
- goto error;
-
- if (next_vcn != vcn)
- goto error;
+ r += 1;
+ vcn = r->vcn;
+ lcn = r->lcn;
+ len = r->len;
}
out:
@@ -860,9 +910,6 @@ out:
run_buf[0] = 0;
return packed_size + 1;
-
-error:
- return -EOPNOTSUPP;
}
/*
@@ -1109,3 +1156,28 @@ int run_get_highest_vcn(CLST vcn, const u8 *run_buf, u64 *highest_vcn)
*highest_vcn = vcn64 - 1;
return 0;
}
+
+/*
+ * run_clone
+ *
+ * Make a copy of run
+ */
+int run_clone(const struct runs_tree *run, struct runs_tree *new_run)
+{
+ size_t bytes = run->count * sizeof(struct ntfs_run);
+
+ if (bytes > new_run->allocated) {
+ struct ntfs_run *new_ptr = kvmalloc(bytes, GFP_KERNEL);
+
+ if (!new_ptr)
+ return -ENOMEM;
+
+ kvfree(new_run->runs);
+ new_run->runs = new_ptr;
+ new_run->allocated = bytes;
+ }
+
+ memcpy(new_run->runs, run->runs, bytes);
+ new_run->count = run->count;
+ return 0;
+}
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 0c6de6287737..47012c9bf505 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -30,6 +30,7 @@
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/log2.h>
+#include <linux/minmax.h>
#include <linux/module.h>
#include <linux/nls.h>
#include <linux/seq_file.h>
@@ -390,7 +391,7 @@ static int ntfs_fs_reconfigure(struct fs_context *fc)
return -EINVAL;
}
- memcpy(sbi->options, new_opts, sizeof(*new_opts));
+ swap(sbi->options, fc->fs_private);
return 0;
}
@@ -870,6 +871,13 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
sb->s_maxbytes = 0xFFFFFFFFull << sbi->cluster_bits;
#endif
+ /*
+ * Compute the MFT zone at two steps.
+ * It would be nice if we are able to allocate 1/8 of
+ * total clusters for MFT but not more then 512 MB.
+ */
+ sbi->zone_max = min_t(CLST, 0x20000000 >> sbi->cluster_bits, clusters >> 3);
+
err = 0;
out:
@@ -900,6 +908,8 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
ref.high = 0;
sbi->sb = sb;
+ sbi->options = fc->fs_private;
+ fc->fs_private = NULL;
sb->s_flags |= SB_NODIRATIME;
sb->s_magic = 0x7366746e; // "ntfs"
sb->s_op = &ntfs_sops;
@@ -1262,8 +1272,6 @@ load_root:
goto put_inode_out;
}
- fc->fs_private = NULL;
-
return 0;
put_inode_out:
@@ -1378,7 +1386,7 @@ static const struct fs_context_operations ntfs_context_ops = {
/*
* ntfs_init_fs_context - Initialize spi and opts
*
- * This will called when mount/remount. We will first initiliaze
+ * This will called when mount/remount. We will first initialize
* options so that if remount we can use just that.
*/
static int ntfs_init_fs_context(struct fs_context *fc)
@@ -1416,7 +1424,6 @@ static int ntfs_init_fs_context(struct fs_context *fc)
mutex_init(&sbi->compress.mtx_lzx);
#endif
- sbi->options = opts;
fc->s_fs_info = sbi;
ok:
fc->fs_private = opts;
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
index 5e0e0280e70d..6ae1f56b7358 100644
--- a/fs/ntfs3/xattr.c
+++ b/fs/ntfs3/xattr.c
@@ -118,7 +118,7 @@ static int ntfs_read_ea(struct ntfs_inode *ni, struct EA_FULL **ea,
run_init(&run);
- err = attr_load_runs(attr_ea, ni, &run, NULL);
+ err = attr_load_runs_range(ni, ATTR_EA, NULL, 0, &run, 0, size);
if (!err)
err = ntfs_read_run_nb(sbi, &run, 0, ea_p, size, NULL);
run_close(&run);
@@ -444,6 +444,11 @@ update_ea:
/* Delete xattr, ATTR_EA */
ni_remove_attr_le(ni, attr, mi, le);
} else if (attr->non_res) {
+ err = attr_load_runs_range(ni, ATTR_EA, NULL, 0, &ea_run, 0,
+ size);
+ if (err)
+ goto out;
+
err = ntfs_sb_write_run(sbi, &ea_run, 0, ea_all, size, 0);
if (err)
goto out;
@@ -478,8 +483,7 @@ out:
}
#ifdef CONFIG_NTFS3_FS_POSIX_ACL
-static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns,
- struct inode *inode, int type,
+static struct posix_acl *ntfs_get_acl_ex(struct inode *inode, int type,
int locked)
{
struct ntfs_inode *ni = ntfs_i(inode);
@@ -514,7 +518,7 @@ static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns,
/* Translate extended attribute to acl. */
if (err >= 0) {
- acl = posix_acl_from_xattr(mnt_userns, buf, err);
+ acl = posix_acl_from_xattr(&init_user_ns, buf, err);
} else if (err == -ENODATA) {
acl = NULL;
} else {
@@ -537,8 +541,7 @@ struct posix_acl *ntfs_get_acl(struct inode *inode, int type, bool rcu)
if (rcu)
return ERR_PTR(-ECHILD);
- /* TODO: init_user_ns? */
- return ntfs_get_acl_ex(&init_user_ns, inode, type, 0);
+ return ntfs_get_acl_ex(inode, type, 0);
}
static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns,
@@ -547,28 +550,23 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns,
{
const char *name;
size_t size, name_len;
- void *value = NULL;
- int err = 0;
+ void *value;
+ int err;
int flags;
+ umode_t mode;
if (S_ISLNK(inode->i_mode))
return -EOPNOTSUPP;
+ mode = inode->i_mode;
switch (type) {
case ACL_TYPE_ACCESS:
/* Do not change i_mode if we are in init_acl */
if (acl && !init_acl) {
- umode_t mode;
-
err = posix_acl_update_mode(mnt_userns, inode, &mode,
&acl);
if (err)
- goto out;
-
- if (inode->i_mode != mode) {
- inode->i_mode = mode;
- mark_inode_dirty(inode);
- }
+ return err;
}
name = XATTR_NAME_POSIX_ACL_ACCESS;
name_len = sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1;
@@ -595,7 +593,7 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns,
value = kmalloc(size, GFP_NOFS);
if (!value)
return -ENOMEM;
- err = posix_acl_to_xattr(mnt_userns, acl, value, size);
+ err = posix_acl_to_xattr(&init_user_ns, acl, value, size);
if (err < 0)
goto out;
flags = 0;
@@ -604,8 +602,13 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns,
err = ntfs_set_ea(inode, name, name_len, value, size, flags, 0);
if (err == -ENODATA && !size)
err = 0; /* Removing non existed xattr. */
- if (!err)
+ if (!err) {
set_cached_acl(inode, type, acl);
+ if (inode->i_mode != mode) {
+ inode->i_mode = mode;
+ mark_inode_dirty(inode);
+ }
+ }
out:
kfree(value);
@@ -641,7 +644,7 @@ static int ntfs_xattr_get_acl(struct user_namespace *mnt_userns,
if (!acl)
return -ENODATA;
- err = posix_acl_to_xattr(mnt_userns, acl, buffer, size);
+ err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
posix_acl_release(acl);
return err;
@@ -665,12 +668,12 @@ static int ntfs_xattr_set_acl(struct user_namespace *mnt_userns,
if (!value) {
acl = NULL;
} else {
- acl = posix_acl_from_xattr(mnt_userns, value, size);
+ acl = posix_acl_from_xattr(&init_user_ns, value, size);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl) {
- err = posix_acl_valid(mnt_userns, acl);
+ err = posix_acl_valid(&init_user_ns, acl);
if (err)
goto release_and_out;
}
@@ -706,13 +709,13 @@ int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode,
inode->i_default_acl = NULL;
}
- if (!acl)
- inode->i_acl = NULL;
- else {
+ if (acl) {
if (!err)
err = ntfs_set_acl_ex(mnt_userns, inode, acl,
ACL_TYPE_ACCESS, true);
posix_acl_release(acl);
+ } else {
+ inode->i_acl = NULL;
}
return err;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 801e60bab955..c28bc983a7b1 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3403,10 +3403,12 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);
- ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
- osb->cconn = NULL;
+ if (osb->cconn) {
+ ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
+ osb->cconn = NULL;
- ocfs2_dlm_shutdown_debug(osb);
+ ocfs2_dlm_shutdown_debug(osb);
+ }
}
static int ocfs2_drop_lock(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 013a727bd7c8..e2cc9eec287c 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1914,8 +1914,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
!ocfs2_is_hard_readonly(osb))
hangup_needed = 1;
- if (osb->cconn)
- ocfs2_dlm_shutdown(osb, hangup_needed);
+ ocfs2_dlm_shutdown(osb, hangup_needed);
ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats);
debugfs_remove_recursive(osb->osb_debug_root);
diff --git a/fs/open.c b/fs/open.c
index 8a813fa5ca56..cf7e5c350a54 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -716,6 +716,8 @@ int chown_common(const struct path *path, uid_t user, gid_t group)
fs_userns = i_user_ns(inode);
retry_deleg:
+ newattrs.ia_vfsuid = INVALID_VFSUID;
+ newattrs.ia_vfsgid = INVALID_VFSGID;
newattrs.ia_valid = ATTR_CTIME;
if ((user != (uid_t)-1) && !setattr_vfsuid(&newattrs, uid))
return -EINVAL;
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index b45fea69fff3..0fbcb590af84 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -460,9 +460,12 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
* of the POSIX ACLs retrieved from the lower layer to this function to not
* alter the POSIX ACLs for the underlying filesystem.
*/
-static void ovl_idmap_posix_acl(struct user_namespace *mnt_userns,
+static void ovl_idmap_posix_acl(struct inode *realinode,
+ struct user_namespace *mnt_userns,
struct posix_acl *acl)
{
+ struct user_namespace *fs_userns = i_user_ns(realinode);
+
for (unsigned int i = 0; i < acl->a_count; i++) {
vfsuid_t vfsuid;
vfsgid_t vfsgid;
@@ -470,11 +473,11 @@ static void ovl_idmap_posix_acl(struct user_namespace *mnt_userns,
struct posix_acl_entry *e = &acl->a_entries[i];
switch (e->e_tag) {
case ACL_USER:
- vfsuid = make_vfsuid(mnt_userns, &init_user_ns, e->e_uid);
+ vfsuid = make_vfsuid(mnt_userns, fs_userns, e->e_uid);
e->e_uid = vfsuid_into_kuid(vfsuid);
break;
case ACL_GROUP:
- vfsgid = make_vfsgid(mnt_userns, &init_user_ns, e->e_gid);
+ vfsgid = make_vfsgid(mnt_userns, fs_userns, e->e_gid);
e->e_gid = vfsgid_into_kgid(vfsgid);
break;
}
@@ -536,7 +539,7 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu)
if (!clone)
clone = ERR_PTR(-ENOMEM);
else
- ovl_idmap_posix_acl(mnt_user_ns(realpath.mnt), clone);
+ ovl_idmap_posix_acl(realinode, mnt_user_ns(realpath.mnt), clone);
/*
* Since we're not in RCU path walk we always need to release the
* original ACLs.
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 1d17d7b13dcd..5af33800743e 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -361,6 +361,7 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
const struct posix_acl *acl, int want)
{
const struct posix_acl_entry *pa, *pe, *mask_obj;
+ struct user_namespace *fs_userns = i_user_ns(inode);
int found = 0;
vfsuid_t vfsuid;
vfsgid_t vfsgid;
@@ -376,7 +377,7 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
goto check_perm;
break;
case ACL_USER:
- vfsuid = make_vfsuid(mnt_userns, &init_user_ns,
+ vfsuid = make_vfsuid(mnt_userns, fs_userns,
pa->e_uid);
if (vfsuid_eq_kuid(vfsuid, current_fsuid()))
goto mask;
@@ -390,7 +391,7 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
}
break;
case ACL_GROUP:
- vfsgid = make_vfsgid(mnt_userns, &init_user_ns,
+ vfsgid = make_vfsgid(mnt_userns, fs_userns,
pa->e_gid);
if (vfsgid_in_group_p(vfsgid)) {
found = 1;
@@ -736,6 +737,7 @@ void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns,
{
struct posix_acl_xattr_header *header = value;
struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
+ struct user_namespace *fs_userns = i_user_ns(inode);
int count;
vfsuid_t vfsuid;
vfsgid_t vfsgid;
@@ -753,13 +755,13 @@ void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns,
switch (le16_to_cpu(entry->e_tag)) {
case ACL_USER:
uid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
- vfsuid = make_vfsuid(mnt_userns, &init_user_ns, uid);
+ vfsuid = make_vfsuid(mnt_userns, fs_userns, uid);
entry->e_id = cpu_to_le32(from_kuid(&init_user_ns,
vfsuid_into_kuid(vfsuid)));
break;
case ACL_GROUP:
gid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id));
- vfsgid = make_vfsgid(mnt_userns, &init_user_ns, gid);
+ vfsgid = make_vfsgid(mnt_userns, fs_userns, gid);
entry->e_id = cpu_to_le32(from_kgid(&init_user_ns,
vfsgid_into_kgid(vfsgid)));
break;
@@ -775,6 +777,7 @@ void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns,
{
struct posix_acl_xattr_header *header = value;
struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
+ struct user_namespace *fs_userns = i_user_ns(inode);
int count;
vfsuid_t vfsuid;
vfsgid_t vfsgid;
@@ -793,13 +796,13 @@ void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns,
case ACL_USER:
uid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
vfsuid = VFSUIDT_INIT(uid);
- uid = from_vfsuid(mnt_userns, &init_user_ns, vfsuid);
+ uid = from_vfsuid(mnt_userns, fs_userns, vfsuid);
entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, uid));
break;
case ACL_GROUP:
gid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id));
vfsgid = VFSGIDT_INIT(gid);
- gid = from_vfsgid(mnt_userns, &init_user_ns, vfsgid);
+ gid = from_vfsgid(mnt_userns, fs_userns, vfsgid);
entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, gid));
break;
default:
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index a3398d0f1927..4e0023643f8b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -527,10 +527,12 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
struct vm_area_struct *vma = walk->vma;
bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page = NULL;
- bool migration = false;
+ bool migration = false, young = false, dirty = false;
if (pte_present(*pte)) {
page = vm_normal_page(vma, addr, *pte);
+ young = pte_young(*pte);
+ dirty = pte_dirty(*pte);
} else if (is_swap_pte(*pte)) {
swp_entry_t swpent = pte_to_swp_entry(*pte);
@@ -560,8 +562,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
if (!page)
return;
- smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte),
- locked, migration);
+ smaps_account(mss, page, false, young, dirty, locked, migration);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index b2fd3c20e7c2..0c034ea39954 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -28,14 +28,11 @@
#include <linux/crypto.h>
#include <linux/string.h>
#include <linux/timer.h>
-#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/jiffies.h>
#include <linux/workqueue.h>
-#include <crypto/acompress.h>
-
#include "internal.h"
/*
@@ -93,8 +90,7 @@ module_param(compress, charp, 0444);
MODULE_PARM_DESC(compress, "compression to use");
/* Compression parameters */
-static struct crypto_acomp *tfm;
-static struct acomp_req *creq;
+static struct crypto_comp *tfm;
struct pstore_zbackend {
int (*zbufsize)(size_t size);
@@ -272,21 +268,12 @@ static const struct pstore_zbackend zbackends[] = {
static int pstore_compress(const void *in, void *out,
unsigned int inlen, unsigned int outlen)
{
- struct scatterlist src, dst;
int ret;
if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS))
return -EINVAL;
- sg_init_table(&src, 1);
- sg_set_buf(&src, in, inlen);
-
- sg_init_table(&dst, 1);
- sg_set_buf(&dst, out, outlen);
-
- acomp_request_set_params(creq, &src, &dst, inlen, outlen);
-
- ret = crypto_acomp_compress(creq);
+ ret = crypto_comp_compress(tfm, in, inlen, out, &outlen);
if (ret) {
pr_err("crypto_comp_compress failed, ret = %d!\n", ret);
return ret;
@@ -297,7 +284,7 @@ static int pstore_compress(const void *in, void *out,
static void allocate_buf_for_compression(void)
{
- struct crypto_acomp *acomp;
+ struct crypto_comp *ctx;
int size;
char *buf;
@@ -309,7 +296,7 @@ static void allocate_buf_for_compression(void)
if (!psinfo || tfm)
return;
- if (!crypto_has_acomp(zbackend->name, 0, CRYPTO_ALG_ASYNC)) {
+ if (!crypto_has_comp(zbackend->name, 0, 0)) {
pr_err("Unknown compression: %s\n", zbackend->name);
return;
}
@@ -328,24 +315,16 @@ static void allocate_buf_for_compression(void)
return;
}
- acomp = crypto_alloc_acomp(zbackend->name, 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR_OR_NULL(acomp)) {
+ ctx = crypto_alloc_comp(zbackend->name, 0, 0);
+ if (IS_ERR_OR_NULL(ctx)) {
kfree(buf);
pr_err("crypto_alloc_comp('%s') failed: %ld\n", zbackend->name,
- PTR_ERR(acomp));
- return;
- }
-
- creq = acomp_request_alloc(acomp);
- if (!creq) {
- crypto_free_acomp(acomp);
- kfree(buf);
- pr_err("acomp_request_alloc('%s') failed\n", zbackend->name);
+ PTR_ERR(ctx));
return;
}
/* A non-NULL big_oops_buf indicates compression is available. */
- tfm = acomp;
+ tfm = ctx;
big_oops_buf_sz = size;
big_oops_buf = buf;
@@ -355,8 +334,7 @@ static void allocate_buf_for_compression(void)
static void free_buf_for_compression(void)
{
if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && tfm) {
- acomp_request_free(creq);
- crypto_free_acomp(tfm);
+ crypto_free_comp(tfm);
tfm = NULL;
}
kfree(big_oops_buf);
@@ -693,8 +671,6 @@ static void decompress_record(struct pstore_record *record)
int ret;
int unzipped_len;
char *unzipped, *workspace;
- struct acomp_req *dreq;
- struct scatterlist src, dst;
if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS) || !record->compressed)
return;
@@ -718,30 +694,16 @@ static void decompress_record(struct pstore_record *record)
if (!workspace)
return;
- dreq = acomp_request_alloc(tfm);
- if (!dreq) {
- kfree(workspace);
- return;
- }
-
- sg_init_table(&src, 1);
- sg_set_buf(&src, record->buf, record->size);
-
- sg_init_table(&dst, 1);
- sg_set_buf(&dst, workspace, unzipped_len);
-
- acomp_request_set_params(dreq, &src, &dst, record->size, unzipped_len);
-
/* After decompression "unzipped_len" is almost certainly smaller. */
- ret = crypto_acomp_decompress(dreq);
+ ret = crypto_comp_decompress(tfm, record->buf, record->size,
+ workspace, &unzipped_len);
if (ret) {
- pr_err("crypto_acomp_decompress failed, ret = %d!\n", ret);
+ pr_err("crypto_comp_decompress failed, ret = %d!\n", ret);
kfree(workspace);
return;
}
/* Append ECC notice to decompressed buffer. */
- unzipped_len = dreq->dlen;
memcpy(workspace + unzipped_len, record->buf + record->size,
record->ecc_notice_size);
@@ -749,7 +711,6 @@ static void decompress_record(struct pstore_record *record)
unzipped = kmemdup(workspace, unzipped_len + record->ecc_notice_size,
GFP_KERNEL);
kfree(workspace);
- acomp_request_free(dreq);
if (!unzipped)
return;
diff --git a/fs/read_write.c b/fs/read_write.c
index 1a261dcf1778..328ce8cf9a85 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -496,14 +496,9 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t
}
/* caller is responsible for file_start_write/file_end_write */
-ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
+ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos)
{
- struct kvec iov = {
- .iov_base = (void *)buf,
- .iov_len = min_t(size_t, count, MAX_RW_COUNT),
- };
struct kiocb kiocb;
- struct iov_iter iter;
ssize_t ret;
if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE)))
@@ -519,8 +514,7 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t
init_sync_kiocb(&kiocb, file);
kiocb.ki_pos = pos ? *pos : 0;
- iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len);
- ret = file->f_op->write_iter(&kiocb, &iter);
+ ret = file->f_op->write_iter(&kiocb, from);
if (ret > 0) {
if (pos)
*pos = kiocb.ki_pos;
@@ -530,6 +524,18 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t
inc_syscw(current);
return ret;
}
+
+/* caller is responsible for file_start_write/file_end_write */
+ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
+{
+ struct kvec iov = {
+ .iov_base = (void *)buf,
+ .iov_len = min_t(size_t, count, MAX_RW_COUNT),
+ };
+ struct iov_iter iter;
+ iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len);
+ return __kernel_write_iter(file, &iter, pos);
+}
/*
* This "EXPORT_SYMBOL_GPL()" is more of a "EXPORT_SYMBOL_DONTUSE()",
* but autofs is one of the few internal kernel users that actually
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 98e64fec75b7..e56510964b22 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -593,7 +593,7 @@ static void squashfs_readahead(struct readahead_control *ractl)
res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
- kfree(actor);
+ squashfs_page_actor_free(actor);
if (res == expected) {
int bytes;
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index be4b12d31e0c..f1ccad519e28 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -74,7 +74,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize,
/* Decompress directly into the page cache buffers */
res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
- kfree(actor);
+ squashfs_page_actor_free(actor);
if (res < 0)
goto mark_errored;
diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c
index b23b780d8f42..54b93bf4a25c 100644
--- a/fs/squashfs/page_actor.c
+++ b/fs/squashfs/page_actor.c
@@ -52,6 +52,7 @@ struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
actor->buffer = buffer;
actor->pages = pages;
actor->next_page = 0;
+ actor->tmp_buffer = NULL;
actor->squashfs_first_page = cache_first_page;
actor->squashfs_next_page = cache_next_page;
actor->squashfs_finish_page = cache_finish_page;
@@ -68,20 +69,9 @@ static void *handle_next_page(struct squashfs_page_actor *actor)
if ((actor->next_page == actor->pages) ||
(actor->next_index != actor->page[actor->next_page]->index)) {
- if (actor->alloc_buffer) {
- void *tmp_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
-
- if (tmp_buffer) {
- actor->tmp_buffer = tmp_buffer;
- actor->next_index++;
- actor->returned_pages++;
- return tmp_buffer;
- }
- }
-
actor->next_index++;
actor->returned_pages++;
- return ERR_PTR(-ENOMEM);
+ return actor->alloc_buffer ? actor->tmp_buffer : ERR_PTR(-ENOMEM);
}
actor->next_index++;
@@ -96,11 +86,10 @@ static void *direct_first_page(struct squashfs_page_actor *actor)
static void *direct_next_page(struct squashfs_page_actor *actor)
{
- if (actor->pageaddr)
+ if (actor->pageaddr) {
kunmap_local(actor->pageaddr);
-
- kfree(actor->tmp_buffer);
- actor->pageaddr = actor->tmp_buffer = NULL;
+ actor->pageaddr = NULL;
+ }
return handle_next_page(actor);
}
@@ -109,8 +98,6 @@ static void direct_finish_page(struct squashfs_page_actor *actor)
{
if (actor->pageaddr)
kunmap_local(actor->pageaddr);
-
- kfree(actor->tmp_buffer);
}
struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_info *msblk,
@@ -121,6 +108,16 @@ struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_
if (actor == NULL)
return NULL;
+ if (msblk->decompressor->alloc_buffer) {
+ actor->tmp_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
+
+ if (actor->tmp_buffer == NULL) {
+ kfree(actor);
+ return NULL;
+ }
+ } else
+ actor->tmp_buffer = NULL;
+
actor->length = length ? : pages * PAGE_SIZE;
actor->page = page;
actor->pages = pages;
@@ -128,7 +125,6 @@ struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_
actor->returned_pages = 0;
actor->next_index = page[0]->index & ~((1 << (msblk->block_log - PAGE_SHIFT)) - 1);
actor->pageaddr = NULL;
- actor->tmp_buffer = NULL;
actor->alloc_buffer = msblk->decompressor->alloc_buffer;
actor->squashfs_first_page = direct_first_page;
actor->squashfs_next_page = direct_next_page;
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
index 24841d28bc0f..95ffbb543d91 100644
--- a/fs/squashfs/page_actor.h
+++ b/fs/squashfs/page_actor.h
@@ -29,6 +29,11 @@ extern struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
extern struct squashfs_page_actor *squashfs_page_actor_init_special(
struct squashfs_sb_info *msblk,
struct page **page, int pages, int length);
+static inline void squashfs_page_actor_free(struct squashfs_page_actor *actor)
+{
+ kfree(actor->tmp_buffer);
+ kfree(actor);
+}
static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
{
return actor->squashfs_first_page(actor);
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 81d26abf486f..da85b3979195 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -141,6 +141,8 @@ struct tracefs_mount_opts {
kuid_t uid;
kgid_t gid;
umode_t mode;
+ /* Opt_* bitfield. */
+ unsigned int opts;
};
enum {
@@ -241,6 +243,7 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
kgid_t gid;
char *p;
+ opts->opts = 0;
opts->mode = TRACEFS_DEFAULT_MODE;
while ((p = strsep(&data, ",")) != NULL) {
@@ -275,24 +278,36 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
* but traditionally tracefs has ignored all mount options
*/
}
+
+ opts->opts |= BIT(token);
}
return 0;
}
-static int tracefs_apply_options(struct super_block *sb)
+static int tracefs_apply_options(struct super_block *sb, bool remount)
{
struct tracefs_fs_info *fsi = sb->s_fs_info;
struct inode *inode = d_inode(sb->s_root);
struct tracefs_mount_opts *opts = &fsi->mount_opts;
- inode->i_mode &= ~S_IALLUGO;
- inode->i_mode |= opts->mode;
+ /*
+ * On remount, only reset mode/uid/gid if they were provided as mount
+ * options.
+ */
+
+ if (!remount || opts->opts & BIT(Opt_mode)) {
+ inode->i_mode &= ~S_IALLUGO;
+ inode->i_mode |= opts->mode;
+ }
- inode->i_uid = opts->uid;
+ if (!remount || opts->opts & BIT(Opt_uid))
+ inode->i_uid = opts->uid;
- /* Set all the group ids to the mount option */
- set_gid(sb->s_root, opts->gid);
+ if (!remount || opts->opts & BIT(Opt_gid)) {
+ /* Set all the group ids to the mount option */
+ set_gid(sb->s_root, opts->gid);
+ }
return 0;
}
@@ -307,7 +322,7 @@ static int tracefs_remount(struct super_block *sb, int *flags, char *data)
if (err)
goto fail;
- tracefs_apply_options(sb);
+ tracefs_apply_options(sb, true);
fail:
return err;
@@ -359,7 +374,7 @@ static int trace_fill_super(struct super_block *sb, void *data, int silent)
sb->s_op = &tracefs_super_operations;
- tracefs_apply_options(sb);
+ tracefs_apply_options(sb, false);
return 0;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index e6ffe7bc59e3..0c1d33c4f74c 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1601,6 +1601,10 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
wake_userfault(vma->vm_userfaultfd_ctx.ctx, &range);
}
+ /* Reset ptes for the whole vma range if wr-protected */
+ if (userfaultfd_wp(vma))
+ uffd_wp_range(mm, vma, start, vma_end - start, false);
+
new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
prev = vma_merge(mm, prev, start, vma_end, new_flags,
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c
index 69d9c83ea4b2..5b1f9a24ed59 100644
--- a/fs/xfs/xfs_notify_failure.c
+++ b/fs/xfs/xfs_notify_failure.c
@@ -175,13 +175,13 @@ xfs_dax_notify_failure(
u64 ddev_start;
u64 ddev_end;
- if (!(mp->m_sb.sb_flags & SB_BORN)) {
+ if (!(mp->m_super->s_flags & SB_BORN)) {
xfs_warn(mp, "filesystem is not ready for notify_failure()!");
return -EIO;
}
if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_daxdev == dax_dev) {
- xfs_warn(mp,
+ xfs_debug(mp,
"notify_failure() not supported on realtime device!");
return -EOPNOTSUPP;
}
@@ -194,7 +194,7 @@ xfs_dax_notify_failure(
}
if (!xfs_has_rmapbt(mp)) {
- xfs_warn(mp, "notify_failure() needs rmapbt enabled!");
+ xfs_debug(mp, "notify_failure() needs rmapbt enabled!");
return -EOPNOTSUPP;
}