diff options
Diffstat (limited to 'fs')
278 files changed, 4420 insertions, 3024 deletions
diff --git a/fs/9p/Makefile b/fs/9p/Makefile index ff7be98f84f2..9619ccadd2fc 100644 --- a/fs/9p/Makefile +++ b/fs/9p/Makefile @@ -10,10 +10,7 @@ obj-$(CONFIG_9P_FS) := 9p.o vfs_dentry.o \ v9fs.o \ fid.o \ - xattr.o \ - xattr_user.o \ - xattr_trusted.o + xattr.o 9p-$(CONFIG_9P_FSCACHE) += cache.o 9p-$(CONFIG_9P_FS_POSIX_ACL) += acl.o -9p-$(CONFIG_9P_FS_SECURITY) += xattr_security.o diff --git a/fs/9p/acl.c b/fs/9p/acl.c index 31c010372660..a7e28890f5ef 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -212,26 +212,9 @@ int v9fs_acl_mode(struct inode *dir, umode_t *modep, return 0; } -static int v9fs_remote_get_acl(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) -{ - char *full_name; - - switch (type) { - case ACL_TYPE_ACCESS: - full_name = POSIX_ACL_XATTR_ACCESS; - break; - case ACL_TYPE_DEFAULT: - full_name = POSIX_ACL_XATTR_DEFAULT; - break; - default: - BUG(); - } - return v9fs_xattr_get(dentry, full_name, buffer, size); -} - -static int v9fs_xattr_get_acl(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int v9fs_xattr_get_acl(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { struct v9fs_session_info *v9ses; struct posix_acl *acl; @@ -245,9 +228,9 @@ static int v9fs_xattr_get_acl(struct dentry *dentry, const char *name, * We allow set/get/list of acl when access=client is not specified */ if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) - return v9fs_remote_get_acl(dentry, name, buffer, size, type); + return v9fs_xattr_get(dentry, handler->prefix, buffer, size); - acl = v9fs_get_cached_acl(d_inode(dentry), type); + acl = v9fs_get_cached_acl(d_inode(dentry), handler->flags); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl == NULL) @@ -258,29 +241,9 @@ static int v9fs_xattr_get_acl(struct dentry *dentry, const char *name, return error; } -static int v9fs_remote_set_acl(struct dentry *dentry, const char *name, - const void *value, size_t size, - int flags, int type) -{ - char *full_name; - - switch (type) { - case ACL_TYPE_ACCESS: - full_name = POSIX_ACL_XATTR_ACCESS; - break; - case ACL_TYPE_DEFAULT: - full_name = POSIX_ACL_XATTR_DEFAULT; - break; - default: - BUG(); - } - return v9fs_xattr_set(dentry, full_name, value, size, flags); -} - - -static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name, - const void *value, size_t size, - int flags, int type) +static int v9fs_xattr_set_acl(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { int retval; struct posix_acl *acl; @@ -296,8 +259,8 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name, * xattr value. We leave it to the server to validate */ if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) - return v9fs_remote_set_acl(dentry, name, - value, size, flags, type); + return v9fs_xattr_set(dentry, handler->prefix, value, size, + flags); if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; @@ -316,9 +279,8 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name, } else acl = NULL; - switch (type) { + switch (handler->flags) { case ACL_TYPE_ACCESS: - name = POSIX_ACL_XATTR_ACCESS; if (acl) { umode_t mode = inode->i_mode; retval = posix_acl_equiv_mode(acl, &mode); @@ -349,7 +311,6 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name, } break; case ACL_TYPE_DEFAULT: - name = POSIX_ACL_XATTR_DEFAULT; if (!S_ISDIR(inode->i_mode)) { retval = acl ? -EINVAL : 0; goto err_out; @@ -358,9 +319,9 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name, default: BUG(); } - retval = v9fs_xattr_set(dentry, name, value, size, flags); + retval = v9fs_xattr_set(dentry, handler->prefix, value, size, flags); if (!retval) - set_cached_acl(inode, type, acl); + set_cached_acl(inode, handler->flags, acl); err_out: posix_acl_release(acl); return retval; diff --git a/fs/9p/cache.h b/fs/9p/cache.h index 2f9675491095..247e47e54bcc 100644 --- a/fs/9p/cache.h +++ b/fs/9p/cache.h @@ -21,6 +21,7 @@ */ #ifndef _9P_CACHE_H +#define _9P_CACHE_H #ifdef CONFIG_9P_FSCACHE #include <linux/fscache.h> #include <linux/spinlock.h> diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index b1dc51888048..699941e90667 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1368,9 +1368,6 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rde dir->i_ino, dentry, mode, MAJOR(rdev), MINOR(rdev)); - if (!new_valid_dev(rdev)) - return -EINVAL; - /* build extension */ if (S_ISBLK(mode)) sprintf(name, "b %u %u", MAJOR(rdev), MINOR(rdev)); diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index e8aa57dc8d6d..cb899af1babc 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -829,9 +829,6 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, dir->i_ino, dentry, omode, MAJOR(rdev), MINOR(rdev)); - if (!new_valid_dev(rdev)) - return -EINVAL; - v9ses = v9fs_inode2v9ses(dir); dir_dentry = dentry->d_parent; dfid = v9fs_fid_lookup(dir_dentry); diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index 0cf44b6cccd6..e3d026ac382e 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c @@ -137,6 +137,48 @@ ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) return v9fs_xattr_get(dentry, NULL, buffer, buffer_size); } +static int v9fs_xattr_handler_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) +{ + const char *full_name = xattr_full_name(handler, name); + + if (strcmp(name, "") == 0) + return -EINVAL; + return v9fs_xattr_get(dentry, full_name, buffer, size); +} + +static int v9fs_xattr_handler_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + const char *full_name = xattr_full_name(handler, name); + + if (strcmp(name, "") == 0) + return -EINVAL; + return v9fs_xattr_set(dentry, full_name, value, size, flags); +} + +static struct xattr_handler v9fs_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .get = v9fs_xattr_handler_get, + .set = v9fs_xattr_handler_set, +}; + +static struct xattr_handler v9fs_xattr_trusted_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .get = v9fs_xattr_handler_get, + .set = v9fs_xattr_handler_set, +}; + +#ifdef CONFIG_9P_FS_SECURITY +static struct xattr_handler v9fs_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .get = v9fs_xattr_handler_get, + .set = v9fs_xattr_handler_set, +}; +#endif + const struct xattr_handler *v9fs_xattr_handlers[] = { &v9fs_xattr_user_handler, &v9fs_xattr_trusted_handler, diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h index d3e2ea3840be..c63c3bea5de5 100644 --- a/fs/9p/xattr.h +++ b/fs/9p/xattr.h @@ -19,9 +19,6 @@ #include <net/9p/client.h> extern const struct xattr_handler *v9fs_xattr_handlers[]; -extern struct xattr_handler v9fs_xattr_user_handler; -extern struct xattr_handler v9fs_xattr_trusted_handler; -extern struct xattr_handler v9fs_xattr_security_handler; extern const struct xattr_handler v9fs_xattr_acl_access_handler; extern const struct xattr_handler v9fs_xattr_acl_default_handler; diff --git a/fs/9p/xattr_security.c b/fs/9p/xattr_security.c deleted file mode 100644 index cb247a142a6e..000000000000 --- a/fs/9p/xattr_security.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright IBM Corporation, 2010 - * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - */ - - -#include <linux/module.h> -#include <linux/string.h> -#include <linux/fs.h> -#include <linux/slab.h> -#include "xattr.h" - -static int v9fs_xattr_security_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) -{ - int retval; - char *full_name; - size_t name_len; - size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; - - if (name == NULL) - return -EINVAL; - - if (strcmp(name, "") == 0) - return -EINVAL; - - name_len = strlen(name); - full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); - if (!full_name) - return -ENOMEM; - memcpy(full_name, XATTR_SECURITY_PREFIX, prefix_len); - memcpy(full_name+prefix_len, name, name_len); - full_name[prefix_len + name_len] = '\0'; - - retval = v9fs_xattr_get(dentry, full_name, buffer, size); - kfree(full_name); - return retval; -} - -static int v9fs_xattr_security_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) -{ - int retval; - char *full_name; - size_t name_len; - size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; - - if (name == NULL) - return -EINVAL; - - if (strcmp(name, "") == 0) - return -EINVAL; - - name_len = strlen(name); - full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); - if (!full_name) - return -ENOMEM; - memcpy(full_name, XATTR_SECURITY_PREFIX, prefix_len); - memcpy(full_name + prefix_len, name, name_len); - full_name[prefix_len + name_len] = '\0'; - - retval = v9fs_xattr_set(dentry, full_name, value, size, flags); - kfree(full_name); - return retval; -} - -struct xattr_handler v9fs_xattr_security_handler = { - .prefix = XATTR_SECURITY_PREFIX, - .get = v9fs_xattr_security_get, - .set = v9fs_xattr_security_set, -}; diff --git a/fs/9p/xattr_trusted.c b/fs/9p/xattr_trusted.c deleted file mode 100644 index e30d33b8a3fb..000000000000 --- a/fs/9p/xattr_trusted.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright IBM Corporation, 2010 - * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - */ - - -#include <linux/module.h> -#include <linux/string.h> -#include <linux/fs.h> -#include <linux/slab.h> -#include "xattr.h" - -static int v9fs_xattr_trusted_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) -{ - int retval; - char *full_name; - size_t name_len; - size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; - - if (name == NULL) - return -EINVAL; - - if (strcmp(name, "") == 0) - return -EINVAL; - - name_len = strlen(name); - full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); - if (!full_name) - return -ENOMEM; - memcpy(full_name, XATTR_TRUSTED_PREFIX, prefix_len); - memcpy(full_name+prefix_len, name, name_len); - full_name[prefix_len + name_len] = '\0'; - - retval = v9fs_xattr_get(dentry, full_name, buffer, size); - kfree(full_name); - return retval; -} - -static int v9fs_xattr_trusted_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) -{ - int retval; - char *full_name; - size_t name_len; - size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; - - if (name == NULL) - return -EINVAL; - - if (strcmp(name, "") == 0) - return -EINVAL; - - name_len = strlen(name); - full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); - if (!full_name) - return -ENOMEM; - memcpy(full_name, XATTR_TRUSTED_PREFIX, prefix_len); - memcpy(full_name + prefix_len, name, name_len); - full_name[prefix_len + name_len] = '\0'; - - retval = v9fs_xattr_set(dentry, full_name, value, size, flags); - kfree(full_name); - return retval; -} - -struct xattr_handler v9fs_xattr_trusted_handler = { - .prefix = XATTR_TRUSTED_PREFIX, - .get = v9fs_xattr_trusted_get, - .set = v9fs_xattr_trusted_set, -}; diff --git a/fs/9p/xattr_user.c b/fs/9p/xattr_user.c deleted file mode 100644 index d0b701b72080..000000000000 --- a/fs/9p/xattr_user.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright IBM Corporation, 2010 - * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - */ - - -#include <linux/module.h> -#include <linux/string.h> -#include <linux/fs.h> -#include <linux/slab.h> -#include "xattr.h" - -static int v9fs_xattr_user_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) -{ - int retval; - char *full_name; - size_t name_len; - size_t prefix_len = XATTR_USER_PREFIX_LEN; - - if (name == NULL) - return -EINVAL; - - if (strcmp(name, "") == 0) - return -EINVAL; - - name_len = strlen(name); - full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); - if (!full_name) - return -ENOMEM; - memcpy(full_name, XATTR_USER_PREFIX, prefix_len); - memcpy(full_name+prefix_len, name, name_len); - full_name[prefix_len + name_len] = '\0'; - - retval = v9fs_xattr_get(dentry, full_name, buffer, size); - kfree(full_name); - return retval; -} - -static int v9fs_xattr_user_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) -{ - int retval; - char *full_name; - size_t name_len; - size_t prefix_len = XATTR_USER_PREFIX_LEN; - - if (name == NULL) - return -EINVAL; - - if (strcmp(name, "") == 0) - return -EINVAL; - - name_len = strlen(name); - full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); - if (!full_name) - return -ENOMEM; - memcpy(full_name, XATTR_USER_PREFIX, prefix_len); - memcpy(full_name + prefix_len, name, name_len); - full_name[prefix_len + name_len] = '\0'; - - retval = v9fs_xattr_set(dentry, full_name, value, size, flags); - kfree(full_name); - return retval; -} - -struct xattr_handler v9fs_xattr_user_handler = { - .prefix = XATTR_USER_PREFIX, - .get = v9fs_xattr_user_get, - .set = v9fs_xattr_user_set, -}; diff --git a/fs/Kconfig b/fs/Kconfig index da3f32f1a4e4..6ce72d8d1ee1 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -46,6 +46,12 @@ config FS_DAX or if unsure, say N. Saying Y will increase the size of the kernel by about 5kB. +config FS_DAX_PMD + bool + default FS_DAX + depends on FS_DAX + depends on BROKEN + endif # BLOCK # Posix ACL utility routines diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 6b659967898e..3a93755e880f 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -35,6 +35,7 @@ #include <linux/utsname.h> #include <linux/coredump.h> #include <linux/sched.h> +#include <linux/dax.h> #include <asm/uaccess.h> #include <asm/param.h> #include <asm/page.h> @@ -487,7 +488,7 @@ static inline int arch_elf_pt_proc(struct elfhdr *ehdr, } /** - * arch_check_elf() - check a PT_LOPROC..PT_HIPROC ELF program header + * arch_check_elf() - check an ELF executable * @ehdr: The main ELF header * @has_interp: True if the ELF has an interpreter, else false. * @state: Architecture-specific state preserved throughout the process @@ -759,16 +760,16 @@ static int load_elf_binary(struct linux_binprm *bprm) */ would_dump(bprm, interpreter); - retval = kernel_read(interpreter, 0, bprm->buf, - BINPRM_BUF_SIZE); - if (retval != BINPRM_BUF_SIZE) { + /* Get the exec headers */ + retval = kernel_read(interpreter, 0, + (void *)&loc->interp_elf_ex, + sizeof(loc->interp_elf_ex)); + if (retval != sizeof(loc->interp_elf_ex)) { if (retval >= 0) retval = -EIO; goto out_free_dentry; } - /* Get the exec headers */ - loc->interp_elf_ex = *((struct elfhdr *)bprm->buf); break; } elf_ppnt++; @@ -1236,6 +1237,15 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, if (vma->vm_flags & VM_DONTDUMP) return 0; + /* support for DAX */ + if (vma_is_dax(vma)) { + if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED)) + goto whole; + if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE)) + goto whole; + return 0; + } + /* Hugetlb memory check */ if (vma->vm_flags & VM_HUGETLB) { if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED)) diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index d3634bfb7fe1..b1adb92e69de 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -35,6 +35,7 @@ #include <linux/elf-fdpic.h> #include <linux/elfcore.h> #include <linux/coredump.h> +#include <linux/dax.h> #include <asm/uaccess.h> #include <asm/param.h> @@ -103,19 +104,36 @@ static void __exit exit_elf_fdpic_binfmt(void) core_initcall(init_elf_fdpic_binfmt); module_exit(exit_elf_fdpic_binfmt); -static int is_elf_fdpic(struct elfhdr *hdr, struct file *file) +static int is_elf(struct elfhdr *hdr, struct file *file) { if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0) return 0; if (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN) return 0; - if (!elf_check_arch(hdr) || !elf_check_fdpic(hdr)) + if (!elf_check_arch(hdr)) return 0; if (!file->f_op->mmap) return 0; return 1; } +#ifndef elf_check_fdpic +#define elf_check_fdpic(x) 0 +#endif + +#ifndef elf_check_const_displacement +#define elf_check_const_displacement(x) 0 +#endif + +static int is_constdisp(struct elfhdr *hdr) +{ + if (!elf_check_fdpic(hdr)) + return 1; + if (elf_check_const_displacement(hdr)) + return 1; + return 0; +} + /*****************************************************************************/ /* * read the program headers table into memory @@ -191,8 +209,18 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) /* check that this is a binary we know how to deal with */ retval = -ENOEXEC; - if (!is_elf_fdpic(&exec_params.hdr, bprm->file)) + if (!is_elf(&exec_params.hdr, bprm->file)) + goto error; + if (!elf_check_fdpic(&exec_params.hdr)) { +#ifdef CONFIG_MMU + /* binfmt_elf handles non-fdpic elf except on nommu */ goto error; +#else + /* nommu can only load ET_DYN (PIE) ELF */ + if (exec_params.hdr.e_type != ET_DYN) + goto error; +#endif + } /* read the program header table */ retval = elf_fdpic_fetch_phdrs(&exec_params, bprm->file); @@ -269,13 +297,13 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) } - if (elf_check_const_displacement(&exec_params.hdr)) + if (is_constdisp(&exec_params.hdr)) exec_params.flags |= ELF_FDPIC_FLAG_CONSTDISP; /* perform insanity checks on the interpreter */ if (interpreter_name) { retval = -ELIBBAD; - if (!is_elf_fdpic(&interp_params.hdr, interpreter)) + if (!is_elf(&interp_params.hdr, interpreter)) goto error; interp_params.flags = ELF_FDPIC_FLAG_PRESENT; @@ -306,9 +334,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) retval = -ENOEXEC; if (stack_size == 0) - goto error; + stack_size = 131072UL; /* same as exec.c's default commit */ - if (elf_check_const_displacement(&interp_params.hdr)) + if (is_constdisp(&interp_params.hdr)) interp_params.flags |= ELF_FDPIC_FLAG_CONSTDISP; /* flush all traces of the currently running executable */ @@ -319,7 +347,10 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) /* there's now no turning back... the old userspace image is dead, * defunct, deceased, etc. */ - set_personality(PER_LINUX_FDPIC); + if (elf_check_fdpic(&exec_params.hdr)) + set_personality(PER_LINUX_FDPIC); + else + set_personality(PER_LINUX); if (elf_read_implies_exec(&exec_params.hdr, executable_stack)) current->personality |= READ_IMPLIES_EXEC; @@ -374,10 +405,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) PAGE_ALIGN(current->mm->start_brk); #else - /* create a stack and brk area big enough for everyone - * - the brk heap starts at the bottom and works up - * - the stack starts at the top and works down - */ + /* create a stack area and zero-size brk area */ stack_size = (stack_size + PAGE_SIZE - 1) & PAGE_MASK; if (stack_size < PAGE_SIZE * 2) stack_size = PAGE_SIZE * 2; @@ -400,8 +428,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) current->mm->brk = current->mm->start_brk; current->mm->context.end_brk = current->mm->start_brk; - current->mm->context.end_brk += - (stack_size > PAGE_SIZE) ? (stack_size - PAGE_SIZE) : 0; current->mm->start_stack = current->mm->start_brk + stack_size; #endif @@ -1206,6 +1232,20 @@ static int maydump(struct vm_area_struct *vma, unsigned long mm_flags) return 0; } + /* support for DAX */ + if (vma_is_dax(vma)) { + if (vma->vm_flags & VM_SHARED) { + dump_ok = test_bit(MMF_DUMP_DAX_SHARED, &mm_flags); + kdcore("%08lx: %08lx: %s (DAX shared)", vma->vm_start, + vma->vm_flags, dump_ok ? "yes" : "no"); + } else { + dump_ok = test_bit(MMF_DUMP_DAX_PRIVATE, &mm_flags); + kdcore("%08lx: %08lx: %s (DAX private)", vma->vm_start, + vma->vm_flags, dump_ok ? "yes" : "no"); + } + return dump_ok; + } + /* By default, dump shared memory if mapped from an anonymous file. */ if (vma->vm_flags & VM_SHARED) { if (file_inode(vma->vm_file)->i_nlink == 0) { diff --git a/fs/block_dev.c b/fs/block_dev.c index 0a793c7930eb..c25639e907bd 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -50,12 +50,21 @@ struct block_device *I_BDEV(struct inode *inode) } EXPORT_SYMBOL(I_BDEV); -static void bdev_write_inode(struct inode *inode) +static void bdev_write_inode(struct block_device *bdev) { + struct inode *inode = bdev->bd_inode; + int ret; + spin_lock(&inode->i_lock); while (inode->i_state & I_DIRTY) { spin_unlock(&inode->i_lock); - WARN_ON_ONCE(write_inode_now(inode, true)); + ret = write_inode_now(inode, true); + if (ret) { + char name[BDEVNAME_SIZE]; + pr_warn_ratelimited("VFS: Dirty inode writeback failed " + "for block device %s (err=%d).\n", + bdevname(bdev, name), ret); + } spin_lock(&inode->i_lock); } spin_unlock(&inode->i_lock); @@ -381,9 +390,17 @@ int bdev_read_page(struct block_device *bdev, sector_t sector, struct page *page) { const struct block_device_operations *ops = bdev->bd_disk->fops; + int result = -EOPNOTSUPP; + if (!ops->rw_page || bdev_get_integrity(bdev)) - return -EOPNOTSUPP; - return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ); + return result; + + result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL); + if (result) + return result; + result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ); + blk_queue_exit(bdev->bd_queue); + return result; } EXPORT_SYMBOL_GPL(bdev_read_page); @@ -412,14 +429,20 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, int result; int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE; const struct block_device_operations *ops = bdev->bd_disk->fops; + if (!ops->rw_page || bdev_get_integrity(bdev)) return -EOPNOTSUPP; + result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL); + if (result) + return result; + set_page_writeback(page); result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw); if (result) end_page_writeback(page); else unlock_page(page); + blk_queue_exit(bdev->bd_queue); return result; } EXPORT_SYMBOL_GPL(bdev_write_page); @@ -1504,7 +1527,7 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) * ->release can cause the queue to disappear, so flush all * dirty data before. */ - bdev_write_inode(bdev->bd_inode); + bdev_write_inode(bdev); } if (bdev->bd_contains == bdev) { if (disk->fops->release) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 6dcdb2ec9211..d453d62ab0c6 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -355,7 +355,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, index = srcu_read_lock(&fs_info->subvol_srcu); - root = btrfs_read_fs_root_no_name(fs_info, &root_key); + root = btrfs_get_fs_root(fs_info, &root_key, false); if (IS_ERR(root)) { srcu_read_unlock(&fs_info->subvol_srcu, index); ret = PTR_ERR(root); diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 97b049ad0594..c473c42d7d6c 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -482,13 +482,12 @@ static noinline int add_ra_bio_pages(struct inode *inode, goto next; } - page = __page_cache_alloc(mapping_gfp_mask(mapping) & - ~__GFP_FS); + page = __page_cache_alloc(mapping_gfp_constraint(mapping, + ~__GFP_FS)); if (!page) break; - if (add_to_page_cache_lru(page, mapping, pg_index, - GFP_NOFS)) { + if (add_to_page_cache_lru(page, mapping, pg_index, GFP_NOFS)) { page_cache_release(page); goto next; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a2e73f6053a8..35489e7129a7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3367,7 +3367,7 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info) static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping) { - return mapping_gfp_mask(mapping) & ~__GFP_FS; + return mapping_gfp_constraint(mapping, ~__GFP_FS); } /* extent-tree.c */ @@ -3416,6 +3416,7 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *btrfs_lookup_block_group( struct btrfs_fs_info *info, u64 bytenr); +void btrfs_get_block_group(struct btrfs_block_group_cache *cache); void btrfs_put_block_group(struct btrfs_block_group_cache *cache); int get_block_group_index(struct btrfs_block_group_cache *cache); struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, @@ -3479,6 +3480,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytes_used, u64 type, u64 chunk_objectid, u64 chunk_offset, u64 size); +struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( + struct btrfs_fs_info *fs_info, + const u64 chunk_offset); int btrfs_remove_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 group_start, struct extent_map *em); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2d4667594681..974be09e7556 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2575,7 +2575,7 @@ int open_ctree(struct super_block *sb, fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */ /* readahead state */ - INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); + INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); spin_lock_init(&fs_info->reada_lock); fs_info->thread_pool_size = min_t(unsigned long, @@ -3780,6 +3780,9 @@ void close_ctree(struct btrfs_root *root) fs_info->closing = 1; smp_mb(); + /* wait for the qgroup rescan worker to stop */ + btrfs_qgroup_wait_for_completion(fs_info); + /* wait for the uuid_scan task to finish */ down(&fs_info->uuid_tree_rescan_sem); /* avoid complains from lockdep et al., set sem back to initial state */ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 99a8e57da8a1..4b89680a1923 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -124,7 +124,7 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) return (cache->flags & bits) == bits; } -static void btrfs_get_block_group(struct btrfs_block_group_cache *cache) +void btrfs_get_block_group(struct btrfs_block_group_cache *cache) { atomic_inc(&cache->count); } @@ -5915,19 +5915,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, set_extent_dirty(info->pinned_extents, bytenr, bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); - /* - * No longer have used bytes in this block group, queue - * it for deletion. - */ - if (old_val == 0) { - spin_lock(&info->unused_bgs_lock); - if (list_empty(&cache->bg_list)) { - btrfs_get_block_group(cache); - list_add_tail(&cache->bg_list, - &info->unused_bgs); - } - spin_unlock(&info->unused_bgs_lock); - } } spin_lock(&trans->transaction->dirty_bgs_lock); @@ -5939,6 +5926,22 @@ static int update_block_group(struct btrfs_trans_handle *trans, } spin_unlock(&trans->transaction->dirty_bgs_lock); + /* + * No longer have used bytes in this block group, queue it for + * deletion. We do this after adding the block group to the + * dirty list to avoid races between cleaner kthread and space + * cache writeout. + */ + if (!alloc && old_val == 0) { + spin_lock(&info->unused_bgs_lock); + if (list_empty(&cache->bg_list)) { + btrfs_get_block_group(cache); + list_add_tail(&cache->bg_list, + &info->unused_bgs); + } + spin_unlock(&info->unused_bgs_lock); + } + btrfs_put_block_group(cache); total -= num_bytes; bytenr += num_bytes; @@ -8105,21 +8108,47 @@ reada: } /* - * TODO: Modify related function to add related node/leaf to dirty_extent_root, - * for later qgroup accounting. - * - * Current, this function does nothing. + * These may not be seen by the usual inc/dec ref code so we have to + * add them here. */ +static int record_one_subtree_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 num_bytes) +{ + struct btrfs_qgroup_extent_record *qrecord; + struct btrfs_delayed_ref_root *delayed_refs; + + qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS); + if (!qrecord) + return -ENOMEM; + + qrecord->bytenr = bytenr; + qrecord->num_bytes = num_bytes; + qrecord->old_roots = NULL; + + delayed_refs = &trans->transaction->delayed_refs; + spin_lock(&delayed_refs->lock); + if (btrfs_qgroup_insert_dirty_extent(delayed_refs, qrecord)) + kfree(qrecord); + spin_unlock(&delayed_refs->lock); + + return 0; +} + static int account_leaf_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *eb) { int nr = btrfs_header_nritems(eb); - int i, extent_type; + int i, extent_type, ret; struct btrfs_key key; struct btrfs_file_extent_item *fi; u64 bytenr, num_bytes; + /* We can be called directly from walk_up_proc() */ + if (!root->fs_info->quota_enabled) + return 0; + for (i = 0; i < nr; i++) { btrfs_item_key_to_cpu(eb, &key, i); @@ -8138,6 +8167,10 @@ static int account_leaf_items(struct btrfs_trans_handle *trans, continue; num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); + + ret = record_one_subtree_extent(trans, root, bytenr, num_bytes); + if (ret) + return ret; } return 0; } @@ -8206,8 +8239,6 @@ static int adjust_slots_upwards(struct btrfs_root *root, /* * root_eb is the subtree root and is locked before this function is called. - * TODO: Modify this function to mark all (including complete shared node) - * to dirty_extent_root to allow it get accounted in qgroup. */ static int account_shared_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -8285,6 +8316,11 @@ walk_down: btrfs_tree_read_lock(eb); btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); path->locks[level] = BTRFS_READ_LOCK_BLOCKING; + + ret = record_one_subtree_extent(trans, root, child_bytenr, + root->nodesize); + if (ret) + goto out; } if (level == 0) { @@ -10256,6 +10292,47 @@ out: return ret; } +struct btrfs_trans_handle * +btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info, + const u64 chunk_offset) +{ + struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; + struct extent_map *em; + struct map_lookup *map; + unsigned int num_items; + + read_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, chunk_offset, 1); + read_unlock(&em_tree->lock); + ASSERT(em && em->start == chunk_offset); + + /* + * We need to reserve 3 + N units from the metadata space info in order + * to remove a block group (done at btrfs_remove_chunk() and at + * btrfs_remove_block_group()), which are used for: + * + * 1 unit for adding the free space inode's orphan (located in the tree + * of tree roots). + * 1 unit for deleting the block group item (located in the extent + * tree). + * 1 unit for deleting the free space item (located in tree of tree + * roots). + * N units for deleting N device extent items corresponding to each + * stripe (located in the device tree). + * + * In order to remove a block group we also need to reserve units in the + * system space info in order to update the chunk tree (update one or + * more device items and remove one chunk item), but this is done at + * btrfs_remove_chunk() through a call to check_system_chunk(). + */ + map = (struct map_lookup *)em->bdev; + num_items = 3 + map->num_stripes; + free_extent_map(em); + + return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root, + num_items, 1); +} + /* * Process the unused_bgs list and remove any that don't have any allocated * space inside of them. @@ -10279,22 +10356,25 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) block_group = list_first_entry(&fs_info->unused_bgs, struct btrfs_block_group_cache, bg_list); - space_info = block_group->space_info; list_del_init(&block_group->bg_list); + + space_info = block_group->space_info; + if (ret || btrfs_mixed_space_info(space_info)) { btrfs_put_block_group(block_group); continue; } spin_unlock(&fs_info->unused_bgs_lock); - mutex_lock(&root->fs_info->delete_unused_bgs_mutex); + mutex_lock(&fs_info->delete_unused_bgs_mutex); /* Don't want to race with allocators so take the groups_sem */ down_write(&space_info->groups_sem); spin_lock(&block_group->lock); if (block_group->reserved || btrfs_block_group_used(&block_group->item) || - block_group->ro) { + block_group->ro || + list_is_singular(&block_group->list)) { /* * We want to bail if we made new allocations or have * outstanding allocations in this block group. We do @@ -10319,8 +10399,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) * Want to do this before we do anything else so we can recover * properly if we fail to join the transaction. */ - /* 1 for btrfs_orphan_reserve_metadata() */ - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_trans_remove_block_group(fs_info, + block_group->key.objectid); if (IS_ERR(trans)) { btrfs_dec_block_group_ro(root, block_group); ret = PTR_ERR(trans); @@ -10410,7 +10490,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) end_trans: btrfs_end_transaction(trans, root); next: - mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); + mutex_unlock(&fs_info->delete_unused_bgs_mutex); btrfs_put_block_group(block_group); spin_lock(&fs_info->unused_bgs_lock); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 33a01ea41465..9abe18763a7f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -616,7 +616,7 @@ static int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY)) clear = 1; again: - if (!prealloc && (mask & __GFP_WAIT)) { + if (!prealloc && gfpflags_allow_blocking(mask)) { /* * Don't care for allocation failure here because we might end * up not needing the pre-allocated extent state at all, which @@ -741,7 +741,7 @@ search_again: if (start > end) goto out; spin_unlock(&tree->lock); - if (mask & __GFP_WAIT) + if (gfpflags_allow_blocking(mask)) cond_resched(); goto again; } @@ -874,7 +874,7 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, bits |= EXTENT_FIRST_DELALLOC; again: - if (!prealloc && (mask & __GFP_WAIT)) { + if (!prealloc && gfpflags_allow_blocking(mask)) { prealloc = alloc_extent_state(mask); BUG_ON(!prealloc); } @@ -1052,7 +1052,7 @@ search_again: if (start > end) goto out; spin_unlock(&tree->lock); - if (mask & __GFP_WAIT) + if (gfpflags_allow_blocking(mask)) cond_resched(); goto again; } @@ -1100,7 +1100,7 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, btrfs_debug_check_extent_io_range(tree, start, end); again: - if (!prealloc && (mask & __GFP_WAIT)) { + if (!prealloc && gfpflags_allow_blocking(mask)) { /* * Best effort, don't worry if extent state allocation fails * here for the first iteration. We might have a cached state @@ -1278,7 +1278,7 @@ search_again: if (start > end) goto out; spin_unlock(&tree->lock); - if (mask & __GFP_WAIT) + if (gfpflags_allow_blocking(mask)) cond_resched(); first_iteration = false; goto again; @@ -4386,7 +4386,7 @@ int try_release_extent_mapping(struct extent_map_tree *map, u64 start = page_offset(page); u64 end = start + PAGE_CACHE_SIZE - 1; - if ((mask & __GFP_WAIT) && + if (gfpflags_allow_blocking(mask) && page->mapping->host->i_size > 16 * 1024 * 1024) { u64 len; while (start <= end) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6bd5ce9d75f0..72e73461c064 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -756,8 +756,16 @@ next_slot: } btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (key.objectid > ino || - key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) + + if (key.objectid > ino) + break; + if (WARN_ON_ONCE(key.objectid < ino) || + key.type < BTRFS_EXTENT_DATA_KEY) { + ASSERT(del_nr == 0); + path->slots[0]++; + goto next_slot; + } + if (key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) break; fi = btrfs_item_ptr(leaf, path->slots[0], @@ -776,8 +784,8 @@ next_slot: btrfs_file_extent_inline_len(leaf, path->slots[0], fi); } else { - WARN_ON(1); - extent_end = search_start; + /* can't happen */ + BUG(); } /* @@ -1874,8 +1882,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) struct btrfs_log_ctx ctx; int ret = 0; bool full_sync = 0; - const u64 len = end - start + 1; + u64 len; + /* + * The range length can be represented by u64, we have to do the typecasts + * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync() + */ + len = (u64)end - (u64)start + 1; trace_btrfs_sync_file(file, datasync); /* @@ -2063,8 +2076,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) } } if (!full_sync) { - ret = btrfs_wait_ordered_range(inode, start, - end - start + 1); + ret = btrfs_wait_ordered_range(inode, start, len); if (ret) { btrfs_end_transaction(trans, root); goto out; diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 0948d34cb84a..85a1f8621b51 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -85,8 +85,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, } mapping_set_gfp_mask(inode->i_mapping, - mapping_gfp_mask(inode->i_mapping) & - ~(__GFP_FS | __GFP_HIGHMEM)); + mapping_gfp_constraint(inode->i_mapping, + ~(__GFP_FS | __GFP_HIGHMEM))); return inode; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4439fbb4ff45..a70c5790f8f5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1304,8 +1304,14 @@ next_slot: num_bytes = 0; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid > ino || - found_key.type > BTRFS_EXTENT_DATA_KEY || + if (found_key.objectid > ino) + break; + if (WARN_ON_ONCE(found_key.objectid < ino) || + found_key.type < BTRFS_EXTENT_DATA_KEY) { + path->slots[0]++; + goto next_slot; + } + if (found_key.type > BTRFS_EXTENT_DATA_KEY || found_key.offset > end) break; @@ -4040,9 +4046,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, */ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir) { - struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(dir)->root; - int ret; /* * 1 for the possible orphan item @@ -4051,27 +4055,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir) * 1 for the inode ref * 1 for the inode */ - trans = btrfs_start_transaction(root, 5); - if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) - return trans; - - if (PTR_ERR(trans) == -ENOSPC) { - u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5); - - trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) - return trans; - ret = btrfs_cond_migrate_bytes(root->fs_info, - &root->fs_info->trans_block_rsv, - num_bytes, 5); - if (ret) { - btrfs_end_transaction(trans, root); - return ERR_PTR(ret); - } - trans->block_rsv = &root->fs_info->trans_block_rsv; - trans->bytes_reserved = num_bytes; - } - return trans; + return btrfs_start_transaction_fallback_global_rsv(root, 5, 5); } static int btrfs_unlink(struct inode *dir, struct dentry *dentry) @@ -6360,9 +6344,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, u64 objectid; u64 index = 0; - if (!new_valid_dev(rdev)) - return -EINVAL; - /* * 2 for inode item and ref * 2 for dir items @@ -7506,6 +7487,28 @@ struct btrfs_dio_data { u64 reserve; }; +static void adjust_dio_outstanding_extents(struct inode *inode, + struct btrfs_dio_data *dio_data, + const u64 len) +{ + unsigned num_extents; + + num_extents = (unsigned) div64_u64(len + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + /* + * If we have an outstanding_extents count still set then we're + * within our reservation, otherwise we need to adjust our inode + * counter appropriately. + */ + if (dio_data->outstanding_extents) { + dio_data->outstanding_extents -= num_extents; + } else { + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents += num_extents; + spin_unlock(&BTRFS_I(inode)->lock); + } +} + static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { @@ -7541,8 +7544,11 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, * If this errors out it's because we couldn't invalidate pagecache for * this range and we need to fallback to buffered. */ - if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create)) - return -ENOTBLK; + if (lock_extent_direct(inode, lockstart, lockend, &cached_state, + create)) { + ret = -ENOTBLK; + goto err; + } em = btrfs_get_extent(inode, NULL, 0, start, len, 0); if (IS_ERR(em)) { @@ -7660,19 +7666,7 @@ unlock: if (start + len > i_size_read(inode)) i_size_write(inode, start + len); - /* - * If we have an outstanding_extents count still set then we're - * within our reservation, otherwise we need to adjust our inode - * counter appropriately. - */ - if (dio_data->outstanding_extents) { - (dio_data->outstanding_extents)--; - } else { - spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->outstanding_extents++; - spin_unlock(&BTRFS_I(inode)->lock); - } - + adjust_dio_outstanding_extents(inode, dio_data, len); btrfs_free_reserved_data_space(inode, start, len); WARN_ON(dio_data->reserve < len); dio_data->reserve -= len; @@ -7699,8 +7693,17 @@ unlock: unlock_err: clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, unlock_bits, 1, 0, &cached_state, GFP_NOFS); +err: if (dio_data) current->journal_info = dio_data; + /* + * Compensate the delalloc release we do in btrfs_direct_IO() when we + * write less data then expected, so that we don't underflow our inode's + * outstanding extents counter. + */ + if (create && dio_data) + adjust_dio_outstanding_extents(inode, dio_data, len); + return ret; } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 46476c226395..5279fdae7142 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -993,9 +993,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_root) goto out; - spin_lock(&fs_info->qgroup_lock); fs_info->quota_enabled = 0; fs_info->pending_quota_state = 0; + btrfs_qgroup_wait_for_completion(fs_info); + spin_lock(&fs_info->qgroup_lock); quota_root = fs_info->quota_root; fs_info->quota_root = NULL; fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; @@ -1461,6 +1462,8 @@ struct btrfs_qgroup_extent_record struct btrfs_qgroup_extent_record *entry; u64 bytenr = record->bytenr; + assert_spin_locked(&delayed_refs->lock); + while (*p) { parent_node = *p; entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record, @@ -2198,7 +2201,6 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, int slot; int ret; - path->leave_spinning = 1; mutex_lock(&fs_info->qgroup_rescan_lock); ret = btrfs_search_slot_for_read(fs_info->extent_root, &fs_info->qgroup_rescan_progress, @@ -2286,7 +2288,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) goto out; err = 0; - while (!err) { + while (!err && !btrfs_fs_closing(fs_info)) { trans = btrfs_start_transaction(fs_info->fs_root, 0); if (IS_ERR(trans)) { err = PTR_ERR(trans); @@ -2307,7 +2309,8 @@ out: btrfs_free_path(path); mutex_lock(&fs_info->qgroup_rescan_lock); - fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; + if (!btrfs_fs_closing(fs_info)) + fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; if (err > 0 && fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { @@ -2336,7 +2339,9 @@ out: } btrfs_end_transaction(trans, fs_info->quota_root); - if (err >= 0) { + if (btrfs_fs_closing(fs_info)) { + btrfs_info(fs_info, "qgroup scan paused"); + } else if (err >= 0) { btrfs_info(fs_info, "qgroup scan completed%s", err > 0 ? " (inconsistency flag cleared)" : ""); } else { @@ -2384,12 +2389,11 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, memset(&fs_info->qgroup_rescan_progress, 0, sizeof(fs_info->qgroup_rescan_progress)); fs_info->qgroup_rescan_progress.objectid = progress_objectid; + init_completion(&fs_info->qgroup_rescan_completion); spin_unlock(&fs_info->qgroup_lock); mutex_unlock(&fs_info->qgroup_rescan_lock); - init_completion(&fs_info->qgroup_rescan_completion); - memset(&fs_info->qgroup_rescan_work, 0, sizeof(fs_info->qgroup_rescan_work)); btrfs_init_work(&fs_info->qgroup_rescan_work, diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 550de89a8661..b091d94ceef6 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -248,14 +248,9 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check); static int scrub_setup_recheck_block(struct scrub_block *original_sblock, struct scrub_block *sblocks_for_recheck); static void scrub_recheck_block(struct btrfs_fs_info *fs_info, - struct scrub_block *sblock, int is_metadata, - int have_csum, u8 *csum, u64 generation, - u16 csum_size, int retry_failed_mirror); -static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, - struct scrub_block *sblock, - int is_metadata, int have_csum, - const u8 *csum, u64 generation, - u16 csum_size); + struct scrub_block *sblock, + int retry_failed_mirror); +static void scrub_recheck_block_checksum(struct scrub_block *sblock); static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, struct scrub_block *sblock_good); static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, @@ -889,11 +884,9 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) struct btrfs_fs_info *fs_info; u64 length; u64 logical; - u64 generation; unsigned int failed_mirror_index; unsigned int is_metadata; unsigned int have_csum; - u8 *csum; struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */ struct scrub_block *sblock_bad; int ret; @@ -918,13 +911,11 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) } length = sblock_to_check->page_count * PAGE_SIZE; logical = sblock_to_check->pagev[0]->logical; - generation = sblock_to_check->pagev[0]->generation; BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1); failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1; is_metadata = !(sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA); have_csum = sblock_to_check->pagev[0]->have_csum; - csum = sblock_to_check->pagev[0]->csum; dev = sblock_to_check->pagev[0]->dev; if (sctx->is_dev_replace && !is_metadata && !have_csum) { @@ -987,8 +978,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) sblock_bad = sblocks_for_recheck + failed_mirror_index; /* build and submit the bios for the failed mirror, check checksums */ - scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum, - csum, generation, sctx->csum_size, 1); + scrub_recheck_block(fs_info, sblock_bad, 1); if (!sblock_bad->header_error && !sblock_bad->checksum_error && sblock_bad->no_io_error_seen) { @@ -1101,9 +1091,7 @@ nodatasum_case: sblock_other = sblocks_for_recheck + mirror_index; /* build and submit the bios, check checksums */ - scrub_recheck_block(fs_info, sblock_other, is_metadata, - have_csum, csum, generation, - sctx->csum_size, 0); + scrub_recheck_block(fs_info, sblock_other, 0); if (!sblock_other->header_error && !sblock_other->checksum_error && @@ -1215,9 +1203,7 @@ nodatasum_case: * is verified, but most likely the data comes out * of the page cache. */ - scrub_recheck_block(fs_info, sblock_bad, - is_metadata, have_csum, csum, - generation, sctx->csum_size, 1); + scrub_recheck_block(fs_info, sblock_bad, 1); if (!sblock_bad->header_error && !sblock_bad->checksum_error && sblock_bad->no_io_error_seen) @@ -1318,6 +1304,9 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; u64 length = original_sblock->page_count * PAGE_SIZE; u64 logical = original_sblock->pagev[0]->logical; + u64 generation = original_sblock->pagev[0]->generation; + u64 flags = original_sblock->pagev[0]->flags; + u64 have_csum = original_sblock->pagev[0]->have_csum; struct scrub_recover *recover; struct btrfs_bio *bbio; u64 sublen; @@ -1372,6 +1361,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, sblock = sblocks_for_recheck + mirror_index; sblock->sctx = sctx; + page = kzalloc(sizeof(*page), GFP_NOFS); if (!page) { leave_nomem: @@ -1383,7 +1373,15 @@ leave_nomem: } scrub_page_get(page); sblock->pagev[page_index] = page; + page->sblock = sblock; + page->flags = flags; + page->generation = generation; page->logical = logical; + page->have_csum = have_csum; + if (have_csum) + memcpy(page->csum, + original_sblock->pagev[0]->csum, + sctx->csum_size); scrub_stripe_index_and_offset(logical, bbio->map_type, @@ -1474,15 +1472,12 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, * the pages that are errored in the just handled mirror can be repaired. */ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, - struct scrub_block *sblock, int is_metadata, - int have_csum, u8 *csum, u64 generation, - u16 csum_size, int retry_failed_mirror) + struct scrub_block *sblock, + int retry_failed_mirror) { int page_num; sblock->no_io_error_seen = 1; - sblock->header_error = 0; - sblock->checksum_error = 0; for (page_num = 0; page_num < sblock->page_count; page_num++) { struct bio *bio; @@ -1518,9 +1513,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, } if (sblock->no_io_error_seen) - scrub_recheck_block_checksum(fs_info, sblock, is_metadata, - have_csum, csum, generation, - csum_size); + scrub_recheck_block_checksum(sblock); return; } @@ -1535,61 +1528,16 @@ static inline int scrub_check_fsid(u8 fsid[], return !ret; } -static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, - struct scrub_block *sblock, - int is_metadata, int have_csum, - const u8 *csum, u64 generation, - u16 csum_size) +static void scrub_recheck_block_checksum(struct scrub_block *sblock) { - int page_num; - u8 calculated_csum[BTRFS_CSUM_SIZE]; - u32 crc = ~(u32)0; - void *mapped_buffer; - - WARN_ON(!sblock->pagev[0]->page); - if (is_metadata) { - struct btrfs_header *h; - - mapped_buffer = kmap_atomic(sblock->pagev[0]->page); - h = (struct btrfs_header *)mapped_buffer; - - if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h) || - !scrub_check_fsid(h->fsid, sblock->pagev[0]) || - memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, - BTRFS_UUID_SIZE)) { - sblock->header_error = 1; - } else if (generation != btrfs_stack_header_generation(h)) { - sblock->header_error = 1; - sblock->generation_error = 1; - } - csum = h->csum; - } else { - if (!have_csum) - return; - - mapped_buffer = kmap_atomic(sblock->pagev[0]->page); - } - - for (page_num = 0;;) { - if (page_num == 0 && is_metadata) - crc = btrfs_csum_data( - ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE, - crc, PAGE_SIZE - BTRFS_CSUM_SIZE); - else - crc = btrfs_csum_data(mapped_buffer, crc, PAGE_SIZE); - - kunmap_atomic(mapped_buffer); - page_num++; - if (page_num >= sblock->page_count) - break; - WARN_ON(!sblock->pagev[page_num]->page); - - mapped_buffer = kmap_atomic(sblock->pagev[page_num]->page); - } + sblock->header_error = 0; + sblock->checksum_error = 0; + sblock->generation_error = 0; - btrfs_csum_final(crc, calculated_csum); - if (memcmp(calculated_csum, csum, csum_size)) - sblock->checksum_error = 1; + if (sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA) + scrub_checksum_data(sblock); + else + scrub_checksum_tree_block(sblock); } static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, @@ -1833,6 +1781,18 @@ static int scrub_checksum(struct scrub_block *sblock) u64 flags; int ret; + /* + * No need to initialize these stats currently, + * because this function only use return value + * instead of these stats value. + * + * Todo: + * always use stats + */ + sblock->header_error = 0; + sblock->generation_error = 0; + sblock->checksum_error = 0; + WARN_ON(sblock->page_count < 1); flags = sblock->pagev[0]->flags; ret = 0; @@ -1858,7 +1818,6 @@ static int scrub_checksum_data(struct scrub_block *sblock) struct page *page; void *buffer; u32 crc = ~(u32)0; - int fail = 0; u64 len; int index; @@ -1889,9 +1848,9 @@ static int scrub_checksum_data(struct scrub_block *sblock) btrfs_csum_final(crc, csum); if (memcmp(csum, on_disk_csum, sctx->csum_size)) - fail = 1; + sblock->checksum_error = 1; - return fail; + return sblock->checksum_error; } static int scrub_checksum_tree_block(struct scrub_block *sblock) @@ -1907,8 +1866,6 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) u64 mapped_size; void *p; u32 crc = ~(u32)0; - int fail = 0; - int crc_fail = 0; u64 len; int index; @@ -1923,19 +1880,20 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) * a) don't have an extent buffer and * b) the page is already kmapped */ - if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h)) - ++fail; + sblock->header_error = 1; - if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) - ++fail; + if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) { + sblock->header_error = 1; + sblock->generation_error = 1; + } if (!scrub_check_fsid(h->fsid, sblock->pagev[0])) - ++fail; + sblock->header_error = 1; if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, BTRFS_UUID_SIZE)) - ++fail; + sblock->header_error = 1; len = sctx->nodesize - BTRFS_CSUM_SIZE; mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; @@ -1960,9 +1918,9 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) btrfs_csum_final(crc, calculated_csum); if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size)) - ++crc_fail; + sblock->checksum_error = 1; - return fail || crc_fail; + return sblock->header_error || sblock->checksum_error; } static int scrub_checksum_super(struct scrub_block *sblock) @@ -2176,39 +2134,27 @@ static void scrub_missing_raid56_worker(struct btrfs_work *work) { struct scrub_block *sblock = container_of(work, struct scrub_block, work); struct scrub_ctx *sctx = sblock->sctx; - struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; - unsigned int is_metadata; - unsigned int have_csum; - u8 *csum; - u64 generation; u64 logical; struct btrfs_device *dev; - is_metadata = !(sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA); - have_csum = sblock->pagev[0]->have_csum; - csum = sblock->pagev[0]->csum; - generation = sblock->pagev[0]->generation; logical = sblock->pagev[0]->logical; dev = sblock->pagev[0]->dev; - if (sblock->no_io_error_seen) { - scrub_recheck_block_checksum(fs_info, sblock, is_metadata, - have_csum, csum, generation, - sctx->csum_size); - } + if (sblock->no_io_error_seen) + scrub_recheck_block_checksum(sblock); if (!sblock->no_io_error_seen) { spin_lock(&sctx->stat_lock); sctx->stat.read_errors++; spin_unlock(&sctx->stat_lock); - btrfs_err_rl_in_rcu(fs_info, + btrfs_err_rl_in_rcu(sctx->dev_root->fs_info, "IO error rebuilding logical %llu for dev %s", logical, rcu_str_deref(dev->name)); } else if (sblock->header_error || sblock->checksum_error) { spin_lock(&sctx->stat_lock); sctx->stat.uncorrectable_errors++; spin_unlock(&sctx->stat_lock); - btrfs_err_rl_in_rcu(fs_info, + btrfs_err_rl_in_rcu(sctx->dev_root->fs_info, "failed to rebuild valid logical %llu for dev %s", logical, rcu_str_deref(dev->name)); } else { @@ -2500,8 +2446,7 @@ static void scrub_block_complete(struct scrub_block *sblock) } } -static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, - u8 *csum) +static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum) { struct btrfs_ordered_sum *sum = NULL; unsigned long index; @@ -2565,7 +2510,7 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len, if (flags & BTRFS_EXTENT_FLAG_DATA) { /* push csums to sbio */ - have_csum = scrub_find_csum(sctx, logical, l, csum); + have_csum = scrub_find_csum(sctx, logical, csum); if (have_csum == 0) ++sctx->stat.no_csum; if (sctx->is_dev_replace && !have_csum) { @@ -2703,7 +2648,7 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity, if (flags & BTRFS_EXTENT_FLAG_DATA) { /* push csums to sbio */ - have_csum = scrub_find_csum(sctx, logical, l, csum); + have_csum = scrub_find_csum(sctx, logical, csum); if (have_csum == 0) goto skip; } @@ -3012,6 +2957,9 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx, logic_start + map->stripe_len)) { btrfs_err(fs_info, "scrub: tree block %llu spanning stripes, ignored. logical=%llu", key.objectid, logic_start); + spin_lock(&sctx->stat_lock); + sctx->stat.uncorrectable_errors++; + spin_unlock(&sctx->stat_lock); goto next; } again: @@ -3361,6 +3309,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, "scrub: tree block %llu spanning " "stripes, ignored. logical=%llu", key.objectid, logical); + spin_lock(&sctx->stat_lock); + sctx->stat.uncorrectable_errors++; + spin_unlock(&sctx->stat_lock); goto next; } @@ -3481,7 +3432,9 @@ out: static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx, struct btrfs_device *scrub_dev, u64 chunk_offset, u64 length, - u64 dev_offset, int is_dev_replace) + u64 dev_offset, + struct btrfs_block_group_cache *cache, + int is_dev_replace) { struct btrfs_mapping_tree *map_tree = &sctx->dev_root->fs_info->mapping_tree; @@ -3494,8 +3447,18 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx, em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); read_unlock(&map_tree->map_tree.lock); - if (!em) - return -EINVAL; + if (!em) { + /* + * Might have been an unused block group deleted by the cleaner + * kthread or relocation. + */ + spin_lock(&cache->lock); + if (!cache->removed) + ret = -EINVAL; + spin_unlock(&cache->lock); + + return ret; + } map = (struct map_lookup *)em->bdev; if (em->start != chunk_offset) @@ -3532,6 +3495,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, u64 length; u64 chunk_offset; int ret = 0; + int ro_set; int slot; struct extent_buffer *l; struct btrfs_key key; @@ -3617,7 +3581,21 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, scrub_pause_on(fs_info); ret = btrfs_inc_block_group_ro(root, cache); scrub_pause_off(fs_info); - if (ret) { + + if (ret == 0) { + ro_set = 1; + } else if (ret == -ENOSPC) { + /* + * btrfs_inc_block_group_ro return -ENOSPC when it + * failed in creating new chunk for metadata. + * It is not a problem for scrub/replace, because + * metadata are always cowed, and our scrub paused + * commit_transactions. + */ + ro_set = 0; + } else { + btrfs_warn(fs_info, "failed setting block group ro, ret=%d\n", + ret); btrfs_put_block_group(cache); break; } @@ -3626,7 +3604,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, dev_replace->cursor_left = found_key.offset; dev_replace->item_needs_writeback = 1; ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length, - found_key.offset, is_dev_replace); + found_key.offset, cache, is_dev_replace); /* * flush, submit all pending read and write bios, afterwards @@ -3660,7 +3638,30 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, scrub_pause_off(fs_info); - btrfs_dec_block_group_ro(root, cache); + if (ro_set) + btrfs_dec_block_group_ro(root, cache); + + /* + * We might have prevented the cleaner kthread from deleting + * this block group if it was already unused because we raced + * and set it to RO mode first. So add it back to the unused + * list, otherwise it might not ever be deleted unless a manual + * balance is triggered or it becomes used and unused again. + */ + spin_lock(&cache->lock); + if (!cache->removed && !cache->ro && cache->reserved == 0 && + btrfs_block_group_used(&cache->item) == 0) { + spin_unlock(&cache->lock); + spin_lock(&fs_info->unused_bgs_lock); + if (list_empty(&cache->bg_list)) { + btrfs_get_block_group(cache); + list_add_tail(&cache->bg_list, + &fs_info->unused_bgs); + } + spin_unlock(&fs_info->unused_bgs_lock); + } else { + spin_unlock(&cache->lock); + } btrfs_put_block_group(cache); if (ret) diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index c8c3d70c31ff..8b72b005bfb9 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -898,8 +898,10 @@ int btrfs_test_free_space_cache(void) } root = btrfs_alloc_dummy_root(); - if (!root) + if (IS_ERR(root)) { + ret = PTR_ERR(root); goto out; + } root->fs_info = btrfs_alloc_dummy_fs_info(); if (!root->fs_info) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 418c6a2ad7d8..3367a3c6f214 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -592,6 +592,38 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, return start_transaction(root, num_items, TRANS_START, BTRFS_RESERVE_FLUSH_ALL); } +struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( + struct btrfs_root *root, + unsigned int num_items, + int min_factor) +{ + struct btrfs_trans_handle *trans; + u64 num_bytes; + int ret; + + trans = btrfs_start_transaction(root, num_items); + if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) + return trans; + + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) + return trans; + + num_bytes = btrfs_calc_trans_metadata_size(root, num_items); + ret = btrfs_cond_migrate_bytes(root->fs_info, + &root->fs_info->trans_block_rsv, + num_bytes, + min_factor); + if (ret) { + btrfs_end_transaction(trans, root); + return ERR_PTR(ret); + } + + trans->block_rsv = &root->fs_info->trans_block_rsv; + trans->bytes_reserved = num_bytes; + + return trans; +} struct btrfs_trans_handle *btrfs_start_transaction_lflush( struct btrfs_root *root, diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index b05b2f64d913..0da21ca9b3fb 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -185,6 +185,10 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, unsigned int num_items); +struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( + struct btrfs_root *root, + unsigned int num_items, + int min_factor); struct btrfs_trans_handle *btrfs_start_transaction_lflush( struct btrfs_root *root, unsigned int num_items); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 17ed76d18eb6..456452206609 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -232,8 +232,8 @@ static struct btrfs_device *__alloc_device(void) spin_lock_init(&dev->reada_lock); atomic_set(&dev->reada_in_flight, 0); atomic_set(&dev->dev_stats_ccnt, 0); - INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT); - INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT); + INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); + INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); return dev; } @@ -1973,8 +1973,7 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, if (srcdev->writeable) { fs_devices->rw_devices--; /* zero out the old super if it is writable */ - btrfs_scratch_superblocks(srcdev->bdev, - rcu_str_deref(srcdev->name)); + btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str); } if (srcdev->bdev) @@ -2024,8 +2023,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev); if (tgtdev->bdev) { - btrfs_scratch_superblocks(tgtdev->bdev, - rcu_str_deref(tgtdev->name)); + btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); fs_info->fs_devices->open_devices--; } fs_info->fs_devices->num_devices--; @@ -2853,7 +2851,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_offset) if (ret) return ret; - trans = btrfs_start_transaction(root, 0); + trans = btrfs_start_trans_remove_block_group(root->fs_info, + chunk_offset); if (IS_ERR(trans)) { ret = PTR_ERR(trans); btrfs_std_error(root->fs_info, ret, NULL); @@ -3123,7 +3122,7 @@ static int chunk_profiles_filter(u64 chunk_type, return 1; } -static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, +static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, struct btrfs_balance_args *bargs) { struct btrfs_block_group_cache *cache; @@ -3156,7 +3155,7 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, return ret; } -static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, +static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, struct btrfs_balance_args *bargs) { struct btrfs_block_group_cache *cache; @@ -3400,6 +3399,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) u32 count_data = 0; u32 count_meta = 0; u32 count_sys = 0; + int chunk_reserved = 0; /* step one make some room on all the devices */ devices = &fs_info->fs_devices->devices; @@ -3501,6 +3501,7 @@ again: ret = should_balance_chunk(chunk_root, leaf, chunk, found_key.offset); + btrfs_release_path(path); if (!ret) { mutex_unlock(&fs_info->delete_unused_bgs_mutex); @@ -3537,6 +3538,25 @@ again: goto loop; } + if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) && !chunk_reserved) { + trans = btrfs_start_transaction(chunk_root, 0); + if (IS_ERR(trans)) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); + ret = PTR_ERR(trans); + goto error; + } + + ret = btrfs_force_chunk_alloc(trans, chunk_root, + BTRFS_BLOCK_GROUP_DATA); + if (ret < 0) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); + goto error; + } + + btrfs_end_transaction(trans, chunk_root); + chunk_reserved = 1; + } + ret = btrfs_relocate_chunk(chunk_root, found_key.offset); mutex_unlock(&fs_info->delete_unused_bgs_mutex); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index ec5712372732..d5c84f6b1353 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -382,7 +382,7 @@ struct map_lookup { #define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5) #define BTRFS_BALANCE_ARGS_LIMIT_RANGE (1ULL << 6) #define BTRFS_BALANCE_ARGS_STRIPES_RANGE (1ULL << 7) -#define BTRFS_BALANCE_ARGS_USAGE_RANGE (1ULL << 8) +#define BTRFS_BALANCE_ARGS_USAGE_RANGE (1ULL << 10) #define BTRFS_BALANCE_ARGS_MASK \ (BTRFS_BALANCE_ARGS_PROFILES | \ diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 6f518c90e1c1..1fcd7b6e7564 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -313,8 +313,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) /* check to make sure this item is what we want */ if (found_key.objectid != key.objectid) break; - if (found_key.type != BTRFS_XATTR_ITEM_KEY) + if (found_key.type > BTRFS_XATTR_ITEM_KEY) break; + if (found_key.type < BTRFS_XATTR_ITEM_KEY) + goto next; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); if (verify_dir_item(root, leaf, di)) diff --git a/fs/buffer.c b/fs/buffer.c index 82283abb2795..4f4cd959da7c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -999,7 +999,7 @@ grow_dev_page(struct block_device *bdev, sector_t block, int ret = 0; /* Will call free_more_memory() */ gfp_t gfp_mask; - gfp_mask = (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | gfp; + gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp; /* * XXX: __getblk_slow() can not really deal with failure and @@ -2420,9 +2420,9 @@ EXPORT_SYMBOL(block_commit_write); * unlock the page. * * Direct callers of this function should protect against filesystem freezing - * using sb_start_write() - sb_end_write() functions. + * using sb_start_pagefault() - sb_end_pagefault() functions. */ -int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, +int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block) { struct page *page = vmf->page; @@ -2459,26 +2459,6 @@ out_unlock: unlock_page(page); return ret; } -EXPORT_SYMBOL(__block_page_mkwrite); - -int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, - get_block_t get_block) -{ - int ret; - struct super_block *sb = file_inode(vma->vm_file)->i_sb; - - sb_start_pagefault(sb); - - /* - * Update file times before taking page lock. We may end up failing the - * fault so this update may be superfluous but who really cares... - */ - file_update_time(vma->vm_file); - - ret = __block_page_mkwrite(vma, vmf, get_block); - sb_end_pagefault(sb); - return block_page_mkwrite_return(ret); -} EXPORT_SYMBOL(block_page_mkwrite); /* diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index aecd0859eacb..9c4b737a54df 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -30,7 +30,7 @@ extern unsigned cachefiles_debug; #define CACHEFILES_DEBUG_KLEAVE 2 #define CACHEFILES_DEBUG_KDEBUG 4 -#define cachefiles_gfp (__GFP_WAIT | __GFP_NORETRY | __GFP_NOMEMALLOC) +#define cachefiles_gfp (__GFP_RECLAIM | __GFP_NORETRY | __GFP_NOMEMALLOC) /* * node records diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index fc1056f5c96a..c4b893453e0e 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -655,6 +655,8 @@ lookup_again: aops = d_backing_inode(object->dentry)->i_mapping->a_ops; if (!aops->bmap) goto check_error; + if (object->dentry->d_sb->s_blocksize > PAGE_SIZE) + goto check_error; object->backer = object->dentry; } else { diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 3cbb0e834694..c0f3da3926a0 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -414,9 +414,6 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, ASSERT(inode->i_mapping->a_ops->readpages); /* calculate the shift required to use bmap */ - if (inode->i_sb->s_blocksize > PAGE_SIZE) - goto enobufs; - shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; op->op.flags &= FSCACHE_OP_KEEP_FLAGS; @@ -711,9 +708,6 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, ASSERT(inode->i_mapping->a_ops->readpages); /* calculate the shift required to use bmap */ - if (inode->i_sb->s_blocksize > PAGE_SIZE) - goto all_enobufs; - shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; pagevec_init(&pagevec, 0); @@ -885,7 +879,7 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) loff_t pos, eof; size_t len; void *data; - int ret; + int ret = -ENOBUFS; ASSERT(op != NULL); ASSERT(page != NULL); @@ -905,6 +899,15 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) cache = container_of(object->fscache.cache, struct cachefiles_cache, cache); + pos = (loff_t)page->index << PAGE_SHIFT; + + /* We mustn't write more data than we have, so we have to beware of a + * partial page at EOF. + */ + eof = object->fscache.store_limit_l; + if (pos >= eof) + goto error; + /* write the page to the backing filesystem and let it store it in its * own time */ path.mnt = cache->mnt; @@ -912,40 +915,38 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) file = dentry_open(&path, O_RDWR | O_LARGEFILE, cache->cache_cred); if (IS_ERR(file)) { ret = PTR_ERR(file); - } else { - pos = (loff_t) page->index << PAGE_SHIFT; - - /* we mustn't write more data than we have, so we have - * to beware of a partial page at EOF */ - eof = object->fscache.store_limit_l; - len = PAGE_SIZE; - if (eof & ~PAGE_MASK) { - ASSERTCMP(pos, <, eof); - if (eof - pos < PAGE_SIZE) { - _debug("cut short %llx to %llx", - pos, eof); - len = eof - pos; - ASSERTCMP(pos + len, ==, eof); - } - } - - data = kmap(page); - ret = __kernel_write(file, data, len, &pos); - kunmap(page); - if (ret != len) - ret = -EIO; - fput(file); + goto error_2; } - if (ret < 0) { - if (ret == -EIO) - cachefiles_io_error_obj( - object, "Write page to backing file failed"); - ret = -ENOBUFS; + len = PAGE_SIZE; + if (eof & ~PAGE_MASK) { + if (eof - pos < PAGE_SIZE) { + _debug("cut short %llx to %llx", + pos, eof); + len = eof - pos; + ASSERTCMP(pos + len, ==, eof); + } } - _leave(" = %d", ret); - return ret; + data = kmap(page); + ret = __kernel_write(file, data, len, &pos); + kunmap(page); + fput(file); + if (ret != len) + goto error_eio; + + _leave(" = 0"); + return 0; + +error_eio: + ret = -EIO; +error_2: + if (ret == -EIO) + cachefiles_io_error_obj(object, + "Write page to backing file failed"); +error: + _leave(" = -ENOBUFS [%d]", ret); + return -ENOBUFS; } /* diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 9d23e788d1df..b7d218a168fb 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1283,8 +1283,8 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) int ret1; struct address_space *mapping = inode->i_mapping; struct page *page = find_or_create_page(mapping, 0, - mapping_gfp_mask(mapping) & - ~__GFP_FS); + mapping_gfp_constraint(mapping, + ~__GFP_FS)); if (!page) { ret = VM_FAULT_OOM; goto out; @@ -1428,7 +1428,8 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, if (i_size_read(inode) == 0) return; page = find_or_create_page(mapping, 0, - mapping_gfp_mask(mapping) & ~__GFP_FS); + mapping_gfp_constraint(mapping, + ~__GFP_FS)); if (!page) return; if (PageUptodate(page)) { diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index 834f9f3723fb..a4766ded1ba7 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -88,7 +88,7 @@ static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data, const struct ceph_inode_info* ci = cookie_netfs_data; uint16_t klen; - /* use ceph virtual inode (id + snaphot) */ + /* use ceph virtual inode (id + snapshot) */ klen = sizeof(ci->i_vino); if (klen > maxbuf) return 0; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 27b566874bc1..c69e1253b47b 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1655,9 +1655,8 @@ retry_locked: !S_ISDIR(inode->i_mode) && /* ignore readdir cache */ ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ inode->i_data.nrpages && /* have cached pages */ - (file_wanted == 0 || /* no open files */ - (revoking & (CEPH_CAP_FILE_CACHE| - CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */ + (revoking & (CEPH_CAP_FILE_CACHE| + CEPH_CAP_FILE_LAZYIO)) && /* or revoking cache */ !tried_invalidate) { dout("check_caps trying to invalidate on %p\n", inode); if (try_nonblocking_invalidate(inode) < 0) { @@ -1971,49 +1970,46 @@ out: } /* - * wait for any uncommitted directory operations to commit. + * wait for any unsafe requests to complete. */ -static int unsafe_dirop_wait(struct inode *inode) +static int unsafe_request_wait(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); - struct list_head *head = &ci->i_unsafe_dirops; - struct ceph_mds_request *req; - u64 last_tid; - int ret = 0; - - if (!S_ISDIR(inode->i_mode)) - return 0; + struct ceph_mds_request *req1 = NULL, *req2 = NULL; + int ret, err = 0; spin_lock(&ci->i_unsafe_lock); - if (list_empty(head)) - goto out; - - req = list_last_entry(head, struct ceph_mds_request, - r_unsafe_dir_item); - last_tid = req->r_tid; - - do { - ceph_mdsc_get_request(req); - spin_unlock(&ci->i_unsafe_lock); + if (S_ISDIR(inode->i_mode) && !list_empty(&ci->i_unsafe_dirops)) { + req1 = list_last_entry(&ci->i_unsafe_dirops, + struct ceph_mds_request, + r_unsafe_dir_item); + ceph_mdsc_get_request(req1); + } + if (!list_empty(&ci->i_unsafe_iops)) { + req2 = list_last_entry(&ci->i_unsafe_iops, + struct ceph_mds_request, + r_unsafe_target_item); + ceph_mdsc_get_request(req2); + } + spin_unlock(&ci->i_unsafe_lock); - dout("unsafe_dirop_wait %p wait on tid %llu (until %llu)\n", - inode, req->r_tid, last_tid); - ret = !wait_for_completion_timeout(&req->r_safe_completion, - ceph_timeout_jiffies(req->r_timeout)); + dout("unsafe_requeset_wait %p wait on tid %llu %llu\n", + inode, req1 ? req1->r_tid : 0ULL, req2 ? req2->r_tid : 0ULL); + if (req1) { + ret = !wait_for_completion_timeout(&req1->r_safe_completion, + ceph_timeout_jiffies(req1->r_timeout)); if (ret) - ret = -EIO; /* timed out */ - - ceph_mdsc_put_request(req); - - spin_lock(&ci->i_unsafe_lock); - if (ret || list_empty(head)) - break; - req = list_first_entry(head, struct ceph_mds_request, - r_unsafe_dir_item); - } while (req->r_tid < last_tid); -out: - spin_unlock(&ci->i_unsafe_lock); - return ret; + err = -EIO; + ceph_mdsc_put_request(req1); + } + if (req2) { + ret = !wait_for_completion_timeout(&req2->r_safe_completion, + ceph_timeout_jiffies(req2->r_timeout)); + if (ret) + err = -EIO; + ceph_mdsc_put_request(req2); + } + return err; } int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) @@ -2039,7 +2035,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) dirty = try_flush_caps(inode, &flush_tid); dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); - ret = unsafe_dirop_wait(inode); + ret = unsafe_request_wait(inode); /* * only wait on non-file metadata writeback (the mds diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 0c62868b5c56..3c68e6aee2f0 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -34,6 +34,74 @@ * need to wait for MDS acknowledgement. */ +/* + * Calculate the length sum of direct io vectors that can + * be combined into one page vector. + */ +static size_t dio_get_pagev_size(const struct iov_iter *it) +{ + const struct iovec *iov = it->iov; + const struct iovec *iovend = iov + it->nr_segs; + size_t size; + + size = iov->iov_len - it->iov_offset; + /* + * An iov can be page vectored when both the current tail + * and the next base are page aligned. + */ + while (PAGE_ALIGNED((iov->iov_base + iov->iov_len)) && + (++iov < iovend && PAGE_ALIGNED((iov->iov_base)))) { + size += iov->iov_len; + } + dout("dio_get_pagevlen len = %zu\n", size); + return size; +} + +/* + * Allocate a page vector based on (@it, @nbytes). + * The return value is the tuple describing a page vector, + * that is (@pages, @page_align, @num_pages). + */ +static struct page ** +dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes, + size_t *page_align, int *num_pages) +{ + struct iov_iter tmp_it = *it; + size_t align; + struct page **pages; + int ret = 0, idx, npages; + + align = (unsigned long)(it->iov->iov_base + it->iov_offset) & + (PAGE_SIZE - 1); + npages = calc_pages_for(align, nbytes); + pages = kmalloc(sizeof(*pages) * npages, GFP_KERNEL); + if (!pages) { + pages = vmalloc(sizeof(*pages) * npages); + if (!pages) + return ERR_PTR(-ENOMEM); + } + + for (idx = 0; idx < npages; ) { + size_t start; + ret = iov_iter_get_pages(&tmp_it, pages + idx, nbytes, + npages - idx, &start); + if (ret < 0) + goto fail; + + iov_iter_advance(&tmp_it, ret); + nbytes -= ret; + idx += (ret + start + PAGE_SIZE - 1) / PAGE_SIZE; + } + + BUG_ON(nbytes != 0); + *num_pages = npages; + *page_align = align; + dout("dio_get_pages_alloc: got %d pages align %zu\n", npages, align); + return pages; +fail: + ceph_put_page_vector(pages, idx, false); + return ERR_PTR(ret); +} /* * Prepare an open request. Preallocate ceph_cap to avoid an @@ -458,11 +526,10 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, size_t start; ssize_t n; - n = iov_iter_get_pages_alloc(i, &pages, INT_MAX, &start); - if (n < 0) - return n; - - num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE; + n = dio_get_pagev_size(i); + pages = dio_get_pages_alloc(i, n, &start, &num_pages); + if (IS_ERR(pages)) + return PTR_ERR(pages); ret = striped_read(inode, off, n, pages, num_pages, checkeof, @@ -592,7 +659,7 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, CEPH_OSD_FLAG_WRITE; while (iov_iter_count(from) > 0) { - u64 len = iov_iter_single_seg_count(from); + u64 len = dio_get_pagev_size(from); size_t start; ssize_t n; @@ -611,14 +678,14 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0); - n = iov_iter_get_pages_alloc(from, &pages, len, &start); - if (unlikely(n < 0)) { - ret = n; + n = len; + pages = dio_get_pages_alloc(from, len, &start, &num_pages); + if (IS_ERR(pages)) { ceph_osdc_put_request(req); + ret = PTR_ERR(pages); break; } - num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE; /* * throw out any page cache pages in this range. this * may block. diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 96d2bd829902..498dcfa2dcdb 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -452,6 +452,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&ci->i_unsafe_writes); INIT_LIST_HEAD(&ci->i_unsafe_dirops); + INIT_LIST_HEAD(&ci->i_unsafe_iops); spin_lock_init(&ci->i_unsafe_lock); ci->i_snap_realm = NULL; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 51cb02da75d9..e7b130a637f9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -633,13 +633,8 @@ static void __register_request(struct ceph_mds_client *mdsc, mdsc->oldest_tid = req->r_tid; if (dir) { - struct ceph_inode_info *ci = ceph_inode(dir); - ihold(dir); - spin_lock(&ci->i_unsafe_lock); req->r_unsafe_dir = dir; - list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops); - spin_unlock(&ci->i_unsafe_lock); } } @@ -665,13 +660,20 @@ static void __unregister_request(struct ceph_mds_client *mdsc, rb_erase(&req->r_node, &mdsc->request_tree); RB_CLEAR_NODE(&req->r_node); - if (req->r_unsafe_dir) { + if (req->r_unsafe_dir && req->r_got_unsafe) { struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir); - spin_lock(&ci->i_unsafe_lock); list_del_init(&req->r_unsafe_dir_item); spin_unlock(&ci->i_unsafe_lock); + } + if (req->r_target_inode && req->r_got_unsafe) { + struct ceph_inode_info *ci = ceph_inode(req->r_target_inode); + spin_lock(&ci->i_unsafe_lock); + list_del_init(&req->r_unsafe_target_item); + spin_unlock(&ci->i_unsafe_lock); + } + if (req->r_unsafe_dir) { iput(req->r_unsafe_dir); req->r_unsafe_dir = NULL; } @@ -1430,6 +1432,13 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) if ((used | wanted) & CEPH_CAP_ANY_WR) goto out; } + /* The inode has cached pages, but it's no longer used. + * we can safely drop it */ + if (wanted == 0 && used == CEPH_CAP_FILE_CACHE && + !(oissued & CEPH_CAP_FILE_CACHE)) { + used = 0; + oissued = 0; + } if ((used | wanted) & ~oissued & mine) goto out; /* we need these caps */ @@ -1438,7 +1447,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) /* we aren't the only cap.. just remove us */ __ceph_remove_cap(cap, true); } else { - /* try to drop referring dentries */ + /* try dropping referring dentries */ spin_unlock(&ci->i_ceph_lock); d_prune_aliases(inode); dout("trim_caps_cb %p cap %p pruned, count now %d\n", @@ -1704,6 +1713,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) req->r_started = jiffies; req->r_resend_mds = -1; INIT_LIST_HEAD(&req->r_unsafe_dir_item); + INIT_LIST_HEAD(&req->r_unsafe_target_item); req->r_fmode = -1; kref_init(&req->r_kref); INIT_LIST_HEAD(&req->r_wait); @@ -1935,7 +1945,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, len = sizeof(*head) + pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + - sizeof(struct timespec); + sizeof(struct ceph_timespec); /* calculate (max) length for cap releases */ len += sizeof(struct ceph_mds_request_release) * @@ -2477,6 +2487,14 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) } else { req->r_got_unsafe = true; list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe); + if (req->r_unsafe_dir) { + struct ceph_inode_info *ci = + ceph_inode(req->r_unsafe_dir); + spin_lock(&ci->i_unsafe_lock); + list_add_tail(&req->r_unsafe_dir_item, + &ci->i_unsafe_dirops); + spin_unlock(&ci->i_unsafe_lock); + } } dout("handle_reply tid %lld result %d\n", tid, result); @@ -2518,6 +2536,13 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) up_read(&mdsc->snap_rwsem); if (realm) ceph_put_snap_realm(mdsc, realm); + + if (err == 0 && req->r_got_unsafe && req->r_target_inode) { + struct ceph_inode_info *ci = ceph_inode(req->r_target_inode); + spin_lock(&ci->i_unsafe_lock); + list_add_tail(&req->r_unsafe_target_item, &ci->i_unsafe_iops); + spin_unlock(&ci->i_unsafe_lock); + } out_err: mutex_lock(&mdsc->mutex); if (!req->r_aborted) { @@ -3917,17 +3942,19 @@ static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con, return msg; } -static int sign_message(struct ceph_connection *con, struct ceph_msg *msg) +static int mds_sign_message(struct ceph_msg *msg) { - struct ceph_mds_session *s = con->private; + struct ceph_mds_session *s = msg->con->private; struct ceph_auth_handshake *auth = &s->s_auth; + return ceph_auth_sign_message(auth, msg); } -static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg) +static int mds_check_message_signature(struct ceph_msg *msg) { - struct ceph_mds_session *s = con->private; + struct ceph_mds_session *s = msg->con->private; struct ceph_auth_handshake *auth = &s->s_auth; + return ceph_auth_check_message_signature(auth, msg); } @@ -3940,8 +3967,8 @@ static const struct ceph_connection_operations mds_con_ops = { .invalidate_authorizer = invalidate_authorizer, .peer_reset = peer_reset, .alloc_msg = mds_alloc_msg, - .sign_message = sign_message, - .check_message_signature = check_message_signature, + .sign_message = mds_sign_message, + .check_message_signature = mds_check_message_signature, }; /* eof */ diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index f575eafe2261..ccf11ef0ca87 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -236,6 +236,9 @@ struct ceph_mds_request { struct inode *r_unsafe_dir; struct list_head r_unsafe_dir_item; + /* unsafe requests that modify the target inode */ + struct list_head r_unsafe_target_item; + struct ceph_mds_session *r_session; int r_attempts; /* resend attempts */ diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 2f2460d23a06..75b7d125ce66 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -342,6 +342,7 @@ struct ceph_inode_info { struct list_head i_unsafe_writes; /* uncommitted sync writes */ struct list_head i_unsafe_dirops; /* uncommitted mds dir ops */ + struct list_head i_unsafe_iops; /* uncommitted mds inode ops */ spinlock_t i_unsafe_lock; struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */ diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index e739950ca084..cbc0f4bca0c0 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -454,6 +454,10 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_puts(s, ",nocase"); if (tcon->retry) seq_puts(s, ",hard"); + if (tcon->use_persistent) + seq_puts(s, ",persistenthandles"); + else if (tcon->use_resilient) + seq_puts(s, ",resilienthandles"); if (tcon->unix_ext) seq_puts(s, ",unix"); else @@ -921,9 +925,7 @@ const struct file_operations cifs_file_ops = { .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, .llseek = cifs_llseek, -#ifdef CONFIG_CIFS_POSIX .unlocked_ioctl = cifs_ioctl, -#endif /* CONFIG_CIFS_POSIX */ .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -939,9 +941,7 @@ const struct file_operations cifs_file_strict_ops = { .mmap = cifs_file_strict_mmap, .splice_read = generic_file_splice_read, .llseek = cifs_llseek, -#ifdef CONFIG_CIFS_POSIX .unlocked_ioctl = cifs_ioctl, -#endif /* CONFIG_CIFS_POSIX */ .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -957,9 +957,7 @@ const struct file_operations cifs_file_direct_ops = { .flush = cifs_flush, .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, -#ifdef CONFIG_CIFS_POSIX .unlocked_ioctl = cifs_ioctl, -#endif /* CONFIG_CIFS_POSIX */ .llseek = cifs_llseek, .setlease = cifs_setlease, .fallocate = cifs_fallocate, @@ -975,9 +973,7 @@ const struct file_operations cifs_file_nobrl_ops = { .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, .llseek = cifs_llseek, -#ifdef CONFIG_CIFS_POSIX .unlocked_ioctl = cifs_ioctl, -#endif /* CONFIG_CIFS_POSIX */ .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -992,9 +988,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = { .mmap = cifs_file_strict_mmap, .splice_read = generic_file_splice_read, .llseek = cifs_llseek, -#ifdef CONFIG_CIFS_POSIX .unlocked_ioctl = cifs_ioctl, -#endif /* CONFIG_CIFS_POSIX */ .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -1009,9 +1003,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { .flush = cifs_flush, .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, -#ifdef CONFIG_CIFS_POSIX .unlocked_ioctl = cifs_ioctl, -#endif /* CONFIG_CIFS_POSIX */ .llseek = cifs_llseek, .setlease = cifs_setlease, .fallocate = cifs_fallocate, diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index b406a32deb1f..2b510c537a0d 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -493,7 +493,10 @@ struct smb_vol { bool mfsymlinks:1; /* use Minshall+French Symlinks */ bool multiuser:1; bool rwpidforward:1; /* pid forward for read/write operations */ - bool nosharesock; + bool nosharesock:1; + bool persistent:1; + bool nopersistent:1; + bool resilient:1; /* noresilient not required since not fored for CA */ unsigned int rsize; unsigned int wsize; bool sockopt_tcp_nodelay:1; @@ -895,6 +898,8 @@ struct cifs_tcon { bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */ bool broken_sparse_sup; /* if server or share does not support sparse */ bool need_reconnect:1; /* connection reset, tid now invalid */ + bool use_resilient:1; /* use resilient instead of durable handles */ + bool use_persistent:1; /* use persistent instead of durable handles */ #ifdef CONFIG_CIFS_SMB2 bool print:1; /* set if connection to printer share */ bool bad_network_name:1; /* set if ret status STATUS_BAD_NETWORK_NAME */ @@ -1015,6 +1020,7 @@ struct cifs_fid { __u64 persistent_fid; /* persist file id for smb2 */ __u64 volatile_fid; /* volatile file id for smb2 */ __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for smb2 */ + __u8 create_guid[16]; #endif struct cifs_pending_open *pending_open; unsigned int epoch; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 3f2228570d44..ecb0803bdb0e 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -87,6 +87,8 @@ enum { Opt_sign, Opt_seal, Opt_noac, Opt_fsc, Opt_mfsymlinks, Opt_multiuser, Opt_sloppy, Opt_nosharesock, + Opt_persistent, Opt_nopersistent, + Opt_resilient, Opt_noresilient, /* Mount options which take numeric value */ Opt_backupuid, Opt_backupgid, Opt_uid, @@ -169,6 +171,10 @@ static const match_table_t cifs_mount_option_tokens = { { Opt_multiuser, "multiuser" }, { Opt_sloppy, "sloppy" }, { Opt_nosharesock, "nosharesock" }, + { Opt_persistent, "persistenthandles"}, + { Opt_nopersistent, "nopersistenthandles"}, + { Opt_resilient, "resilienthandles"}, + { Opt_noresilient, "noresilienthandles"}, { Opt_backupuid, "backupuid=%s" }, { Opt_backupgid, "backupgid=%s" }, @@ -1497,6 +1503,33 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, case Opt_nosharesock: vol->nosharesock = true; break; + case Opt_nopersistent: + vol->nopersistent = true; + if (vol->persistent) { + cifs_dbg(VFS, + "persistenthandles mount options conflict\n"); + goto cifs_parse_mount_err; + } + break; + case Opt_persistent: + vol->persistent = true; + if ((vol->nopersistent) || (vol->resilient)) { + cifs_dbg(VFS, + "persistenthandles mount options conflict\n"); + goto cifs_parse_mount_err; + } + break; + case Opt_resilient: + vol->resilient = true; + if (vol->persistent) { + cifs_dbg(VFS, + "persistenthandles mount options conflict\n"); + goto cifs_parse_mount_err; + } + break; + case Opt_noresilient: + vol->resilient = false; /* already the default */ + break; /* Numeric Values */ case Opt_backupuid: @@ -2655,6 +2688,42 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) cifs_dbg(FYI, "DFS disabled (%d)\n", tcon->Flags); } tcon->seal = volume_info->seal; + tcon->use_persistent = false; + /* check if SMB2 or later, CIFS does not support persistent handles */ + if (volume_info->persistent) { + if (ses->server->vals->protocol_id == 0) { + cifs_dbg(VFS, + "SMB3 or later required for persistent handles\n"); + rc = -EOPNOTSUPP; + goto out_fail; +#ifdef CONFIG_CIFS_SMB2 + } else if (ses->server->capabilities & + SMB2_GLOBAL_CAP_PERSISTENT_HANDLES) + tcon->use_persistent = true; + else /* persistent handles requested but not supported */ { + cifs_dbg(VFS, + "Persistent handles not supported on share\n"); + rc = -EOPNOTSUPP; + goto out_fail; +#endif /* CONFIG_CIFS_SMB2 */ + } +#ifdef CONFIG_CIFS_SMB2 + } else if ((tcon->capabilities & SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY) + && (ses->server->capabilities & SMB2_GLOBAL_CAP_PERSISTENT_HANDLES) + && (volume_info->nopersistent == false)) { + cifs_dbg(FYI, "enabling persistent handles\n"); + tcon->use_persistent = true; +#endif /* CONFIG_CIFS_SMB2 */ + } else if (volume_info->resilient) { + if (ses->server->vals->protocol_id == 0) { + cifs_dbg(VFS, + "SMB2.1 or later required for resilient handles\n"); + rc = -EOPNOTSUPP; + goto out_fail; + } + tcon->use_resilient = true; + } + /* * We can have only one retry value for a connection to a share so for * resources mounted more than once to the same server share the last @@ -3503,6 +3572,15 @@ try_mount_again: goto mount_fail_check; } +#ifdef CONFIG_CIFS_SMB2 + if ((volume_info->persistent == true) && ((ses->server->capabilities & + SMB2_GLOBAL_CAP_PERSISTENT_HANDLES) == 0)) { + cifs_dbg(VFS, "persistent handles not supported by server\n"); + rc = -EOPNOTSUPP; + goto mount_fail_check; + } +#endif /* CONFIG_CIFS_SMB2*/ + /* search for existing tcon to this server share */ tcon = cifs_get_tcon(ses, volume_info); if (IS_ERR(tcon)) { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 47c5c97e2dd3..0068e82217c3 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3380,7 +3380,7 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list, struct page *page, *tpage; unsigned int expected_index; int rc; - gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(mapping); + gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL); INIT_LIST_HEAD(tmplist); diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 28a77bf1d559..35cf990f87d3 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -85,9 +85,14 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, src_tcon = tlink_tcon(smb_file_src->tlink); target_tcon = tlink_tcon(smb_file_target->tlink); - /* check if source and target are on same tree connection */ - if (src_tcon != target_tcon) { - cifs_dbg(VFS, "file copy src and target on different volume\n"); + /* check source and target on same server (or volume if dup_extents) */ + if (dup_extents && (src_tcon != target_tcon)) { + cifs_dbg(VFS, "source and target of copy not on same share\n"); + goto out_fput; + } + + if (!dup_extents && (src_tcon->ses != target_tcon->ses)) { + cifs_dbg(VFS, "source and target of copy not on same server\n"); goto out_fput; } diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index b1eede3678a9..0557c45e9c33 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -84,7 +84,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, cifs_dbg(FYI, "%s: for %s\n", __func__, name->name); dentry = d_hash_and_lookup(parent, name); - if (unlikely(IS_ERR(dentry))) + if (IS_ERR(dentry)) return; if (dentry) { diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 2ab297dae5a7..f9e766f464be 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -43,6 +43,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, struct smb2_file_all_info *smb2_data = NULL; __u8 smb2_oplock[17]; struct cifs_fid *fid = oparms->fid; + struct network_resiliency_req nr_ioctl_req; smb2_path = cifs_convert_path_to_utf16(oparms->path, oparms->cifs_sb); if (smb2_path == NULL) { @@ -67,6 +68,24 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, if (rc) goto out; + + if (oparms->tcon->use_resilient) { + nr_ioctl_req.Timeout = 0; /* use server default (120 seconds) */ + nr_ioctl_req.Reserved = 0; + rc = SMB2_ioctl(xid, oparms->tcon, fid->persistent_fid, + fid->volatile_fid, FSCTL_LMR_REQUEST_RESILIENCY, true, + (char *)&nr_ioctl_req, sizeof(nr_ioctl_req), + NULL, NULL /* no return info */); + if (rc == -EOPNOTSUPP) { + cifs_dbg(VFS, + "resiliency not supported by server, disabling\n"); + oparms->tcon->use_resilient = false; + } else if (rc) + cifs_dbg(FYI, "error %d setting resiliency\n", rc); + + rc = 0; + } + if (buf) { /* open response does not have IndexNumber field - get it */ rc = SMB2_get_srv_num(xid, oparms->tcon, fid->persistent_fid, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 18da19f4f811..53ccdde6ff18 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -810,7 +810,6 @@ smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon, cfile->fid.volatile_fid, cfile->pid, &eof, false); } -#ifdef CONFIG_CIFS_SMB311 static int smb2_duplicate_extents(const unsigned int xid, struct cifsFileInfo *srcfile, @@ -854,8 +853,6 @@ smb2_duplicate_extents(const unsigned int xid, duplicate_extents_out: return rc; } -#endif /* CONFIG_CIFS_SMB311 */ - static int smb2_set_compression(const unsigned int xid, struct cifs_tcon *tcon, @@ -1703,6 +1700,7 @@ struct smb_version_operations smb30_operations = { .create_lease_buf = smb3_create_lease_buf, .parse_lease_buf = smb3_parse_lease_buf, .clone_range = smb2_clone_range, + .duplicate_extents = smb2_duplicate_extents, .validate_negotiate = smb3_validate_negotiate, .wp_retry_size = smb2_wp_retry_size, .dir_needs_close = smb2_dir_needs_close, @@ -1840,7 +1838,7 @@ struct smb_version_values smb21_values = { struct smb_version_values smb30_values = { .version_string = SMB30_VERSION_STRING, .protocol_id = SMB30_PROT_ID, - .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, @@ -1860,7 +1858,7 @@ struct smb_version_values smb30_values = { struct smb_version_values smb302_values = { .version_string = SMB302_VERSION_STRING, .protocol_id = SMB302_PROT_ID, - .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, @@ -1881,7 +1879,7 @@ struct smb_version_values smb302_values = { struct smb_version_values smb311_values = { .version_string = SMB311_VERSION_STRING, .protocol_id = SMB311_PROT_ID, - .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES, .large_lock_type = 0, .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 61276929d139..767555518d40 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1151,13 +1151,130 @@ add_lease_context(struct TCP_Server_Info *server, struct kvec *iov, return 0; } +static struct create_durable_v2 * +create_durable_v2_buf(struct cifs_fid *pfid) +{ + struct create_durable_v2 *buf; + + buf = kzalloc(sizeof(struct create_durable_v2), GFP_KERNEL); + if (!buf) + return NULL; + + buf->ccontext.DataOffset = cpu_to_le16(offsetof + (struct create_durable_v2, dcontext)); + buf->ccontext.DataLength = cpu_to_le32(sizeof(struct durable_context_v2)); + buf->ccontext.NameOffset = cpu_to_le16(offsetof + (struct create_durable_v2, Name)); + buf->ccontext.NameLength = cpu_to_le16(4); + + buf->dcontext.Timeout = 0; /* Should this be configurable by workload */ + buf->dcontext.Flags = cpu_to_le32(SMB2_DHANDLE_FLAG_PERSISTENT); + get_random_bytes(buf->dcontext.CreateGuid, 16); + memcpy(pfid->create_guid, buf->dcontext.CreateGuid, 16); + + /* SMB2_CREATE_DURABLE_HANDLE_REQUEST is "DH2Q" */ + buf->Name[0] = 'D'; + buf->Name[1] = 'H'; + buf->Name[2] = '2'; + buf->Name[3] = 'Q'; + return buf; +} + +static struct create_durable_handle_reconnect_v2 * +create_reconnect_durable_v2_buf(struct cifs_fid *fid) +{ + struct create_durable_handle_reconnect_v2 *buf; + + buf = kzalloc(sizeof(struct create_durable_handle_reconnect_v2), + GFP_KERNEL); + if (!buf) + return NULL; + + buf->ccontext.DataOffset = + cpu_to_le16(offsetof(struct create_durable_handle_reconnect_v2, + dcontext)); + buf->ccontext.DataLength = + cpu_to_le32(sizeof(struct durable_reconnect_context_v2)); + buf->ccontext.NameOffset = + cpu_to_le16(offsetof(struct create_durable_handle_reconnect_v2, + Name)); + buf->ccontext.NameLength = cpu_to_le16(4); + + buf->dcontext.Fid.PersistentFileId = fid->persistent_fid; + buf->dcontext.Fid.VolatileFileId = fid->volatile_fid; + buf->dcontext.Flags = cpu_to_le32(SMB2_DHANDLE_FLAG_PERSISTENT); + memcpy(buf->dcontext.CreateGuid, fid->create_guid, 16); + + /* SMB2_CREATE_DURABLE_HANDLE_RECONNECT_V2 is "DH2C" */ + buf->Name[0] = 'D'; + buf->Name[1] = 'H'; + buf->Name[2] = '2'; + buf->Name[3] = 'C'; + return buf; +} + static int -add_durable_context(struct kvec *iov, unsigned int *num_iovec, +add_durable_v2_context(struct kvec *iov, unsigned int *num_iovec, struct cifs_open_parms *oparms) { struct smb2_create_req *req = iov[0].iov_base; unsigned int num = *num_iovec; + iov[num].iov_base = create_durable_v2_buf(oparms->fid); + if (iov[num].iov_base == NULL) + return -ENOMEM; + iov[num].iov_len = sizeof(struct create_durable_v2); + if (!req->CreateContextsOffset) + req->CreateContextsOffset = + cpu_to_le32(sizeof(struct smb2_create_req) - 4 + + iov[1].iov_len); + le32_add_cpu(&req->CreateContextsLength, sizeof(struct create_durable_v2)); + inc_rfc1001_len(&req->hdr, sizeof(struct create_durable_v2)); + *num_iovec = num + 1; + return 0; +} + +static int +add_durable_reconnect_v2_context(struct kvec *iov, unsigned int *num_iovec, + struct cifs_open_parms *oparms) +{ + struct smb2_create_req *req = iov[0].iov_base; + unsigned int num = *num_iovec; + + /* indicate that we don't need to relock the file */ + oparms->reconnect = false; + + iov[num].iov_base = create_reconnect_durable_v2_buf(oparms->fid); + if (iov[num].iov_base == NULL) + return -ENOMEM; + iov[num].iov_len = sizeof(struct create_durable_handle_reconnect_v2); + if (!req->CreateContextsOffset) + req->CreateContextsOffset = + cpu_to_le32(sizeof(struct smb2_create_req) - 4 + + iov[1].iov_len); + le32_add_cpu(&req->CreateContextsLength, + sizeof(struct create_durable_handle_reconnect_v2)); + inc_rfc1001_len(&req->hdr, + sizeof(struct create_durable_handle_reconnect_v2)); + *num_iovec = num + 1; + return 0; +} + +static int +add_durable_context(struct kvec *iov, unsigned int *num_iovec, + struct cifs_open_parms *oparms, bool use_persistent) +{ + struct smb2_create_req *req = iov[0].iov_base; + unsigned int num = *num_iovec; + + if (use_persistent) { + if (oparms->reconnect) + return add_durable_reconnect_v2_context(iov, num_iovec, + oparms); + else + return add_durable_v2_context(iov, num_iovec, oparms); + } + if (oparms->reconnect) { iov[num].iov_base = create_reconnect_durable_buf(oparms->fid); /* indicate that we don't need to relock the file */ @@ -1275,7 +1392,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, ccontext->Next = cpu_to_le32(server->vals->create_lease_size); } - rc = add_durable_context(iov, &num_iovecs, oparms); + + rc = add_durable_context(iov, &num_iovecs, oparms, + tcon->use_persistent); if (rc) { cifs_small_buf_release(req); kfree(copy_path); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 451108284a2f..4af52780ec35 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -590,6 +590,44 @@ struct create_durable { } Data; } __packed; +/* See MS-SMB2 2.2.13.2.11 */ +/* Flags */ +#define SMB2_DHANDLE_FLAG_PERSISTENT 0x00000002 +struct durable_context_v2 { + __le32 Timeout; + __le32 Flags; + __u64 Reserved; + __u8 CreateGuid[16]; +} __packed; + +struct create_durable_v2 { + struct create_context ccontext; + __u8 Name[8]; + struct durable_context_v2 dcontext; +} __packed; + +/* See MS-SMB2 2.2.13.2.12 */ +struct durable_reconnect_context_v2 { + struct { + __u64 PersistentFileId; + __u64 VolatileFileId; + } Fid; + __u8 CreateGuid[16]; + __le32 Flags; /* see above DHANDLE_FLAG_PERSISTENT */ +} __packed; + +/* See MS-SMB2 2.2.14.2.12 */ +struct durable_reconnect_context_v2_rsp { + __le32 Timeout; + __le32 Flags; /* see above DHANDLE_FLAG_PERSISTENT */ +} __packed; + +struct create_durable_handle_reconnect_v2 { + struct create_context ccontext; + __u8 Name[8]; + struct durable_reconnect_context_v2 dcontext; +} __packed; + #define COPY_CHUNK_RES_KEY_SIZE 24 struct resume_key_req { char ResumeKey[COPY_CHUNK_RES_KEY_SIZE]; @@ -643,6 +681,13 @@ struct fsctl_get_integrity_information_rsp { /* Integrity flags for above */ #define FSCTL_INTEGRITY_FLAG_CHECKSUM_ENFORCEMENT_OFF 0x00000001 +/* See MS-SMB2 2.2.31.3 */ +struct network_resiliency_req { + __le32 Timeout; + __le32 Reserved; +} __packed; +/* There is no buffer for the response ie no struct network_resiliency_rsp */ + struct validate_negotiate_info_req { __le32 Capabilities; diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h index a639d0dab453..f996daeea271 100644 --- a/fs/cifs/smbfsctl.h +++ b/fs/cifs/smbfsctl.h @@ -90,7 +90,7 @@ #define FSCTL_SRV_ENUMERATE_SNAPSHOTS 0x00144064 /* Retrieve an opaque file reference for server-side data movement ie copy */ #define FSCTL_SRV_REQUEST_RESUME_KEY 0x00140078 -#define FSCTL_LMR_REQUEST_RESILIENCY 0x001401D4 /* BB add struct */ +#define FSCTL_LMR_REQUEST_RESILIENCY 0x001401D4 #define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */ #define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */ #define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204 diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 48851f6ea6ec..dcf26537c935 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -686,7 +686,7 @@ static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd, if (get_user(nmsgs, &udata->nmsgs)) return -EFAULT; - if (nmsgs > I2C_RDRW_IOCTL_MAX_MSGS) + if (nmsgs > I2C_RDWR_IOCTL_MAX_MSGS) return -EINVAL; if (get_user(datap, &udata->msgs)) diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index c81ce7f200a6..a7a1b218f308 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1636,6 +1636,116 @@ const struct file_operations configfs_dir_operations = { .iterate = configfs_readdir, }; +/** + * configfs_register_group - creates a parent-child relation between two groups + * @parent_group: parent group + * @group: child group + * + * link groups, creates dentry for the child and attaches it to the + * parent dentry. + * + * Return: 0 on success, negative errno code on error + */ +int configfs_register_group(struct config_group *parent_group, + struct config_group *group) +{ + struct configfs_subsystem *subsys = parent_group->cg_subsys; + struct dentry *parent; + int ret; + + mutex_lock(&subsys->su_mutex); + link_group(parent_group, group); + mutex_unlock(&subsys->su_mutex); + + parent = parent_group->cg_item.ci_dentry; + + mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT); + ret = create_default_group(parent_group, group); + if (!ret) { + spin_lock(&configfs_dirent_lock); + configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata); + spin_unlock(&configfs_dirent_lock); + } + mutex_unlock(&d_inode(parent)->i_mutex); + return ret; +} +EXPORT_SYMBOL(configfs_register_group); + +/** + * configfs_unregister_group() - unregisters a child group from its parent + * @group: parent group to be unregistered + * + * Undoes configfs_register_group() + */ +void configfs_unregister_group(struct config_group *group) +{ + struct configfs_subsystem *subsys = group->cg_subsys; + struct dentry *dentry = group->cg_item.ci_dentry; + struct dentry *parent = group->cg_item.ci_parent->ci_dentry; + + mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT); + spin_lock(&configfs_dirent_lock); + configfs_detach_prep(dentry, NULL); + spin_unlock(&configfs_dirent_lock); + + configfs_detach_group(&group->cg_item); + d_inode(dentry)->i_flags |= S_DEAD; + dont_mount(dentry); + d_delete(dentry); + mutex_unlock(&d_inode(parent)->i_mutex); + + dput(dentry); + + mutex_lock(&subsys->su_mutex); + unlink_group(group); + mutex_unlock(&subsys->su_mutex); +} +EXPORT_SYMBOL(configfs_unregister_group); + +/** + * configfs_register_default_group() - allocates and registers a child group + * @parent_group: parent group + * @name: child group name + * @item_type: child item type description + * + * boilerplate to allocate and register a child group with its parent. We need + * kzalloc'ed memory because child's default_group is initially empty. + * + * Return: allocated config group or ERR_PTR() on error + */ +struct config_group * +configfs_register_default_group(struct config_group *parent_group, + const char *name, + struct config_item_type *item_type) +{ + int ret; + struct config_group *group; + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return ERR_PTR(-ENOMEM); + config_group_init_type_name(group, name, item_type); + + ret = configfs_register_group(parent_group, group); + if (ret) { + kfree(group); + return ERR_PTR(ret); + } + return group; +} +EXPORT_SYMBOL(configfs_register_default_group); + +/** + * configfs_unregister_default_group() - unregisters and frees a child group + * @group: the group to act on + */ +void configfs_unregister_default_group(struct config_group *group) +{ + configfs_unregister_group(group); + kfree(group); +} +EXPORT_SYMBOL(configfs_unregister_default_group); + int configfs_register_subsystem(struct configfs_subsystem *subsys) { int err; diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 403269ffcdf3..d39099ea7df7 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -65,7 +65,6 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf { struct configfs_attribute * attr = to_attr(dentry); struct config_item * item = to_item(dentry->d_parent); - struct configfs_item_operations * ops = buffer->ops; int ret = 0; ssize_t count; @@ -74,7 +73,8 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf if (!buffer->page) return -ENOMEM; - count = ops->show_attribute(item,attr,buffer->page); + count = attr->show(item, buffer->page); + buffer->needs_read_fill = 0; BUG_ON(count > (ssize_t)SIMPLE_ATTR_SIZE); if (count >= 0) @@ -171,9 +171,8 @@ flush_write_buffer(struct dentry * dentry, struct configfs_buffer * buffer, size { struct configfs_attribute * attr = to_attr(dentry); struct config_item * item = to_item(dentry->d_parent); - struct configfs_item_operations * ops = buffer->ops; - return ops->store_attribute(item,attr,buffer->page,count); + return attr->store(item, buffer->page, count); } @@ -237,8 +236,7 @@ static int check_perm(struct inode * inode, struct file * file) * and we must have a store method. */ if (file->f_mode & FMODE_WRITE) { - - if (!(inode->i_mode & S_IWUGO) || !ops->store_attribute) + if (!(inode->i_mode & S_IWUGO) || !attr->store) goto Eaccess; } @@ -248,7 +246,7 @@ static int check_perm(struct inode * inode, struct file * file) * must be a show method for it. */ if (file->f_mode & FMODE_READ) { - if (!(inode->i_mode & S_IRUGO) || !ops->show_attribute) + if (!(inode->i_mode & S_IRUGO) || !attr->show) goto Eaccess; } diff --git a/fs/coredump.c b/fs/coredump.c index a8f75640ac86..1777331eee76 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -280,23 +280,24 @@ out: return ispipe; } -static int zap_process(struct task_struct *start, int exit_code) +static int zap_process(struct task_struct *start, int exit_code, int flags) { struct task_struct *t; int nr = 0; + /* ignore all signals except SIGKILL, see prepare_signal() */ + start->signal->flags = SIGNAL_GROUP_COREDUMP | flags; start->signal->group_exit_code = exit_code; start->signal->group_stop_count = 0; - t = start; - do { + for_each_thread(start, t) { task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); if (t != current && t->mm) { sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); nr++; } - } while_each_thread(start, t); + } return nr; } @@ -311,10 +312,8 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm, spin_lock_irq(&tsk->sighand->siglock); if (!signal_group_exit(tsk->signal)) { mm->core_state = core_state; - nr = zap_process(tsk, exit_code); tsk->signal->group_exit_task = tsk; - /* ignore all signals except SIGKILL, see prepare_signal() */ - tsk->signal->flags = SIGNAL_GROUP_COREDUMP; + nr = zap_process(tsk, exit_code, 0); clear_tsk_thread_flag(tsk, TIF_SIGPENDING); } spin_unlock_irq(&tsk->sighand->siglock); @@ -360,18 +359,18 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm, continue; if (g->flags & PF_KTHREAD) continue; - p = g; - do { - if (p->mm) { - if (unlikely(p->mm == mm)) { - lock_task_sighand(p, &flags); - nr += zap_process(p, exit_code); - p->signal->flags = SIGNAL_GROUP_EXIT; - unlock_task_sighand(p, &flags); - } - break; + + for_each_thread(g, p) { + if (unlikely(!p->mm)) + continue; + if (unlikely(p->mm == mm)) { + lock_task_sighand(p, &flags); + nr += zap_process(p, exit_code, + SIGNAL_GROUP_EXIT); + unlock_task_sighand(p, &flags); } - } while_each_thread(g, p); + break; + } } rcu_read_unlock(); done: @@ -29,6 +29,11 @@ #include <linux/uio.h> #include <linux/vmstat.h> +/* + * dax_clear_blocks() is called from within transaction context from XFS, + * and hence this means the stack from this point must follow GFP_NOFS + * semantics for all operations. + */ int dax_clear_blocks(struct inode *inode, sector_t block, long size) { struct block_device *bdev = inode->i_sb->s_bdev; @@ -169,8 +174,10 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, else len = iov_iter_zero(max - pos, iter); - if (!len) + if (!len) { + retval = -EFAULT; break; + } pos += len; addr += len; @@ -534,6 +541,10 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, unsigned long pfn; int result = 0; + /* dax pmd mappings are broken wrt gup and fork */ + if (!IS_ENABLED(CONFIG_FS_DAX_PMD)) + return VM_FAULT_FALLBACK; + /* Fall back to PTEs if we're going to COW */ if (write && !(vma->vm_flags & VM_SHARED)) return VM_FAULT_FALLBACK; @@ -622,6 +633,13 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR)) goto fallback; + /* + * TODO: teach vmf_insert_pfn_pmd() to support + * 'pte_special' for pmds + */ + if (pfn_valid(pfn)) + goto fallback; + if (buffer_unwritten(&bh) || buffer_new(&bh)) { int i; for (i = 0; i < PTRS_PER_PMD; i++) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 5d8f35f1382a..b7fcc0de0b2f 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -271,8 +271,12 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) dput(dentry); dentry = ERR_PTR(-EEXIST); } - if (IS_ERR(dentry)) + + if (IS_ERR(dentry)) { mutex_unlock(&d_inode(parent)->i_mutex); + simple_release_fs(&debugfs_mount, &debugfs_mount_count); + } + return dentry; } diff --git a/fs/direct-io.c b/fs/direct-io.c index 3ae0e0427191..cb5337d8c273 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -109,6 +109,8 @@ struct dio_submit { struct dio { int flags; /* doesn't change */ int rw; + blk_qc_t bio_cookie; + struct block_device *bio_bdev; struct inode *inode; loff_t i_size; /* i_size when submitted */ dio_iodone_t *end_io; /* IO completion function */ @@ -361,7 +363,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, /* * bio_alloc() is guaranteed to return a bio when called with - * __GFP_WAIT and we request a valid number of vectors. + * __GFP_RECLAIM and we request a valid number of vectors. */ bio = bio_alloc(GFP_KERNEL, nr_vecs); @@ -397,11 +399,14 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) if (dio->is_async && dio->rw == READ && dio->should_dirty) bio_set_pages_dirty(bio); - if (sdio->submit_io) + dio->bio_bdev = bio->bi_bdev; + + if (sdio->submit_io) { sdio->submit_io(dio->rw, bio, dio->inode, sdio->logical_offset_in_bio); - else - submit_bio(dio->rw, bio); + dio->bio_cookie = BLK_QC_T_NONE; + } else + dio->bio_cookie = submit_bio(dio->rw, bio); sdio->bio = NULL; sdio->boundary = 0; @@ -440,7 +445,8 @@ static struct bio *dio_await_one(struct dio *dio) __set_current_state(TASK_UNINTERRUPTIBLE); dio->waiter = current; spin_unlock_irqrestore(&dio->bio_lock, flags); - io_schedule(); + if (!blk_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie)) + io_schedule(); /* wake up sets us TASK_RUNNING */ spin_lock_irqsave(&dio->bio_lock, flags); dio->waiter = NULL; diff --git a/fs/dlm/config.c b/fs/dlm/config.c index d521bddf876d..8e294fbbac39 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -61,35 +61,8 @@ static struct config_item *make_node(struct config_group *, const char *); static void drop_node(struct config_group *, struct config_item *); static void release_node(struct config_item *); -static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a, - char *buf); -static ssize_t store_cluster(struct config_item *i, - struct configfs_attribute *a, - const char *buf, size_t len); -static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, - char *buf); -static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, - const char *buf, size_t len); -static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, - char *buf); -static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, - const char *buf, size_t len); - -static ssize_t comm_nodeid_read(struct dlm_comm *cm, char *buf); -static ssize_t comm_nodeid_write(struct dlm_comm *cm, const char *buf, - size_t len); -static ssize_t comm_local_read(struct dlm_comm *cm, char *buf); -static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf, - size_t len); -static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, - size_t len); -static ssize_t comm_addr_list_read(struct dlm_comm *cm, char *buf); -static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf); -static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, - size_t len); -static ssize_t node_weight_read(struct dlm_node *nd, char *buf); -static ssize_t node_weight_write(struct dlm_node *nd, const char *buf, - size_t len); +static struct configfs_attribute *comm_attrs[]; +static struct configfs_attribute *node_attrs[]; struct dlm_cluster { struct config_group group; @@ -108,6 +81,12 @@ struct dlm_cluster { char cl_cluster_name[DLM_LOCKSPACE_LEN]; }; +static struct dlm_cluster *config_item_to_cluster(struct config_item *i) +{ + return i ? container_of(to_config_group(i), struct dlm_cluster, group) : + NULL; +} + enum { CLUSTER_ATTR_TCP_PORT = 0, CLUSTER_ATTR_BUFFER_SIZE, @@ -124,33 +103,24 @@ enum { CLUSTER_ATTR_CLUSTER_NAME, }; -struct cluster_attribute { - struct configfs_attribute attr; - ssize_t (*show)(struct dlm_cluster *, char *); - ssize_t (*store)(struct dlm_cluster *, const char *, size_t); -}; - -static ssize_t cluster_cluster_name_read(struct dlm_cluster *cl, char *buf) +static ssize_t cluster_cluster_name_show(struct config_item *item, char *buf) { + struct dlm_cluster *cl = config_item_to_cluster(item); return sprintf(buf, "%s\n", cl->cl_cluster_name); } -static ssize_t cluster_cluster_name_write(struct dlm_cluster *cl, +static ssize_t cluster_cluster_name_store(struct config_item *item, const char *buf, size_t len) { + struct dlm_cluster *cl = config_item_to_cluster(item); + strlcpy(dlm_config.ci_cluster_name, buf, sizeof(dlm_config.ci_cluster_name)); strlcpy(cl->cl_cluster_name, buf, sizeof(cl->cl_cluster_name)); return len; } -static struct cluster_attribute cluster_attr_cluster_name = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "cluster_name", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = cluster_cluster_name_read, - .store = cluster_cluster_name_write, -}; +CONFIGFS_ATTR(cluster_, cluster_name); static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field, int *info_field, int check_zero, @@ -175,17 +145,19 @@ static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field, } #define CLUSTER_ATTR(name, check_zero) \ -static ssize_t name##_write(struct dlm_cluster *cl, const char *buf, size_t len) \ +static ssize_t cluster_##name##_store(struct config_item *item, \ + const char *buf, size_t len) \ { \ + struct dlm_cluster *cl = config_item_to_cluster(item); \ return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \ check_zero, buf, len); \ } \ -static ssize_t name##_read(struct dlm_cluster *cl, char *buf) \ +static ssize_t cluster_##name##_show(struct config_item *item, char *buf) \ { \ + struct dlm_cluster *cl = config_item_to_cluster(item); \ return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_##name); \ } \ -static struct cluster_attribute cluster_attr_##name = \ -__CONFIGFS_ATTR(name, 0644, name##_read, name##_write) +CONFIGFS_ATTR(cluster_, name); CLUSTER_ATTR(tcp_port, 1); CLUSTER_ATTR(buffer_size, 1); @@ -201,19 +173,19 @@ CLUSTER_ATTR(new_rsb_count, 0); CLUSTER_ATTR(recover_callbacks, 0); static struct configfs_attribute *cluster_attrs[] = { - [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, - [CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr, - [CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr, - [CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr, - [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr, - [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr, - [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr, - [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr, - [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, - [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr, - [CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count.attr, - [CLUSTER_ATTR_RECOVER_CALLBACKS] = &cluster_attr_recover_callbacks.attr, - [CLUSTER_ATTR_CLUSTER_NAME] = &cluster_attr_cluster_name.attr, + [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port, + [CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size, + [CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size, + [CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer, + [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs, + [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs, + [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug, + [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol, + [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs, + [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us, + [CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count, + [CLUSTER_ATTR_RECOVER_CALLBACKS] = &cluster_attr_recover_callbacks, + [CLUSTER_ATTR_CLUSTER_NAME] = &cluster_attr_cluster_name, NULL, }; @@ -224,83 +196,11 @@ enum { COMM_ATTR_ADDR_LIST, }; -struct comm_attribute { - struct configfs_attribute attr; - ssize_t (*show)(struct dlm_comm *, char *); - ssize_t (*store)(struct dlm_comm *, const char *, size_t); -}; - -static struct comm_attribute comm_attr_nodeid = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "nodeid", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = comm_nodeid_read, - .store = comm_nodeid_write, -}; - -static struct comm_attribute comm_attr_local = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "local", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = comm_local_read, - .store = comm_local_write, -}; - -static struct comm_attribute comm_attr_addr = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "addr", - .ca_mode = S_IWUSR }, - .store = comm_addr_write, -}; - -static struct comm_attribute comm_attr_addr_list = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "addr_list", - .ca_mode = S_IRUGO }, - .show = comm_addr_list_read, -}; - -static struct configfs_attribute *comm_attrs[] = { - [COMM_ATTR_NODEID] = &comm_attr_nodeid.attr, - [COMM_ATTR_LOCAL] = &comm_attr_local.attr, - [COMM_ATTR_ADDR] = &comm_attr_addr.attr, - [COMM_ATTR_ADDR_LIST] = &comm_attr_addr_list.attr, - NULL, -}; - enum { NODE_ATTR_NODEID = 0, NODE_ATTR_WEIGHT, }; -struct node_attribute { - struct configfs_attribute attr; - ssize_t (*show)(struct dlm_node *, char *); - ssize_t (*store)(struct dlm_node *, const char *, size_t); -}; - -static struct node_attribute node_attr_nodeid = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "nodeid", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = node_nodeid_read, - .store = node_nodeid_write, -}; - -static struct node_attribute node_attr_weight = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "weight", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = node_weight_read, - .store = node_weight_write, -}; - -static struct configfs_attribute *node_attrs[] = { - [NODE_ATTR_NODEID] = &node_attr_nodeid.attr, - [NODE_ATTR_WEIGHT] = &node_attr_weight.attr, - NULL, -}; - struct dlm_clusters { struct configfs_subsystem subsys; }; @@ -349,8 +249,6 @@ static struct configfs_group_operations clusters_ops = { static struct configfs_item_operations cluster_ops = { .release = release_cluster, - .show_attribute = show_cluster, - .store_attribute = store_cluster, }; static struct configfs_group_operations spaces_ops = { @@ -369,8 +267,6 @@ static struct configfs_group_operations comms_ops = { static struct configfs_item_operations comm_ops = { .release = release_comm, - .show_attribute = show_comm, - .store_attribute = store_comm, }; static struct configfs_group_operations nodes_ops = { @@ -380,8 +276,6 @@ static struct configfs_group_operations nodes_ops = { static struct configfs_item_operations node_ops = { .release = release_node, - .show_attribute = show_node, - .store_attribute = store_node, }; static struct config_item_type clusters_type = { @@ -427,12 +321,6 @@ static struct config_item_type node_type = { .ct_owner = THIS_MODULE, }; -static struct dlm_cluster *config_item_to_cluster(struct config_item *i) -{ - return i ? container_of(to_config_group(i), struct dlm_cluster, group) : - NULL; -} - static struct dlm_space *config_item_to_space(struct config_item *i) { return i ? container_of(to_config_group(i), struct dlm_space, group) : @@ -687,66 +575,30 @@ void dlm_config_exit(void) * Functions for user space to read/write attributes */ -static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a, - char *buf) -{ - struct dlm_cluster *cl = config_item_to_cluster(i); - struct cluster_attribute *cla = - container_of(a, struct cluster_attribute, attr); - return cla->show ? cla->show(cl, buf) : 0; -} - -static ssize_t store_cluster(struct config_item *i, - struct configfs_attribute *a, - const char *buf, size_t len) +static ssize_t comm_nodeid_show(struct config_item *item, char *buf) { - struct dlm_cluster *cl = config_item_to_cluster(i); - struct cluster_attribute *cla = - container_of(a, struct cluster_attribute, attr); - return cla->store ? cla->store(cl, buf, len) : -EINVAL; -} - -static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, - char *buf) -{ - struct dlm_comm *cm = config_item_to_comm(i); - struct comm_attribute *cma = - container_of(a, struct comm_attribute, attr); - return cma->show ? cma->show(cm, buf) : 0; -} - -static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, - const char *buf, size_t len) -{ - struct dlm_comm *cm = config_item_to_comm(i); - struct comm_attribute *cma = - container_of(a, struct comm_attribute, attr); - return cma->store ? cma->store(cm, buf, len) : -EINVAL; + return sprintf(buf, "%d\n", config_item_to_comm(item)->nodeid); } -static ssize_t comm_nodeid_read(struct dlm_comm *cm, char *buf) -{ - return sprintf(buf, "%d\n", cm->nodeid); -} - -static ssize_t comm_nodeid_write(struct dlm_comm *cm, const char *buf, +static ssize_t comm_nodeid_store(struct config_item *item, const char *buf, size_t len) { - int rc = kstrtoint(buf, 0, &cm->nodeid); + int rc = kstrtoint(buf, 0, &config_item_to_comm(item)->nodeid); if (rc) return rc; return len; } -static ssize_t comm_local_read(struct dlm_comm *cm, char *buf) +static ssize_t comm_local_show(struct config_item *item, char *buf) { - return sprintf(buf, "%d\n", cm->local); + return sprintf(buf, "%d\n", config_item_to_comm(item)->local); } -static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf, +static ssize_t comm_local_store(struct config_item *item, const char *buf, size_t len) { + struct dlm_comm *cm = config_item_to_comm(item); int rc = kstrtoint(buf, 0, &cm->local); if (rc) @@ -756,8 +608,10 @@ static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf, return len; } -static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) +static ssize_t comm_addr_store(struct config_item *item, const char *buf, + size_t len) { + struct dlm_comm *cm = config_item_to_comm(item); struct sockaddr_storage *addr; int rv; @@ -783,8 +637,9 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) return len; } -static ssize_t comm_addr_list_read(struct dlm_comm *cm, char *buf) +static ssize_t comm_addr_list_show(struct config_item *item, char *buf) { + struct dlm_comm *cm = config_item_to_comm(item); ssize_t s; ssize_t allowance; int i; @@ -827,32 +682,28 @@ static ssize_t comm_addr_list_read(struct dlm_comm *cm, char *buf) return 4096 - allowance; } -static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, - char *buf) -{ - struct dlm_node *nd = config_item_to_node(i); - struct node_attribute *nda = - container_of(a, struct node_attribute, attr); - return nda->show ? nda->show(nd, buf) : 0; -} +CONFIGFS_ATTR(comm_, nodeid); +CONFIGFS_ATTR(comm_, local); +CONFIGFS_ATTR_WO(comm_, addr); +CONFIGFS_ATTR_RO(comm_, addr_list); -static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, - const char *buf, size_t len) -{ - struct dlm_node *nd = config_item_to_node(i); - struct node_attribute *nda = - container_of(a, struct node_attribute, attr); - return nda->store ? nda->store(nd, buf, len) : -EINVAL; -} +static struct configfs_attribute *comm_attrs[] = { + [COMM_ATTR_NODEID] = &comm_attr_nodeid, + [COMM_ATTR_LOCAL] = &comm_attr_local, + [COMM_ATTR_ADDR] = &comm_attr_addr, + [COMM_ATTR_ADDR_LIST] = &comm_attr_addr_list, + NULL, +}; -static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf) +static ssize_t node_nodeid_show(struct config_item *item, char *buf) { - return sprintf(buf, "%d\n", nd->nodeid); + return sprintf(buf, "%d\n", config_item_to_node(item)->nodeid); } -static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, +static ssize_t node_nodeid_store(struct config_item *item, const char *buf, size_t len) { + struct dlm_node *nd = config_item_to_node(item); uint32_t seq = 0; int rc = kstrtoint(buf, 0, &nd->nodeid); @@ -863,21 +714,30 @@ static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, return len; } -static ssize_t node_weight_read(struct dlm_node *nd, char *buf) +static ssize_t node_weight_show(struct config_item *item, char *buf) { - return sprintf(buf, "%d\n", nd->weight); + return sprintf(buf, "%d\n", config_item_to_node(item)->weight); } -static ssize_t node_weight_write(struct dlm_node *nd, const char *buf, +static ssize_t node_weight_store(struct config_item *item, const char *buf, size_t len) { - int rc = kstrtoint(buf, 0, &nd->weight); + int rc = kstrtoint(buf, 0, &config_item_to_node(item)->weight); if (rc) return rc; return len; } +CONFIGFS_ATTR(node_, nodeid); +CONFIGFS_ATTR(node_, weight); + +static struct configfs_attribute *node_attrs[] = { + [NODE_ATTR_NODEID] = &node_attr_nodeid, + [NODE_ATTR_WEIGHT] = &node_attr_weight, + NULL, +}; + /* * Functions for the dlm to get the info that's been configured */ diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 3c4db1172d22..e2e47ba5d313 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -270,7 +270,7 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, ecryptfs_inode = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode); - if (unlikely(IS_ERR(ecryptfs_inode))) { + if (IS_ERR(ecryptfs_inode)) { ecryptfs_printk(KERN_WARNING, "Failed to create file in" "lower filesystem\n"); rc = PTR_ERR(ecryptfs_inode); diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c index 09a6bb1ad63c..994e078da4bb 100644 --- a/fs/exofs/namei.c +++ b/fs/exofs/namei.c @@ -80,9 +80,6 @@ static int exofs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, struct inode *inode; int err; - if (!new_valid_dev(rdev)) - return -EINVAL; - inode = exofs_new_inode(dir, mode); err = PTR_ERR(inode); if (!IS_ERR(inode)) { diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 8d15febd0aa3..4c69c94cafd8 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -684,6 +684,9 @@ struct ext2_inode_info { struct rw_semaphore xattr_sem; #endif rwlock_t i_meta_lock; +#ifdef CONFIG_FS_DAX + struct rw_semaphore dax_sem; +#endif /* * truncate_mutex is for serialising ext2_truncate() against @@ -699,6 +702,14 @@ struct ext2_inode_info { #endif }; +#ifdef CONFIG_FS_DAX +#define dax_sem_down_write(ext2_inode) down_write(&(ext2_inode)->dax_sem) +#define dax_sem_up_write(ext2_inode) up_write(&(ext2_inode)->dax_sem) +#else +#define dax_sem_down_write(ext2_inode) +#define dax_sem_up_write(ext2_inode) +#endif + /* * Inode dynamic state flags */ diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 1982c3f11aec..11a42c5a09ae 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -27,27 +27,103 @@ #include "acl.h" #ifdef CONFIG_FS_DAX +/* + * The lock ordering for ext2 DAX fault paths is: + * + * mmap_sem (MM) + * sb_start_pagefault (vfs, freeze) + * ext2_inode_info->dax_sem + * address_space->i_mmap_rwsem or page_lock (mutually exclusive in DAX) + * ext2_inode_info->truncate_mutex + * + * The default page_lock and i_size verification done by non-DAX fault paths + * is sufficient because ext2 doesn't support hole punching. + */ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { - return dax_fault(vma, vmf, ext2_get_block, NULL); + struct inode *inode = file_inode(vma->vm_file); + struct ext2_inode_info *ei = EXT2_I(inode); + int ret; + + if (vmf->flags & FAULT_FLAG_WRITE) { + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + } + down_read(&ei->dax_sem); + + ret = __dax_fault(vma, vmf, ext2_get_block, NULL); + + up_read(&ei->dax_sem); + if (vmf->flags & FAULT_FLAG_WRITE) + sb_end_pagefault(inode->i_sb); + return ret; } static int ext2_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, unsigned int flags) { - return dax_pmd_fault(vma, addr, pmd, flags, ext2_get_block, NULL); + struct inode *inode = file_inode(vma->vm_file); + struct ext2_inode_info *ei = EXT2_I(inode); + int ret; + + if (flags & FAULT_FLAG_WRITE) { + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + } + down_read(&ei->dax_sem); + + ret = __dax_pmd_fault(vma, addr, pmd, flags, ext2_get_block, NULL); + + up_read(&ei->dax_sem); + if (flags & FAULT_FLAG_WRITE) + sb_end_pagefault(inode->i_sb); + return ret; } static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { - return dax_mkwrite(vma, vmf, ext2_get_block, NULL); + struct inode *inode = file_inode(vma->vm_file); + struct ext2_inode_info *ei = EXT2_I(inode); + int ret; + + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + down_read(&ei->dax_sem); + + ret = __dax_mkwrite(vma, vmf, ext2_get_block, NULL); + + up_read(&ei->dax_sem); + sb_end_pagefault(inode->i_sb); + return ret; +} + +static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vma->vm_file); + struct ext2_inode_info *ei = EXT2_I(inode); + int ret = VM_FAULT_NOPAGE; + loff_t size; + + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + down_read(&ei->dax_sem); + + /* check that the faulting page hasn't raced with truncate */ + size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (vmf->pgoff >= size) + ret = VM_FAULT_SIGBUS; + + up_read(&ei->dax_sem); + sb_end_pagefault(inode->i_sb); + return ret; } static const struct vm_operations_struct ext2_dax_vm_ops = { .fault = ext2_dax_fault, .pmd_fault = ext2_dax_pmd_fault, .page_mkwrite = ext2_dax_mkwrite, - .pfn_mkwrite = dax_pfn_mkwrite, + .pfn_mkwrite = ext2_dax_pfn_mkwrite, }; static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index c60a248c640c..0aa9bf6e6e53 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1085,6 +1085,7 @@ static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int de ext2_free_data(inode, p, q); } +/* dax_sem must be held when calling this function */ static void __ext2_truncate_blocks(struct inode *inode, loff_t offset) { __le32 *i_data = EXT2_I(inode)->i_data; @@ -1100,6 +1101,10 @@ static void __ext2_truncate_blocks(struct inode *inode, loff_t offset) blocksize = inode->i_sb->s_blocksize; iblock = (offset + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb); +#ifdef CONFIG_FS_DAX + WARN_ON(!rwsem_is_locked(&ei->dax_sem)); +#endif + n = ext2_block_to_path(inode, iblock, offsets, NULL); if (n == 0) return; @@ -1185,7 +1190,10 @@ static void ext2_truncate_blocks(struct inode *inode, loff_t offset) return; if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return; + + dax_sem_down_write(EXT2_I(inode)); __ext2_truncate_blocks(inode, offset); + dax_sem_up_write(EXT2_I(inode)); } static int ext2_setsize(struct inode *inode, loff_t newsize) @@ -1213,8 +1221,10 @@ static int ext2_setsize(struct inode *inode, loff_t newsize) if (error) return error; + dax_sem_down_write(EXT2_I(inode)); truncate_setsize(inode, newsize); __ext2_truncate_blocks(inode, newsize); + dax_sem_up_write(EXT2_I(inode)); inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; if (inode_needs_sync(inode)) { diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index b4841e3066a5..3267a80dbbe2 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -143,9 +143,6 @@ static int ext2_mknod (struct inode * dir, struct dentry *dentry, umode_t mode, struct inode * inode; int err; - if (!new_valid_dev(rdev)) - return -EINVAL; - err = dquot_initialize(dir); if (err) return err; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 900e19cf9ef6..748d35afc902 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -192,6 +192,9 @@ static void init_once(void *foo) init_rwsem(&ei->xattr_sem); #endif mutex_init(&ei->truncate_mutex); +#ifdef CONFIG_FS_DAX + init_rwsem(&ei->dax_sem); +#endif inode_init_once(&ei->vfs_inode); } @@ -566,6 +569,8 @@ static int parse_options(char *options, struct super_block *sb) /* Fall through */ case Opt_dax: #ifdef CONFIG_FS_DAX + ext2_msg(sb, KERN_WARNING, + "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); set_opt(sbi->s_mount_opt, DAX); #else ext2_msg(sb, KERN_INFO, "dax option not supported"); diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 0b6bfd3a398b..fa70848afa8f 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -293,10 +293,9 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", ext2_xattr_handler(entry->e_name_index); if (handler) { - size_t size = handler->list(dentry, buffer, rest, - entry->e_name, - entry->e_name_len, - handler->flags); + size_t size = handler->list(handler, dentry, buffer, + rest, entry->e_name, + entry->e_name_len); if (buffer) { if (size > rest) { error = -ERANGE; diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c index 702fc6840246..dfb08750370d 100644 --- a/fs/ext2/xattr_security.c +++ b/fs/ext2/xattr_security.c @@ -8,8 +8,9 @@ #include "xattr.h" static size_t -ext2_xattr_security_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) +ext2_xattr_security_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { const int prefix_len = XATTR_SECURITY_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -23,8 +24,9 @@ ext2_xattr_security_list(struct dentry *dentry, char *list, size_t list_size, } static int -ext2_xattr_security_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +ext2_xattr_security_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (strcmp(name, "") == 0) return -EINVAL; @@ -33,8 +35,9 @@ ext2_xattr_security_get(struct dentry *dentry, const char *name, } static int -ext2_xattr_security_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +ext2_xattr_security_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (strcmp(name, "") == 0) return -EINVAL; diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c index 42b6e9874bcc..3150dd3a7859 100644 --- a/fs/ext2/xattr_trusted.c +++ b/fs/ext2/xattr_trusted.c @@ -9,8 +9,9 @@ #include "xattr.h" static size_t -ext2_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) +ext2_xattr_trusted_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { const int prefix_len = XATTR_TRUSTED_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -27,8 +28,9 @@ ext2_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size, } static int -ext2_xattr_trusted_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +ext2_xattr_trusted_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (strcmp(name, "") == 0) return -EINVAL; @@ -37,8 +39,9 @@ ext2_xattr_trusted_get(struct dentry *dentry, const char *name, } static int -ext2_xattr_trusted_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +ext2_xattr_trusted_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (strcmp(name, "") == 0) return -EINVAL; diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c index ecdc4605192c..339a49bbb8ef 100644 --- a/fs/ext2/xattr_user.c +++ b/fs/ext2/xattr_user.c @@ -11,8 +11,9 @@ #include "xattr.h" static size_t -ext2_xattr_user_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) +ext2_xattr_user_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { const size_t prefix_len = XATTR_USER_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -29,8 +30,9 @@ ext2_xattr_user_list(struct dentry *dentry, char *list, size_t list_size, } static int -ext2_xattr_user_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +ext2_xattr_user_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (strcmp(name, "") == 0) return -EINVAL; @@ -41,8 +43,9 @@ ext2_xattr_user_get(struct dentry *dentry, const char *name, } static int -ext2_xattr_user_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +ext2_xattr_user_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (strcmp(name, "") == 0) return -EINVAL; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 3a6197a2e270..551353b1b17a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -900,7 +900,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, bh = read_extent_tree_block(inode, path[ppos].p_block, --i, flags); - if (unlikely(IS_ERR(bh))) { + if (IS_ERR(bh)) { ret = PTR_ERR(bh); goto err; } @@ -5796,7 +5796,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1, int split = 0; path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE); - if (unlikely(IS_ERR(path1))) { + if (IS_ERR(path1)) { *erp = PTR_ERR(path1); path1 = NULL; finish: @@ -5804,7 +5804,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1, goto repeat; } path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE); - if (unlikely(IS_ERR(path2))) { + if (IS_ERR(path2)) { *erp = PTR_ERR(path2); path2 = NULL; goto finish; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e8d620a484f6..ea433a7f4bca 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3386,7 +3386,7 @@ static int __ext4_block_zero_page_range(handle_t *handle, int err = 0; page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, - mapping_gfp_mask(mapping) & ~__GFP_FS); + mapping_gfp_constraint(mapping, ~__GFP_FS)); if (!page) return -ENOMEM; @@ -5283,7 +5283,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) !ext4_should_journal_data(inode) && !ext4_nonda_switch(inode->i_sb)) { do { - ret = __block_page_mkwrite(vma, vmf, + ret = block_page_mkwrite(vma, vmf, ext4_da_get_block_prep); } while (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)); @@ -5330,7 +5330,7 @@ retry_alloc: ret = VM_FAULT_SIGBUS; goto out; } - ret = __block_page_mkwrite(vma, vmf, get_block); + ret = block_page_mkwrite(vma, vmf, get_block); if (!ret && ext4_should_journal_data(inode)) { if (ext4_walk_page_buffers(handle, page_buffers(page), 0, PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b4b3c1f91814..61eaf74dca37 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3333,8 +3333,8 @@ ext4_mb_check_group_pa(ext4_fsblk_t goal_block, atomic_inc(&pa->pa_count); return pa; } - cur_distance = abs64(goal_block - cpa->pa_pstart); - new_distance = abs64(goal_block - pa->pa_pstart); + cur_distance = abs(goal_block - cpa->pa_pstart); + new_distance = abs(goal_block - pa->pa_pstart); if (cur_distance <= new_distance) return cpa; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 19ce34525a59..a969ab39f302 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1429,7 +1429,7 @@ restart: } num++; bh = ext4_getblk(NULL, dir, b++, 0); - if (unlikely(IS_ERR(bh))) { + if (IS_ERR(bh)) { if (ra_max == 0) { ret = bh; goto cleanup_and_exit; diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index d94af71a4e7f..5dc5e95063de 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -166,7 +166,7 @@ int ext4_mpage_readpages(struct address_space *mapping, page = list_entry(pages->prev, struct page, lru); list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, page->index, - GFP_KERNEL & mapping_gfp_mask(mapping))) + mapping_gfp_constraint(mapping, GFP_KERNEL))) goto next_page; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 04d0f1b33409..c9ab67da6e5a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1061,7 +1061,7 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page, return 0; if (journal) return jbd2_journal_try_to_free_buffers(journal, page, - wait & ~__GFP_WAIT); + wait & ~__GFP_DIRECT_RECLAIM); return try_to_free_buffers(page); } @@ -1664,8 +1664,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, } sbi->s_jquota_fmt = m->mount_opt; #endif -#ifndef CONFIG_FS_DAX } else if (token == Opt_dax) { +#ifdef CONFIG_FS_DAX + ext4_msg(sb, KERN_WARNING, + "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); + sbi->s_mount_opt |= m->mount_opt; +#else ext4_msg(sb, KERN_INFO, "dax option not supported"); return -1; #endif diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 984448c6f5f0..6b6b3e751f8c 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -405,10 +405,9 @@ ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry, ext4_xattr_handler(entry->e_name_index); if (handler) { - size_t size = handler->list(dentry, buffer, rest, - entry->e_name, - entry->e_name_len, - handler->flags); + size_t size = handler->list(handler, dentry, buffer, + rest, entry->e_name, + entry->e_name_len); if (buffer) { if (size > rest) return -ERANGE; diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index 95d90e0560f0..36f4c1a84c21 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c @@ -12,8 +12,9 @@ #include "xattr.h" static size_t -ext4_xattr_security_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) +ext4_xattr_security_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; const size_t total_len = prefix_len + name_len + 1; @@ -28,8 +29,9 @@ ext4_xattr_security_list(struct dentry *dentry, char *list, size_t list_size, } static int -ext4_xattr_security_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +ext4_xattr_security_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (strcmp(name, "") == 0) return -EINVAL; @@ -38,8 +40,9 @@ ext4_xattr_security_get(struct dentry *dentry, const char *name, } static int -ext4_xattr_security_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +ext4_xattr_security_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (strcmp(name, "") == 0) return -EINVAL; diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c index 891ee2ddfbd6..488089053342 100644 --- a/fs/ext4/xattr_trusted.c +++ b/fs/ext4/xattr_trusted.c @@ -13,8 +13,9 @@ #include "xattr.h" static size_t -ext4_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) +ext4_xattr_trusted_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -31,8 +32,9 @@ ext4_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size, } static int -ext4_xattr_trusted_get(struct dentry *dentry, const char *name, void *buffer, - size_t size, int type) +ext4_xattr_trusted_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, void *buffer, + size_t size) { if (strcmp(name, "") == 0) return -EINVAL; @@ -41,8 +43,9 @@ ext4_xattr_trusted_get(struct dentry *dentry, const char *name, void *buffer, } static int -ext4_xattr_trusted_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +ext4_xattr_trusted_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (strcmp(name, "") == 0) return -EINVAL; diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c index 6ed932b3c043..d2dec3364062 100644 --- a/fs/ext4/xattr_user.c +++ b/fs/ext4/xattr_user.c @@ -12,8 +12,9 @@ #include "xattr.h" static size_t -ext4_xattr_user_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) +ext4_xattr_user_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { const size_t prefix_len = XATTR_USER_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -30,8 +31,9 @@ ext4_xattr_user_list(struct dentry *dentry, char *list, size_t list_size, } static int -ext4_xattr_user_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +ext4_xattr_user_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (strcmp(name, "") == 0) return -EINVAL; @@ -42,8 +44,9 @@ ext4_xattr_user_get(struct dentry *dentry, const char *name, } static int -ext4_xattr_user_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +ext4_xattr_user_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (strcmp(name, "") == 0) return -EINVAL; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index e48b80c49090..2c32110f9fc0 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -481,9 +481,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, struct inode *inode; int err = 0; - if (!new_valid_dev(rdev)) - return -EINVAL; - f2fs_balance_fs(sbi); inode = f2fs_new_inode(dir, mode); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 4de2286c0e4d..862368a32e53 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -25,49 +25,45 @@ #include "f2fs.h" #include "xattr.h" -static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t len, int type) +static size_t f2fs_xattr_generic_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t len) { struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); - int total_len, prefix_len = 0; - const char *prefix = NULL; + int total_len, prefix_len; - switch (type) { + switch (handler->flags) { case F2FS_XATTR_INDEX_USER: if (!test_opt(sbi, XATTR_USER)) return -EOPNOTSUPP; - prefix = XATTR_USER_PREFIX; - prefix_len = XATTR_USER_PREFIX_LEN; break; case F2FS_XATTR_INDEX_TRUSTED: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - prefix = XATTR_TRUSTED_PREFIX; - prefix_len = XATTR_TRUSTED_PREFIX_LEN; break; case F2FS_XATTR_INDEX_SECURITY: - prefix = XATTR_SECURITY_PREFIX; - prefix_len = XATTR_SECURITY_PREFIX_LEN; break; default: return -EINVAL; } + prefix_len = strlen(handler->prefix); total_len = prefix_len + len + 1; if (list && total_len <= list_size) { - memcpy(list, prefix, prefix_len); + memcpy(list, handler->prefix, prefix_len); memcpy(list + prefix_len, name, len); list[prefix_len + len] = '\0'; } return total_len; } -static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int f2fs_xattr_generic_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, void *buffer, + size_t size) { struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); - switch (type) { + switch (handler->flags) { case F2FS_XATTR_INDEX_USER: if (!test_opt(sbi, XATTR_USER)) return -EOPNOTSUPP; @@ -83,15 +79,17 @@ static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name, } if (strcmp(name, "") == 0) return -EINVAL; - return f2fs_getxattr(d_inode(dentry), type, name, buffer, size, NULL); + return f2fs_getxattr(d_inode(dentry), handler->flags, name, + buffer, size, NULL); } -static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +static int f2fs_xattr_generic_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, const void *value, + size_t size, int flags) { struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); - switch (type) { + switch (handler->flags) { case F2FS_XATTR_INDEX_USER: if (!test_opt(sbi, XATTR_USER)) return -EOPNOTSUPP; @@ -108,27 +106,26 @@ static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, if (strcmp(name, "") == 0) return -EINVAL; - return f2fs_setxattr(d_inode(dentry), type, name, + return f2fs_setxattr(d_inode(dentry), handler->flags, name, value, size, NULL, flags); } -static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t len, int type) +static size_t f2fs_xattr_advise_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t len) { const char *xname = F2FS_SYSTEM_ADVISE_PREFIX; size_t size; - if (type != F2FS_XATTR_INDEX_ADVISE) - return 0; - size = strlen(xname) + 1; if (list && size <= list_size) memcpy(list, xname, size); return size; } -static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int f2fs_xattr_advise_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, void *buffer, + size_t size) { struct inode *inode = d_inode(dentry); @@ -140,8 +137,9 @@ static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name, return sizeof(char); } -static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +static int f2fs_xattr_advise_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, const void *value, + size_t size, int flags) { struct inode *inode = d_inode(dentry); @@ -462,8 +460,8 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) if (!handler) continue; - size = handler->list(dentry, buffer, rest, entry->e_name, - entry->e_name_len, handler->flags); + size = handler->list(handler, dentry, buffer, rest, + entry->e_name, entry->e_name_len); if (buffer && size > rest) { error = -ERANGE; goto cleanup; diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 4afc4d9d2e41..8b2127ffb226 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -610,9 +610,9 @@ parse_record: int status = fat_parse_long(inode, &cpos, &bh, &de, &unicode, &nr_slots); if (status < 0) { - ctx->pos = cpos; + bh = NULL; ret = status; - goto out; + goto end_of_dir; } else if (status == PARSE_INVALID) goto record_end; else if (status == PARSE_NOT_LONGNAME) @@ -654,8 +654,9 @@ parse_record: fill_len = short_len; start_filldir: - if (!fake_offset) - ctx->pos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry); + ctx->pos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry); + if (fake_offset && ctx->pos < 2) + ctx->pos = 2; if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) { if (!dir_emit_dot(file, ctx)) @@ -681,14 +682,19 @@ record_end: fake_offset = 0; ctx->pos = cpos; goto get_new; + end_of_dir: - ctx->pos = cpos; + if (fake_offset && cpos < 2) + ctx->pos = 2; + else + ctx->pos = cpos; fill_failed: brelse(bh); if (unicode) __putname(unicode); out: mutex_unlock(&sbi->s_lock); + return ret; } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 206a68b1db1a..023f6a1f23cd 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1981,9 +1981,9 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode) * page->mapping->host, so the page-dirtying time is recorded in the internal * blockdev inode. */ -#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) void __mark_inode_dirty(struct inode *inode, int flags) { +#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) struct super_block *sb = inode->i_sb; int dirtytime; @@ -2093,6 +2093,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) out_unlock_inode: spin_unlock(&inode->i_lock); +#undef I_DIRTY_INODE } EXPORT_SYMBOL(__mark_inode_dirty); diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index d403c69bee08..4304072161aa 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -111,7 +111,7 @@ struct fscache_cookie *__fscache_acquire_cookie( /* radix tree insertion won't use the preallocation pool unless it's * told it may not wait */ - INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_WAIT); + INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); switch (cookie->def->type) { case FSCACHE_COOKIE_TYPE_INDEX: diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c index 6d941f56faf4..9b28649df3a1 100644 --- a/fs/fscache/netfs.c +++ b/fs/fscache/netfs.c @@ -22,6 +22,7 @@ static LIST_HEAD(fscache_netfs_list); int __fscache_register_netfs(struct fscache_netfs *netfs) { struct fscache_netfs *ptr; + struct fscache_cookie *cookie; int ret; _enter("{%s}", netfs->name); @@ -29,29 +30,25 @@ int __fscache_register_netfs(struct fscache_netfs *netfs) INIT_LIST_HEAD(&netfs->link); /* allocate a cookie for the primary index */ - netfs->primary_index = - kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL); + cookie = kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL); - if (!netfs->primary_index) { + if (!cookie) { _leave(" = -ENOMEM"); return -ENOMEM; } /* initialise the primary index cookie */ - atomic_set(&netfs->primary_index->usage, 1); - atomic_set(&netfs->primary_index->n_children, 0); - atomic_set(&netfs->primary_index->n_active, 1); + atomic_set(&cookie->usage, 1); + atomic_set(&cookie->n_children, 0); + atomic_set(&cookie->n_active, 1); - netfs->primary_index->def = &fscache_fsdef_netfs_def; - netfs->primary_index->parent = &fscache_fsdef_index; - netfs->primary_index->netfs_data = netfs; - netfs->primary_index->flags = 1 << FSCACHE_COOKIE_ENABLED; + cookie->def = &fscache_fsdef_netfs_def; + cookie->parent = &fscache_fsdef_index; + cookie->netfs_data = netfs; + cookie->flags = 1 << FSCACHE_COOKIE_ENABLED; - atomic_inc(&netfs->primary_index->parent->usage); - atomic_inc(&netfs->primary_index->parent->n_children); - - spin_lock_init(&netfs->primary_index->lock); - INIT_HLIST_HEAD(&netfs->primary_index->backing_objects); + spin_lock_init(&cookie->lock); + INIT_HLIST_HEAD(&cookie->backing_objects); /* check the netfs type is not already present */ down_write(&fscache_addremove_sem); @@ -62,6 +59,10 @@ int __fscache_register_netfs(struct fscache_netfs *netfs) goto already_registered; } + atomic_inc(&cookie->parent->usage); + atomic_inc(&cookie->parent->n_children); + + netfs->primary_index = cookie; list_add(&netfs->link, &fscache_netfs_list); ret = 0; @@ -70,11 +71,8 @@ int __fscache_register_netfs(struct fscache_netfs *netfs) already_registered: up_write(&fscache_addremove_sem); - if (ret < 0) { - netfs->primary_index->parent = NULL; - __fscache_cookie_put(netfs->primary_index); - netfs->primary_index = NULL; - } + if (ret < 0) + kmem_cache_free(fscache_cookie_jar, cookie); _leave(" = %d", ret); return ret; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 483bbc613bf0..6b35fc4860a0 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -58,7 +58,7 @@ bool release_page_wait_timeout(struct fscache_cookie *cookie, struct page *page) /* * decide whether a page can be released, possibly by cancelling a store to it - * - we're allowed to sleep if __GFP_WAIT is flagged + * - we're allowed to sleep if __GFP_DIRECT_RECLAIM is flagged */ bool __fscache_maybe_release_page(struct fscache_cookie *cookie, struct page *page, @@ -122,7 +122,7 @@ page_busy: * allocator as the work threads writing to the cache may all end up * sleeping on memory allocation, so we may need to impose a timeout * too. */ - if (!(gfp & __GFP_WAIT) || !(gfp & __GFP_FS)) { + if (!(gfp & __GFP_DIRECT_RECLAIM) || !(gfp & __GFP_FS)) { fscache_stat(&fscache_n_store_vmscan_busy); return false; } @@ -132,7 +132,7 @@ page_busy: _debug("fscache writeout timeout page: %p{%lx}", page, page->index); - gfp &= ~__GFP_WAIT; + gfp &= ~__GFP_DIRECT_RECLAIM; goto try_again; } EXPORT_SYMBOL(__fscache_maybe_release_page); @@ -816,7 +816,7 @@ static void fscache_write_op(struct fscache_operation *_op) goto superseded; page = results[0]; _debug("gang %d [%lx]", n, page->index); - if (page->index > op->store_limit) { + if (page->index >= op->store_limit) { fscache_stat(&fscache_n_store_pages_over_limit); goto superseded; } diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 487527b42d94..ad8a5b757cc7 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -388,8 +388,13 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip) */ void gfs2_dir_hash_inval(struct gfs2_inode *ip) { - __be64 *hc = ip->i_hash_cache; + __be64 *hc; + + spin_lock(&ip->i_inode.i_lock); + hc = ip->i_hash_cache; ip->i_hash_cache = NULL; + spin_unlock(&ip->i_inode.i_lock); + kvfree(hc); } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 9287a2d17b8c..5e425469f0c2 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -897,8 +897,8 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t if (!(mode & FALLOC_FL_KEEP_SIZE) && (pos + count) > inode->i_size) { i_size_write(inode, pos + count); - /* Marks the inode as dirty */ file_update_time(file); + mark_inode_dirty(inode); } return generic_write_sync(file, pos, count); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 9bd1244caf38..32e74710b1aa 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -246,8 +246,8 @@ static inline void do_error(struct gfs2_glock *gl, const int ret) */ static int do_promote(struct gfs2_glock *gl) -__releases(&gl->gl_spin) -__acquires(&gl->gl_spin) +__releases(&gl->gl_lockref.lock) +__acquires(&gl->gl_lockref.lock) { const struct gfs2_glock_operations *glops = gl->gl_ops; struct gfs2_holder *gh, *tmp; @@ -260,10 +260,10 @@ restart: if (may_grant(gl, gh)) { if (gh->gh_list.prev == &gl->gl_holders && glops->go_lock) { - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); /* FIXME: eliminate this eventually */ ret = glops->go_lock(gh); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); if (ret) { if (ret == 1) return 2; @@ -361,7 +361,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) unsigned state = ret & LM_OUT_ST_MASK; int rv; - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); trace_gfs2_glock_state_change(gl, state); state_change(gl, state); gh = find_first_waiter(gl); @@ -405,7 +405,7 @@ retry: pr_err("wanted %u got %u\n", gl->gl_target, state); GLOCK_BUG_ON(gl, 1); } - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); return; } @@ -414,9 +414,9 @@ retry: gfs2_demote_wake(gl); if (state != LM_ST_UNLOCKED) { if (glops->go_xmote_bh) { - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); rv = glops->go_xmote_bh(gl, gh); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); if (rv) { do_error(gl, rv); goto out; @@ -429,7 +429,7 @@ retry: out: clear_bit(GLF_LOCK, &gl->gl_flags); out_locked: - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); } /** @@ -441,8 +441,8 @@ out_locked: */ static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target) -__releases(&gl->gl_spin) -__acquires(&gl->gl_spin) +__releases(&gl->gl_lockref.lock) +__acquires(&gl->gl_lockref.lock) { const struct gfs2_glock_operations *glops = gl->gl_ops; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; @@ -464,7 +464,7 @@ __acquires(&gl->gl_spin) (gl->gl_state == LM_ST_EXCLUSIVE) || (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB))) clear_bit(GLF_BLOCKING, &gl->gl_flags); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (glops->go_sync) glops->go_sync(gl); if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) @@ -485,7 +485,7 @@ __acquires(&gl->gl_spin) gfs2_glock_put(gl); } - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); } /** @@ -513,8 +513,8 @@ static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl) */ static void run_queue(struct gfs2_glock *gl, const int nonblock) -__releases(&gl->gl_spin) -__acquires(&gl->gl_spin) +__releases(&gl->gl_lockref.lock) +__acquires(&gl->gl_lockref.lock) { struct gfs2_holder *gh = NULL; int ret; @@ -596,7 +596,7 @@ static void glock_work_func(struct work_struct *work) finish_xmote(gl, gl->gl_reply); drop_ref = 1; } - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && gl->gl_state != LM_ST_UNLOCKED && gl->gl_demote_state != LM_ST_EXCLUSIVE) { @@ -612,7 +612,7 @@ static void glock_work_func(struct work_struct *work) } } run_queue(gl, 0); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (!delay) gfs2_glock_put(gl); else { @@ -876,8 +876,8 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) */ static inline void add_to_queue(struct gfs2_holder *gh) -__releases(&gl->gl_spin) -__acquires(&gl->gl_spin) +__releases(&gl->gl_lockref.lock) +__acquires(&gl->gl_lockref.lock) { struct gfs2_glock *gl = gh->gh_gl; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; @@ -926,10 +926,10 @@ fail: do_cancel: gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); if (!(gh->gh_flags & LM_FLAG_PRIORITY)) { - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (sdp->sd_lockstruct.ls_ops->lm_cancel) sdp->sd_lockstruct.ls_ops->lm_cancel(gl); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); } return; @@ -967,7 +967,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh) if (test_bit(GLF_LRU, &gl->gl_flags)) gfs2_glock_remove_from_lru(gl); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); add_to_queue(gh); if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) && test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) { @@ -977,7 +977,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh) gl->gl_lockref.count--; } run_queue(gl, 1); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (!(gh->gh_flags & GL_ASYNC)) error = gfs2_glock_wait(gh); @@ -1010,7 +1010,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh) unsigned delay = 0; int fast_path = 0; - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); if (gh->gh_flags & GL_NOCACHE) handle_callback(gl, LM_ST_UNLOCKED, 0, false); @@ -1018,9 +1018,9 @@ void gfs2_glock_dq(struct gfs2_holder *gh) if (find_first_holder(gl) == NULL) { if (glops->go_unlock) { GLOCK_BUG_ON(gl, test_and_set_bit(GLF_LOCK, &gl->gl_flags)); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); glops->go_unlock(gh); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); clear_bit(GLF_LOCK, &gl->gl_flags); } if (list_empty(&gl->gl_holders) && @@ -1033,7 +1033,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh) gfs2_glock_add_to_lru(gl); trace_gfs2_glock_queue(gh, 0); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (likely(fast_path)) return; @@ -1217,9 +1217,9 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) delay = gl->gl_hold_time; } - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); handle_callback(gl, state, delay, true); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) gfs2_glock_put(gl); } @@ -1259,7 +1259,7 @@ static int gfs2_should_freeze(const struct gfs2_glock *gl) * @gl: Pointer to the glock * @ret: The return value from the dlm * - * The gl_reply field is under the gl_spin lock so that it is ok + * The gl_reply field is under the gl_lockref.lock lock so that it is ok * to use a bitfield shared with other glock state fields. */ @@ -1267,20 +1267,20 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) { struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct; - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); gl->gl_reply = ret; if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) { if (gfs2_should_freeze(gl)) { set_bit(GLF_FROZEN, &gl->gl_flags); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); return; } } gl->gl_lockref.count++; set_bit(GLF_REPLY_PENDING, &gl->gl_flags); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); @@ -1326,14 +1326,14 @@ __acquires(&lru_lock) while(!list_empty(list)) { gl = list_entry(list->next, struct gfs2_glock, gl_lru); list_del_init(&gl->gl_lru); - if (!spin_trylock(&gl->gl_spin)) { + if (!spin_trylock(&gl->gl_lockref.lock)) { add_back_to_lru: list_add(&gl->gl_lru, &lru_list); atomic_inc(&lru_count); continue; } if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); goto add_back_to_lru; } clear_bit(GLF_LRU, &gl->gl_flags); @@ -1343,7 +1343,7 @@ add_back_to_lru: WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags)); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gl->gl_lockref.count--; - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); cond_resched_lock(&lru_lock); } } @@ -1461,10 +1461,10 @@ static void clear_glock(struct gfs2_glock *gl) { gfs2_glock_remove_from_lru(gl); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); if (gl->gl_state != LM_ST_UNLOCKED) handle_callback(gl, LM_ST_UNLOCKED, 0, false); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); } @@ -1482,9 +1482,9 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp) static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl) { - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); gfs2_dump_glock(seq, gl); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); } static void dump_glock_func(struct gfs2_glock *gl) @@ -1518,10 +1518,10 @@ void gfs2_glock_finish_truncate(struct gfs2_inode *ip) ret = gfs2_truncatei_resume(ip); gfs2_assert_withdraw(gl->gl_name.ln_sbd, ret == 0); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); clear_bit(GLF_LOCK, &gl->gl_flags); run_queue(gl, 1); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); } static const char *state2str(unsigned state) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 32572f71f027..f7cdaa8b4c83 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -141,7 +141,7 @@ static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock * struct pid *pid; /* Look in glock's list of holders for one with current task as owner */ - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); pid = task_pid(current); list_for_each_entry(gh, &gl->gl_holders, gh_list) { if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) @@ -151,7 +151,7 @@ static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock * } gh = NULL; out: - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); return gh; } diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 1f6c9c3fe5cb..f348cfb6b69a 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -146,11 +146,11 @@ static void rgrp_go_sync(struct gfs2_glock *gl) struct gfs2_rgrpd *rgd; int error; - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); rgd = gl->gl_object; if (rgd) gfs2_rgrp_brelse(rgd); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) return; @@ -162,11 +162,11 @@ static void rgrp_go_sync(struct gfs2_glock *gl) mapping_set_error(mapping, error); gfs2_ail_empty_gl(gl); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); rgd = gl->gl_object; if (rgd) gfs2_free_clones(rgd); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); } /** @@ -542,7 +542,7 @@ static int freeze_go_demote_ok(const struct gfs2_glock *gl) * iopen_go_callback - schedule the dcache entry for the inode to be deleted * @gl: the glock * - * gl_spin lock is held while calling this + * gl_lockref.lock lock is held while calling this */ static void iopen_go_callback(struct gfs2_glock *gl, bool remote) { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 121ed08d9d9f..de7b4f97ac75 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -334,9 +334,8 @@ struct gfs2_glock { struct lm_lockname gl_name; struct lockref gl_lockref; -#define gl_spin gl_lockref.lock - /* State fields protected by gl_spin */ + /* State fields protected by gl_lockref.lock */ unsigned int gl_state:2, /* Current state */ gl_target:2, /* Target state */ gl_demote_state:2, /* State requested by remote node */ diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 284c1542783e..8b907c5cc913 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -50,7 +50,7 @@ static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index, s64 delta = sample - s->stats[index]; s->stats[index] += (delta >> 3); index++; - s->stats[index] += ((abs64(delta) - s->stats[index]) >> 2); + s->stats[index] += ((abs(delta) - s->stats[index]) >> 2); } /** diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 241a399bf83d..fb2b42cf46b5 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -50,7 +50,7 @@ static void gfs2_init_glock_once(void *foo) struct gfs2_glock *gl = foo; INIT_HLIST_BL_NODE(&gl->gl_list); - spin_lock_init(&gl->gl_spin); + spin_lock_init(&gl->gl_lockref.lock); INIT_LIST_HEAD(&gl->gl_holders); INIT_LIST_HEAD(&gl->gl_lru); INIT_LIST_HEAD(&gl->gl_ail_list); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 02586e7eb964..baab99b69d8a 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1291,6 +1291,9 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, up_write(&s->s_umount); blkdev_put(bdev, mode); down_write(&s->s_umount); + } else { + /* s_mode must be set before deactivate_locked_super calls */ + s->s_mode = mode; } memset(&args, 0, sizeof(args)); @@ -1314,7 +1317,6 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, } else { char b[BDEVNAME_SIZE]; - s->s_mode = mode; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(bdev)); error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 475985d14758..c134c0462cee 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -729,9 +729,9 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) rb_erase(n, &sdp->sd_rindex_tree); if (gl) { - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); gl->gl_object = NULL; - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); gfs2_glock_add_to_lru(gl); gfs2_glock_put(gl); } @@ -933,8 +933,9 @@ static int read_rindex_entry(struct gfs2_inode *ip) goto fail; rgd->rd_gl->gl_object = rgd; - rgd->rd_gl->gl_vm.start = rgd->rd_addr * bsize; - rgd->rd_gl->gl_vm.end = rgd->rd_gl->gl_vm.start + (rgd->rd_length * bsize) - 1; + rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_CACHE_MASK; + rgd->rd_gl->gl_vm.end = PAGE_CACHE_ALIGN((rgd->rd_addr + + rgd->rd_length) * bsize) - 1; rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED); if (rgd->rd_data > sdp->sd_max_rg_data) diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index b95d0d625f32..0c1bde395062 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -176,6 +176,8 @@ void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh) unlock_buffer(bh); if (bh->b_private == NULL) bd = gfs2_alloc_bufdata(gl, bh, &gfs2_databuf_lops); + else + bd = bh->b_private; lock_buffer(bh); gfs2_log_lock(sdp); } @@ -236,6 +238,8 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) lock_page(bh->b_page); if (bh->b_private == NULL) bd = gfs2_alloc_bufdata(gl, bh, &gfs2_buf_lops); + else + bd = bh->b_private; unlock_page(bh->b_page); lock_buffer(bh); gfs2_log_lock(sdp); diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 4c096fa9e2a1..53ce76a374fe 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -583,11 +583,13 @@ out: * * Returns: actual size of data on success, -errno on error */ -static int gfs2_xattr_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int gfs2_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { struct gfs2_inode *ip = GFS2_I(d_inode(dentry)); struct gfs2_ea_location el; + int type = handler->flags; int error; if (!ip->i_eattr) @@ -1227,11 +1229,12 @@ int __gfs2_xattr_set(struct inode *inode, const char *name, return error; } -static int gfs2_xattr_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +static int gfs2_xattr_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { return __gfs2_xattr_set(d_inode(dentry), name, value, - size, flags, type); + size, flags, handler->flags); } diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index 416b1dbafe51..e41a010cd89c 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -849,8 +849,9 @@ end_removexattr: return err; } -static int hfsplus_osx_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int hfsplus_osx_getxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (!strcmp(name, "")) return -EINVAL; @@ -871,8 +872,9 @@ static int hfsplus_osx_getxattr(struct dentry *dentry, const char *name, return __hfsplus_getxattr(d_inode(dentry), name, buffer, size); } -static int hfsplus_osx_setxattr(struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags, int type) +static int hfsplus_osx_setxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags) { if (!strcmp(name, "")) return -EINVAL; @@ -893,19 +895,8 @@ static int hfsplus_osx_setxattr(struct dentry *dentry, const char *name, return __hfsplus_setxattr(d_inode(dentry), name, buffer, size, flags); } -static size_t hfsplus_osx_listxattr(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) -{ - /* - * This method is not used. - * It is used hfsplus_listxattr() instead of generic_listxattr(). - */ - return -EOPNOTSUPP; -} - const struct xattr_handler hfsplus_xattr_osx_handler = { .prefix = XATTR_MAC_OSX_PREFIX, - .list = hfsplus_osx_listxattr, .get = hfsplus_osx_getxattr, .set = hfsplus_osx_setxattr, }; diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c index aacff00a9ff9..72a68a3a0c99 100644 --- a/fs/hfsplus/xattr_security.c +++ b/fs/hfsplus/xattr_security.c @@ -13,32 +13,24 @@ #include "xattr.h" #include "acl.h" -static int hfsplus_security_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int hfsplus_security_getxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { return hfsplus_getxattr(dentry, name, buffer, size, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); } -static int hfsplus_security_setxattr(struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags, int type) +static int hfsplus_security_setxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags) { return hfsplus_setxattr(dentry, name, buffer, size, flags, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); } -static size_t hfsplus_security_listxattr(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) -{ - /* - * This method is not used. - * It is used hfsplus_listxattr() instead of generic_listxattr(). - */ - return -EOPNOTSUPP; -} - static int hfsplus_initxattrs(struct inode *inode, const struct xattr *xattr_array, void *fs_info) @@ -92,7 +84,6 @@ int hfsplus_init_inode_security(struct inode *inode, const struct xattr_handler hfsplus_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, - .list = hfsplus_security_listxattr, .get = hfsplus_security_getxattr, .set = hfsplus_security_setxattr, }; diff --git a/fs/hfsplus/xattr_trusted.c b/fs/hfsplus/xattr_trusted.c index bcf65089b7f7..95a7704c7abb 100644 --- a/fs/hfsplus/xattr_trusted.c +++ b/fs/hfsplus/xattr_trusted.c @@ -11,34 +11,25 @@ #include "hfsplus_fs.h" #include "xattr.h" -static int hfsplus_trusted_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int hfsplus_trusted_getxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { return hfsplus_getxattr(dentry, name, buffer, size, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); } -static int hfsplus_trusted_setxattr(struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags, int type) +static int hfsplus_trusted_setxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags) { return hfsplus_setxattr(dentry, name, buffer, size, flags, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); } -static size_t hfsplus_trusted_listxattr(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) -{ - /* - * This method is not used. - * It is used hfsplus_listxattr() instead of generic_listxattr(). - */ - return -EOPNOTSUPP; -} - const struct xattr_handler hfsplus_xattr_trusted_handler = { .prefix = XATTR_TRUSTED_PREFIX, - .list = hfsplus_trusted_listxattr, .get = hfsplus_trusted_getxattr, .set = hfsplus_trusted_setxattr, }; diff --git a/fs/hfsplus/xattr_user.c b/fs/hfsplus/xattr_user.c index 5aa0e6dc4a1e..6fc269baf959 100644 --- a/fs/hfsplus/xattr_user.c +++ b/fs/hfsplus/xattr_user.c @@ -11,34 +11,25 @@ #include "hfsplus_fs.h" #include "xattr.h" -static int hfsplus_user_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int hfsplus_user_getxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { return hfsplus_getxattr(dentry, name, buffer, size, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); } -static int hfsplus_user_setxattr(struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags, int type) +static int hfsplus_user_setxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags) { return hfsplus_setxattr(dentry, name, buffer, size, flags, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); } -static size_t hfsplus_user_listxattr(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) -{ - /* - * This method is not used. - * It is used hfsplus_listxattr() instead of generic_listxattr(). - */ - return -EOPNOTSUPP; -} - const struct xattr_handler hfsplus_xattr_user_handler = { .prefix = XATTR_USER_PREFIX, - .list = hfsplus_user_listxattr, .get = hfsplus_user_getxattr, .set = hfsplus_user_setxattr, }; diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 9e92c9c2d319..ae4d5a1fa4c9 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -227,8 +227,6 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, de int err; if ((err = hpfs_chk_name(name, &len))) return err==-ENOENT ? -EINVAL : err; if (hpfs_sb(dir->i_sb)->sb_eas < 2) return -EPERM; - if (!new_valid_dev(rdev)) - return -EINVAL; hpfs_lock(dir->i_sb); err = -ENOSPC; fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 316adb968b65..de4bdfac0cec 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -332,12 +332,17 @@ static void remove_huge_page(struct page *page) * truncation is indicated by end of range being LLONG_MAX * In this case, we first scan the range and release found pages. * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv - * maps and global counts. + * maps and global counts. Page faults can not race with truncation + * in this routine. hugetlb_no_page() prevents page faults in the + * truncated range. It checks i_size before allocation, and again after + * with the page table lock for the page held. The same lock must be + * acquired to unmap a page. * hole punch is indicated if end is not LLONG_MAX * In the hole punch case we scan the range and release found pages. * Only when releasing a page is the associated region/reserv map * deleted. The region/reserv map for ranges without associated - * pages are not modified. + * pages are not modified. Page faults can race with hole punch. + * This is indicated if we find a mapped page. * Note: If the passed end of range value is beyond the end of file, but * not LLONG_MAX this routine still performs a hole punch operation. */ @@ -361,46 +366,37 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, next = start; while (next < end) { /* - * Make sure to never grab more pages that we - * might possibly need. + * Don't grab more pages than the number left in the range. */ if (end - next < lookup_nr) lookup_nr = end - next; /* - * This pagevec_lookup() may return pages past 'end', - * so we must check for page->index > end. + * When no more pages are found, we are done. */ - if (!pagevec_lookup(&pvec, mapping, next, lookup_nr)) { - if (next == start) - break; - next = start; - continue; - } + if (!pagevec_lookup(&pvec, mapping, next, lookup_nr)) + break; for (i = 0; i < pagevec_count(&pvec); ++i) { struct page *page = pvec.pages[i]; u32 hash; + /* + * The page (index) could be beyond end. This is + * only possible in the punch hole case as end is + * max page offset in the truncate case. + */ + next = page->index; + if (next >= end) + break; + hash = hugetlb_fault_mutex_hash(h, current->mm, &pseudo_vma, mapping, next, 0); mutex_lock(&hugetlb_fault_mutex_table[hash]); lock_page(page); - if (page->index >= end) { - unlock_page(page); - mutex_unlock(&hugetlb_fault_mutex_table[hash]); - next = end; /* we are done */ - break; - } - - /* - * If page is mapped, it was faulted in after being - * unmapped. Do nothing in this race case. In the - * normal case page is not mapped. - */ - if (!page_mapped(page)) { + if (likely(!page_mapped(page))) { bool rsv_on_error = !PagePrivate(page); /* * We must free the huge page and remove @@ -421,17 +417,23 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, hugetlb_fix_reserve_counts( inode, rsv_on_error); } + } else { + /* + * If page is mapped, it was faulted in after + * being unmapped. It indicates a race between + * hole punch and page fault. Do nothing in + * this case. Getting here in a truncate + * operation is a bug. + */ + BUG_ON(truncate_op); } - if (page->index > next) - next = page->index; - - ++next; unlock_page(page); - mutex_unlock(&hugetlb_fault_mutex_table[hash]); } + ++next; huge_pagevec_release(&pvec); + cond_resched(); } if (truncate_op) @@ -647,9 +649,6 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) i_size_write(inode, offset + len); inode->i_ctime = CURRENT_TIME; - spin_lock(&inode->i_lock); - inode->i_private = NULL; - spin_unlock(&inode->i_lock); out: mutex_unlock(&inode->i_mutex); return error; diff --git a/fs/inode.c b/fs/inode.c index 78a17b8859e1..1be5f9003eb3 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1597,6 +1597,7 @@ static int update_time(struct inode *inode, struct timespec *time, int flags) /** * touch_atime - update the access time * @path: the &struct path to update + * @inode: inode to update * * Update the accessed time on an inode and mark it for writeback. * This function automatically handles read only file systems and media, diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 6b8338ec2464..89463eee6791 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1937,8 +1937,8 @@ out: * @journal: journal for operation * @page: to try and free * @gfp_mask: we use the mask to detect how hard should we try to release - * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to - * release the buffers. + * buffers. If __GFP_DIRECT_RECLAIM and __GFP_FS is set, we wait for commit + * code to release the buffers. * * * For all the buffers on this page, diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index bb9cebc9ca8a..e5c1783ab64a 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c @@ -80,7 +80,6 @@ static int jffs2_garbage_collect_thread(void *_c) siginitset(&hupmask, sigmask(SIGHUP)); allow_signal(SIGKILL); allow_signal(SIGSTOP); - allow_signal(SIGCONT); allow_signal(SIGHUP); c->gc_task = current; @@ -121,20 +120,18 @@ static int jffs2_garbage_collect_thread(void *_c) /* Put_super will send a SIGKILL and then wait on the sem. */ while (signal_pending(current) || freezing(current)) { - siginfo_t info; unsigned long signr; if (try_to_freeze()) goto again; - signr = dequeue_signal_lock(current, ¤t->blocked, &info); + signr = kernel_dequeue_signal(NULL); switch(signr) { case SIGSTOP: jffs2_dbg(1, "%s(): SIGSTOP received\n", __func__); - set_current_state(TASK_STOPPED); - schedule(); + kernel_signal_stop(); break; case SIGKILL: diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c index d4b43fb7adb1..bf12fe5f83d7 100644 --- a/fs/jffs2/security.c +++ b/fs/jffs2/security.c @@ -48,8 +48,9 @@ int jffs2_init_security(struct inode *inode, struct inode *dir, } /* ---- XATTR Handler for "security.*" ----------------- */ -static int jffs2_security_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int jffs2_security_getxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (!strcmp(name, "")) return -EINVAL; @@ -58,8 +59,9 @@ static int jffs2_security_getxattr(struct dentry *dentry, const char *name, name, buffer, size); } -static int jffs2_security_setxattr(struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags, int type) +static int jffs2_security_setxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags) { if (!strcmp(name, "")) return -EINVAL; @@ -68,8 +70,10 @@ static int jffs2_security_setxattr(struct dentry *dentry, const char *name, name, buffer, size, flags); } -static size_t jffs2_security_listxattr(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) +static size_t jffs2_security_listxattr(const struct xattr_handler *handler, + struct dentry *dentry, char *list, + size_t list_size, const char *name, + size_t name_len) { size_t retlen = XATTR_SECURITY_PREFIX_LEN + name_len + 1; diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index 63f31c0733c5..f3a4857ff071 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -1264,7 +1264,7 @@ int jffs2_dataflash_setup(struct jffs2_sb_info *c) { if ((c->flash_size % c->sector_size) != 0) { c->flash_size = (c->flash_size / c->sector_size) * c->sector_size; pr_warn("flash size adjusted to %dKiB\n", c->flash_size); - }; + } c->wbuf_ofs = 0xFFFFFFFF; c->wbuf = kmalloc(c->wbuf_pagesize, GFP_KERNEL); diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index f092fee5be50..4c2c03663533 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -1001,11 +1001,12 @@ ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size) if (!xhandle) continue; if (buffer) { - rc = xhandle->list(dentry, buffer+len, size-len, - xd->xname, xd->name_len, xd->flags); + rc = xhandle->list(xhandle, dentry, buffer + len, + size - len, xd->xname, + xd->name_len); } else { - rc = xhandle->list(dentry, NULL, 0, xd->xname, - xd->name_len, xd->flags); + rc = xhandle->list(xhandle, dentry, NULL, 0, + xd->xname, xd->name_len); } if (rc < 0) goto out; diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c index ceaf9c693225..a562da0d6a26 100644 --- a/fs/jffs2/xattr_trusted.c +++ b/fs/jffs2/xattr_trusted.c @@ -16,8 +16,9 @@ #include <linux/mtd/mtd.h> #include "nodelist.h" -static int jffs2_trusted_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int jffs2_trusted_getxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (!strcmp(name, "")) return -EINVAL; @@ -25,8 +26,9 @@ static int jffs2_trusted_getxattr(struct dentry *dentry, const char *name, name, buffer, size); } -static int jffs2_trusted_setxattr(struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags, int type) +static int jffs2_trusted_setxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags) { if (!strcmp(name, "")) return -EINVAL; @@ -34,11 +36,16 @@ static int jffs2_trusted_setxattr(struct dentry *dentry, const char *name, name, buffer, size, flags); } -static size_t jffs2_trusted_listxattr(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) +static size_t jffs2_trusted_listxattr(const struct xattr_handler *handler, + struct dentry *dentry, char *list, + size_t list_size, const char *name, + size_t name_len) { size_t retlen = XATTR_TRUSTED_PREFIX_LEN + name_len + 1; + if (!capable(CAP_SYS_ADMIN)) + return 0; + if (list && retlen<=list_size) { strcpy(list, XATTR_TRUSTED_PREFIX); strcpy(list + XATTR_TRUSTED_PREFIX_LEN, name); diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c index a71391eba514..cbc0472e59a8 100644 --- a/fs/jffs2/xattr_user.c +++ b/fs/jffs2/xattr_user.c @@ -16,8 +16,9 @@ #include <linux/mtd/mtd.h> #include "nodelist.h" -static int jffs2_user_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int jffs2_user_getxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (!strcmp(name, "")) return -EINVAL; @@ -25,8 +26,9 @@ static int jffs2_user_getxattr(struct dentry *dentry, const char *name, name, buffer, size); } -static int jffs2_user_setxattr(struct dentry *dentry, const char *name, - const void *buffer, size_t size, int flags, int type) +static int jffs2_user_setxattr(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags) { if (!strcmp(name, "")) return -EINVAL; @@ -34,8 +36,10 @@ static int jffs2_user_setxattr(struct dentry *dentry, const char *name, name, buffer, size, flags); } -static size_t jffs2_user_listxattr(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) +static size_t jffs2_user_listxattr(const struct xattr_handler *handler, + struct dentry *dentry, char *list, + size_t list_size, const char *name, + size_t name_len) { size_t retlen = XATTR_USER_PREFIX_LEN + name_len + 1; diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 35976bdccafc..9d7551f5c32a 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1372,9 +1372,6 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, tid_t tid; struct tblock *tblk; - if (!new_valid_dev(rdev)) - return -EINVAL; - jfs_info("jfs_mknod: %pd", dentry); rc = dquot_initialize(dir); diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 4cd9798f4948..8f9176caf098 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -496,9 +496,6 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) jfs_info("In jfs_read_super: s_flags=0x%lx", sb->s_flags); - if (!new_valid_dev(sb->s_bdev->bd_dev)) - return -EOVERFLOW; - sbi = kzalloc(sizeof(struct jfs_sb_info), GFP_KERNEL); if (!sbi) return -ENOMEM; diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 969d589c848d..d716c9993a26 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -116,7 +116,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni, atomic_inc(&nsm->sm_count); else { host = NULL; - nsm = nsm_get_handle(ni->sap, ni->salen, + nsm = nsm_get_handle(ni->net, ni->sap, ni->salen, ni->hostname, ni->hostname_len); if (unlikely(nsm == NULL)) { dprintk("lockd: %s failed; no nsm handle\n", @@ -161,6 +161,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni, host->h_nsmhandle = nsm; host->h_addrbuf = nsm->sm_addrbuf; host->net = ni->net; + strlcpy(host->nodename, utsname()->nodename, sizeof(host->nodename)); out: return host; @@ -534,17 +535,18 @@ static struct nlm_host *next_host_state(struct hlist_head *cache, /** * nlm_host_rebooted - Release all resources held by rebooted host + * @net: network namespace * @info: pointer to decoded results of NLM_SM_NOTIFY call * * We were notified that the specified host has rebooted. Release * all resources held by that peer. */ -void nlm_host_rebooted(const struct nlm_reboot *info) +void nlm_host_rebooted(const struct net *net, const struct nlm_reboot *info) { struct nsm_handle *nsm; struct nlm_host *host; - nsm = nsm_reboot_lookup(info); + nsm = nsm_reboot_lookup(net, info); if (unlikely(nsm == NULL)) return; diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 47a32b6d9b90..19166d4a8d31 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -42,7 +42,7 @@ struct nsm_args { u32 proc; char *mon_name; - char *nodename; + const char *nodename; }; struct nsm_res { @@ -51,7 +51,6 @@ struct nsm_res { }; static const struct rpc_program nsm_program; -static LIST_HEAD(nsm_handles); static DEFINE_SPINLOCK(nsm_lock); /* @@ -87,69 +86,18 @@ static struct rpc_clnt *nsm_create(struct net *net, const char *nodename) return rpc_create(&args); } -static struct rpc_clnt *nsm_client_set(struct lockd_net *ln, - struct rpc_clnt *clnt) -{ - spin_lock(&ln->nsm_clnt_lock); - if (ln->nsm_users == 0) { - if (clnt == NULL) - goto out; - ln->nsm_clnt = clnt; - } - clnt = ln->nsm_clnt; - ln->nsm_users++; -out: - spin_unlock(&ln->nsm_clnt_lock); - return clnt; -} - -static struct rpc_clnt *nsm_client_get(struct net *net, const char *nodename) -{ - struct rpc_clnt *clnt, *new; - struct lockd_net *ln = net_generic(net, lockd_net_id); - - clnt = nsm_client_set(ln, NULL); - if (clnt != NULL) - goto out; - - clnt = new = nsm_create(net, nodename); - if (IS_ERR(clnt)) - goto out; - - clnt = nsm_client_set(ln, new); - if (clnt != new) - rpc_shutdown_client(new); -out: - return clnt; -} - -static void nsm_client_put(struct net *net) -{ - struct lockd_net *ln = net_generic(net, lockd_net_id); - struct rpc_clnt *clnt = NULL; - - spin_lock(&ln->nsm_clnt_lock); - ln->nsm_users--; - if (ln->nsm_users == 0) { - clnt = ln->nsm_clnt; - ln->nsm_clnt = NULL; - } - spin_unlock(&ln->nsm_clnt_lock); - if (clnt != NULL) - rpc_shutdown_client(clnt); -} - static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, - struct rpc_clnt *clnt) + const struct nlm_host *host) { int status; + struct rpc_clnt *clnt; struct nsm_args args = { .priv = &nsm->sm_priv, .prog = NLM_PROGRAM, .vers = 3, .proc = NLMPROC_NSM_NOTIFY, .mon_name = nsm->sm_mon_name, - .nodename = clnt->cl_nodename, + .nodename = host->nodename, }; struct rpc_message msg = { .rpc_argp = &args, @@ -158,6 +106,13 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, memset(res, 0, sizeof(*res)); + clnt = nsm_create(host->net, host->nodename); + if (IS_ERR(clnt)) { + dprintk("lockd: failed to create NSM upcall transport, " + "status=%ld, net=%p\n", PTR_ERR(clnt), host->net); + return PTR_ERR(clnt); + } + msg.rpc_proc = &clnt->cl_procinfo[proc]; status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN); if (status == -ECONNREFUSED) { @@ -171,6 +126,8 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, status); else status = 0; + + rpc_shutdown_client(clnt); return status; } @@ -190,32 +147,19 @@ int nsm_monitor(const struct nlm_host *host) struct nsm_handle *nsm = host->h_nsmhandle; struct nsm_res res; int status; - struct rpc_clnt *clnt; - const char *nodename = NULL; dprintk("lockd: nsm_monitor(%s)\n", nsm->sm_name); if (nsm->sm_monitored) return 0; - if (host->h_rpcclnt) - nodename = host->h_rpcclnt->cl_nodename; - /* * Choose whether to record the caller_name or IP address of * this peer in the local rpc.statd's database. */ nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf; - clnt = nsm_client_get(host->net, nodename); - if (IS_ERR(clnt)) { - status = PTR_ERR(clnt); - dprintk("lockd: failed to create NSM upcall transport, " - "status=%d, net=%p\n", status, host->net); - return status; - } - - status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, clnt); + status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, host); if (unlikely(res.status != 0)) status = -EIO; if (unlikely(status < 0)) { @@ -247,11 +191,9 @@ void nsm_unmonitor(const struct nlm_host *host) if (atomic_read(&nsm->sm_count) == 1 && nsm->sm_monitored && !nsm->sm_sticky) { - struct lockd_net *ln = net_generic(host->net, lockd_net_id); - dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name); - status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, ln->nsm_clnt); + status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, host); if (res.status != 0) status = -EIO; if (status < 0) @@ -259,38 +201,38 @@ void nsm_unmonitor(const struct nlm_host *host) nsm->sm_name); else nsm->sm_monitored = 0; - - nsm_client_put(host->net); } } -static struct nsm_handle *nsm_lookup_hostname(const char *hostname, - const size_t len) +static struct nsm_handle *nsm_lookup_hostname(const struct list_head *nsm_handles, + const char *hostname, const size_t len) { struct nsm_handle *nsm; - list_for_each_entry(nsm, &nsm_handles, sm_link) + list_for_each_entry(nsm, nsm_handles, sm_link) if (strlen(nsm->sm_name) == len && memcmp(nsm->sm_name, hostname, len) == 0) return nsm; return NULL; } -static struct nsm_handle *nsm_lookup_addr(const struct sockaddr *sap) +static struct nsm_handle *nsm_lookup_addr(const struct list_head *nsm_handles, + const struct sockaddr *sap) { struct nsm_handle *nsm; - list_for_each_entry(nsm, &nsm_handles, sm_link) + list_for_each_entry(nsm, nsm_handles, sm_link) if (rpc_cmp_addr(nsm_addr(nsm), sap)) return nsm; return NULL; } -static struct nsm_handle *nsm_lookup_priv(const struct nsm_private *priv) +static struct nsm_handle *nsm_lookup_priv(const struct list_head *nsm_handles, + const struct nsm_private *priv) { struct nsm_handle *nsm; - list_for_each_entry(nsm, &nsm_handles, sm_link) + list_for_each_entry(nsm, nsm_handles, sm_link) if (memcmp(nsm->sm_priv.data, priv->data, sizeof(priv->data)) == 0) return nsm; @@ -353,6 +295,7 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap, /** * nsm_get_handle - Find or create a cached nsm_handle + * @net: network namespace * @sap: pointer to socket address of handle to find * @salen: length of socket address * @hostname: pointer to C string containing hostname to find @@ -365,11 +308,13 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap, * @hostname cannot be found in the handle cache. Returns NULL if * an error occurs. */ -struct nsm_handle *nsm_get_handle(const struct sockaddr *sap, +struct nsm_handle *nsm_get_handle(const struct net *net, + const struct sockaddr *sap, const size_t salen, const char *hostname, const size_t hostname_len) { struct nsm_handle *cached, *new = NULL; + struct lockd_net *ln = net_generic(net, lockd_net_id); if (hostname && memchr(hostname, '/', hostname_len) != NULL) { if (printk_ratelimit()) { @@ -384,9 +329,10 @@ retry: spin_lock(&nsm_lock); if (nsm_use_hostnames && hostname != NULL) - cached = nsm_lookup_hostname(hostname, hostname_len); + cached = nsm_lookup_hostname(&ln->nsm_handles, + hostname, hostname_len); else - cached = nsm_lookup_addr(sap); + cached = nsm_lookup_addr(&ln->nsm_handles, sap); if (cached != NULL) { atomic_inc(&cached->sm_count); @@ -400,7 +346,7 @@ retry: } if (new != NULL) { - list_add(&new->sm_link, &nsm_handles); + list_add(&new->sm_link, &ln->nsm_handles); spin_unlock(&nsm_lock); dprintk("lockd: created nsm_handle for %s (%s)\n", new->sm_name, new->sm_addrbuf); @@ -417,19 +363,22 @@ retry: /** * nsm_reboot_lookup - match NLMPROC_SM_NOTIFY arguments to an nsm_handle + * @net: network namespace * @info: pointer to NLMPROC_SM_NOTIFY arguments * * Returns a matching nsm_handle if found in the nsm cache. The returned * nsm_handle's reference count is bumped. Otherwise returns NULL if some * error occurred. */ -struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info) +struct nsm_handle *nsm_reboot_lookup(const struct net *net, + const struct nlm_reboot *info) { struct nsm_handle *cached; + struct lockd_net *ln = net_generic(net, lockd_net_id); spin_lock(&nsm_lock); - cached = nsm_lookup_priv(&info->priv); + cached = nsm_lookup_priv(&ln->nsm_handles, &info->priv); if (unlikely(cached == NULL)) { spin_unlock(&nsm_lock); dprintk("lockd: never saw rebooted peer '%.*s' before\n", diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h index 097bfa3adb1c..5426189406c1 100644 --- a/fs/lockd/netns.h +++ b/fs/lockd/netns.h @@ -12,9 +12,7 @@ struct lockd_net { struct delayed_work grace_period_end; struct lock_manager lockd_manager; - spinlock_t nsm_clnt_lock; - unsigned int nsm_users; - struct rpc_clnt *nsm_clnt; + struct list_head nsm_handles; }; extern int lockd_net_id; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index d678bcc3cbcb..5f31ebd96c06 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -592,7 +592,7 @@ static int lockd_init_net(struct net *net) INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender); INIT_LIST_HEAD(&ln->lockd_manager.list); ln->lockd_manager.block_opens = false; - spin_lock_init(&ln->nsm_clnt_lock); + INIT_LIST_HEAD(&ln->nsm_handles); return 0; } diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index b147d1ae71fd..09c576f26c7b 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -421,7 +421,7 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, return rpc_system_err; } - nlm_host_rebooted(argp); + nlm_host_rebooted(SVC_NET(rqstp), argp); return rpc_success; } diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 21171f0c6477..fb26b9f522e7 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -464,7 +464,7 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, return rpc_system_err; } - nlm_host_rebooted(argp); + nlm_host_rebooted(SVC_NET(rqstp), argp); return rpc_success; } diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index 7f9b096d8d57..6de0fbfc6c00 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c @@ -57,7 +57,7 @@ static struct page *get_mapping_page(struct super_block *sb, pgoff_t index, filler_t *filler = super->s_devops->readpage; struct page *page; - BUG_ON(mapping_gfp_mask(mapping) & __GFP_FS); + BUG_ON(mapping_gfp_constraint(mapping, __GFP_FS)); if (use_filler) page = read_cache_page(mapping, index, filler, sb); else { diff --git a/fs/mpage.c b/fs/mpage.c index 09abba7653aa..1480d3a18037 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -361,7 +361,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, sector_t last_block_in_bio = 0; struct buffer_head map_bh; unsigned long first_logical_block = 0; - gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(mapping); + gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL); map_bh.b_state = 0; map_bh.b_size = 0; @@ -397,7 +397,7 @@ int mpage_readpage(struct page *page, get_block_t get_block) sector_t last_block_in_bio = 0; struct buffer_head map_bh; unsigned long first_logical_block = 0; - gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(page->mapping); + gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL); map_bh.b_state = 0; map_bh.b_size = 0; diff --git a/fs/namei.c b/fs/namei.c index 0d3340b32e14..d84d7c7515fc 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1966,7 +1966,7 @@ OK: if (err) { const char *s = get_link(nd); - if (unlikely(IS_ERR(s))) + if (IS_ERR(s)) return PTR_ERR(s); err = 0; if (unlikely(!s)) { @@ -3380,7 +3380,7 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, return ERR_PTR(-ELOOP); filename = getname_kernel(name); - if (unlikely(IS_ERR(filename))) + if (IS_ERR(filename)) return ERR_CAST(filename); set_nameidata(&nd, -1, filename); @@ -4604,7 +4604,7 @@ EXPORT_SYMBOL(__page_symlink); int page_symlink(struct inode *inode, const char *symname, int len) { return __page_symlink(inode, symname, len, - !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS)); + !mapping_gfp_constraint(inode->i_mapping, __GFP_FS)); } EXPORT_SYMBOL(page_symlink); diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 93575e91a7aa..f0e3e9e747dd 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -597,7 +597,7 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx, qname.name = __name; newdent = d_hash_and_lookup(dentry, &qname); - if (unlikely(IS_ERR(newdent))) + if (IS_ERR(newdent)) goto end_advance; if (!newdent) { newdent = d_alloc(dentry, &qname); @@ -1165,8 +1165,6 @@ out: static int ncp_mknod(struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev) { - if (!new_valid_dev(rdev)) - return -EINVAL; if (ncp_is_nfs_extras(NCP_SERVER(dir), NCP_FINFO(dir)->volNumber)) { ncp_dbg(1, "mode = 0%ho\n", mode); return ncp_create_new(dir, dentry, mode, rdev, 0); diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 79b113048eac..0a3f9b594602 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -525,6 +525,8 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg switch (rqdata.cmd) { case NCP_LOCK_EX: case NCP_LOCK_SH: + if (rqdata.timeout < 0) + return -EINVAL; if (rqdata.timeout == 0) rqdata.timeout = NCP_LOCK_DEFAULT_TIMEOUT; else if (rqdata.timeout > NCP_LOCK_MAX_TIMEOUT) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 9cd4eb3a1e22..ddd0138f410c 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -229,7 +229,7 @@ bl_read_pagelist(struct nfs_pgio_header *header) struct parallel_io *par; loff_t f_offset = header->args.offset; size_t bytes_left = header->args.count; - unsigned int pg_offset, pg_len; + unsigned int pg_offset = header->args.pgbase, pg_len; struct page **pages = header->args.pages; int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; const bool is_dio = (header->dreq != NULL); @@ -262,7 +262,6 @@ bl_read_pagelist(struct nfs_pgio_header *header) extent_length = be.be_length - (isect - be.be_f_offset); } - pg_offset = f_offset & ~PAGE_CACHE_MASK; if (is_dio) { if (pg_offset + bytes_left > PAGE_CACHE_SIZE) pg_len = PAGE_CACHE_SIZE - pg_offset; @@ -273,9 +272,6 @@ bl_read_pagelist(struct nfs_pgio_header *header) pg_len = PAGE_CACHE_SIZE; } - isect += (pg_offset >> SECTOR_SHIFT); - extent_length -= (pg_offset >> SECTOR_SHIFT); - if (is_hole(&be)) { bio = bl_submit_bio(READ, bio); /* Fill hole w/ zeroes w/o accessing device */ @@ -301,6 +297,7 @@ bl_read_pagelist(struct nfs_pgio_header *header) extent_length -= (pg_len >> SECTOR_SHIFT); f_offset += pg_len; bytes_left -= pg_len; + pg_offset = 0; } if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { header->res.eof = 1; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 75f7c0a7538a..a7f2e6e33305 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -99,17 +99,6 @@ nfs4_callback_up(struct svc_serv *serv) } #if defined(CONFIG_NFS_V4_1) -static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net) -{ - /* - * Create an svc_sock for the back channel service that shares the - * fore channel connection. - * Returns the input port (0) and sets the svc_serv bc_xprt on success - */ - return svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0, - SVC_SOCK_ANONYMOUS); -} - /* * The callback service for NFSv4.1 callbacks */ @@ -184,11 +173,6 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, xprt->bc_serv = serv; } #else -static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net) -{ - return 0; -} - static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv, struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) { @@ -259,7 +243,8 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc svc_shutdown_net(serv, net); } -static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct net *net) +static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, + struct net *net, struct rpc_xprt *xprt) { struct nfs_net *nn = net_generic(net, nfs_net_id); int ret; @@ -275,20 +260,11 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct n goto err_bind; } - switch (minorversion) { - case 0: - ret = nfs4_callback_up_net(serv, net); - break; - case 1: - case 2: - ret = nfs41_callback_up_net(serv, net); - break; - default: - printk(KERN_ERR "NFS: unknown callback version: %d\n", - minorversion); - ret = -EINVAL; - break; - } + ret = -EPROTONOSUPPORT; + if (minorversion == 0) + ret = nfs4_callback_up_net(serv, net); + else if (xprt->ops->bc_up) + ret = xprt->ops->bc_up(serv, net); if (ret < 0) { printk(KERN_ERR "NFS: callback service start failed\n"); @@ -364,7 +340,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) goto err_create; } - ret = nfs_callback_up_net(minorversion, serv, net); + ret = nfs_callback_up_net(minorversion, serv, net, xprt); if (ret < 0) goto err_net; diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 84326e9fb47a..ff8195bd75ea 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -61,7 +61,6 @@ struct cb_compound_hdr_res { }; struct cb_getattrargs { - struct sockaddr *addr; struct nfs_fh fh; uint32_t bitmap[2]; }; @@ -76,7 +75,6 @@ struct cb_getattrres { }; struct cb_recallargs { - struct sockaddr *addr; struct nfs_fh fh; nfs4_stateid stateid; uint32_t truncate; @@ -119,9 +117,6 @@ extern __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, struct cb_sequenceres *res, struct cb_process_state *cps); -extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, - const nfs4_stateid *stateid); - #define RCA4_TYPE_MASK_RDATA_DLG 0 #define RCA4_TYPE_MASK_WDATA_DLG 1 #define RCA4_TYPE_MASK_DIR_DLG 2 @@ -134,7 +129,6 @@ extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, #define RCA4_TYPE_MASK_ALL 0xf31f struct cb_recallanyargs { - struct sockaddr *craa_addr; uint32_t craa_objs_to_keep; uint32_t craa_type_mask; }; @@ -144,7 +138,6 @@ extern __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, struct cb_process_state *cps); struct cb_recallslotargs { - struct sockaddr *crsa_addr; uint32_t crsa_target_highest_slotid; }; extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, @@ -152,7 +145,6 @@ extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, struct cb_process_state *cps); struct cb_layoutrecallargs { - struct sockaddr *cbl_addr; uint32_t cbl_recall_type; uint32_t cbl_layout_type; uint32_t cbl_layoutchanged; @@ -196,9 +188,6 @@ extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, #if IS_ENABLED(CONFIG_NFS_V4) extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt); extern void nfs_callback_down(int minorversion, struct net *net); -extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, - const nfs4_stateid *stateid); -extern int nfs4_set_callback_sessionid(struct nfs_client *clp); #endif /* CONFIG_NFS_V4 */ /* * nfs41: Callbacks are expected to not cause substantial latency, @@ -209,6 +198,5 @@ extern int nfs4_set_callback_sessionid(struct nfs_client *clp); #define NFS41_BC_MAX_CALLBACKS 1 extern unsigned int nfs_callback_set_tcpport; -extern unsigned short nfs_callback_tcpport; #endif /* __LINUX_FS_NFS_CALLBACK_H */ diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index b85cf7a30232..807eb6ef4f91 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -17,9 +17,7 @@ #include "nfs4session.h" #include "nfs4trace.h" -#ifdef NFS_DEBUG #define NFSDBG_FACILITY NFSDBG_CALLBACK -#endif __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res, diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 6b1697a01dde..beac58b0e09c 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -18,19 +18,21 @@ #include "internal.h" #include "nfs4session.h" -#define CB_OP_TAGLEN_MAXSZ (512) -#define CB_OP_HDR_RES_MAXSZ (2 + CB_OP_TAGLEN_MAXSZ) -#define CB_OP_GETATTR_BITMAP_MAXSZ (4) -#define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ - CB_OP_GETATTR_BITMAP_MAXSZ + \ - 2 + 2 + 3 + 3) -#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) +#define CB_OP_TAGLEN_MAXSZ (512) +#define CB_OP_HDR_RES_MAXSZ (2 * 4) // opcode, status +#define CB_OP_GETATTR_BITMAP_MAXSZ (4 * 4) // bitmap length, 3 bitmaps +#define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ + CB_OP_GETATTR_BITMAP_MAXSZ + \ + /* change, size, ctime, mtime */\ + (2 + 2 + 3 + 3) * 4) +#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #if defined(CONFIG_NFS_V4_1) #define CB_OP_LAYOUTRECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #define CB_OP_DEVICENOTIFY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ - 4 + 1 + 3) + NFS4_MAX_SESSIONID_LEN + \ + (1 + 3) * 4) // seqid, 3 slotids #define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #define CB_OP_RECALLSLOT_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #endif /* CONFIG_NFS_V4_1 */ @@ -76,7 +78,8 @@ static __be32 *read_buf(struct xdr_stream *xdr, int nbytes) p = xdr_inline_decode(xdr, nbytes); if (unlikely(p == NULL)) - printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed!\n"); + printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed " + "or truncated request.\n"); return p; } @@ -157,7 +160,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound if (unlikely(status != 0)) return status; /* We do not like overly long tags! */ - if (hdr->taglen > CB_OP_TAGLEN_MAXSZ - 12) { + if (hdr->taglen > CB_OP_TAGLEN_MAXSZ) { printk("NFS: NFSv4 CALLBACK %s: client sent tag of length %u\n", __func__, hdr->taglen); return htonl(NFS4ERR_RESOURCE); @@ -198,7 +201,6 @@ static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr status = decode_fh(xdr, &args->fh); if (unlikely(status != 0)) goto out; - args->addr = svc_addr(rqstp); status = decode_bitmap(xdr, args->bitmap); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); @@ -210,7 +212,6 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, __be32 *p; __be32 status; - args->addr = svc_addr(rqstp); status = decode_stateid(xdr, &args->stateid); if (unlikely(status != 0)) goto out; @@ -236,7 +237,6 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp, __be32 status = 0; uint32_t iomode; - args->cbl_addr = svc_addr(rqstp); p = read_buf(xdr, 4 * sizeof(uint32_t)); if (unlikely(p == NULL)) { status = htonl(NFS4ERR_BADXDR); @@ -383,13 +383,12 @@ static __be32 decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid) { __be32 *p; - int len = NFS4_MAX_SESSIONID_LEN; - p = read_buf(xdr, len); + p = read_buf(xdr, NFS4_MAX_SESSIONID_LEN); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); - memcpy(sid->data, p, len); + memcpy(sid->data, p, NFS4_MAX_SESSIONID_LEN); return 0; } @@ -500,7 +499,6 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp, uint32_t bitmap[2]; __be32 *p, status; - args->craa_addr = svc_addr(rqstp); p = read_buf(xdr, 4); if (unlikely(p == NULL)) return htonl(NFS4ERR_BADXDR); @@ -519,7 +517,6 @@ static __be32 decode_recallslot_args(struct svc_rqst *rqstp, { __be32 *p; - args->crsa_addr = svc_addr(rqstp); p = read_buf(xdr, 4); if (unlikely(p == NULL)) return htonl(NFS4ERR_BADXDR); @@ -684,13 +681,12 @@ static __be32 encode_sessionid(struct xdr_stream *xdr, const struct nfs4_sessionid *sid) { __be32 *p; - int len = NFS4_MAX_SESSIONID_LEN; - p = xdr_reserve_space(xdr, len); + p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); - memcpy(p, sid, len); + memcpy(p, sid, NFS4_MAX_SESSIONID_LEN); return 0; } @@ -704,7 +700,9 @@ static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp, if (unlikely(status != 0)) goto out; - encode_sessionid(xdr, &res->csr_sessionid); + status = encode_sessionid(xdr, &res->csr_sessionid); + if (status) + goto out; p = xdr_reserve_space(xdr, 4 * sizeof(uint32_t)); if (unlikely(p == NULL)) @@ -892,6 +890,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r struct cb_compound_hdr_arg hdr_arg = { 0 }; struct cb_compound_hdr_res hdr_res = { NULL }; struct xdr_stream xdr_in, xdr_out; + struct xdr_buf *rq_arg = &rqstp->rq_arg; __be32 *p, status; struct cb_process_state cps = { .drc_status = 0, @@ -903,7 +902,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r dprintk("%s: start\n", __func__); - xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base); + rq_arg->len = rq_arg->head[0].iov_len + rq_arg->page_len; + xdr_init_decode(&xdr_in, rq_arg, rq_arg->head[0].iov_base); p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len); xdr_init_encode(&xdr_out, &rqstp->rq_res, p); diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 57c5a02f6213..d6d5d2a48e83 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -764,6 +764,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, server->time_delta = fsinfo->time_delta; + server->clone_blksize = fsinfo->clone_blksize; /* We're airborne Set socket buffersize */ rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); } diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index be806ead7f4d..5166adcfc0fb 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -721,14 +721,12 @@ int nfs_async_inode_return_delegation(struct inode *inode, struct nfs_client *clp = server->nfs_client; struct nfs_delegation *delegation; - filemap_flush(inode->i_mapping); - rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); if (delegation == NULL) goto out_enoent; - - if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) + if (stateid != NULL && + !clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) goto out_enoent; nfs_mark_return_delegation(server, delegation); rcu_read_unlock(); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3d8e4ffa0a33..ce5a21861074 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1714,9 +1714,6 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) dfprintk(VFS, "NFS: mknod(%s/%lu), %pd\n", dir->i_sb->s_id, dir->i_ino, dentry); - if (!new_valid_dev(rdev)) - return -EINVAL; - attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 37f639d50af5..93e236429c5d 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -473,8 +473,8 @@ static int nfs_release_page(struct page *page, gfp_t gfp) dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); /* Always try to initiate a 'commit' if relevant, but only - * wait for it if __GFP_WAIT is set. Even then, only wait 1 - * second and only if the 'bdi' is not congested. + * wait for it if the caller allows blocking. Even then, + * only wait 1 second and only if the 'bdi' is not congested. * Waiting indefinitely can cause deadlocks when the NFS * server is on this machine, when a new TCP connection is * needed and in other rare cases. There is no particular @@ -484,7 +484,7 @@ static int nfs_release_page(struct page *page, gfp_t gfp) if (mapping) { struct nfs_server *nfss = NFS_SERVER(mapping->host); nfs_commit_inode(mapping->host, 0); - if ((gfp & __GFP_WAIT) && + if (gfpflags_allow_blocking(gfp) && !bdi_write_congested(&nfss->backing_dev_info)) { wait_on_page_bit_killable_timeout(page, PG_private, HZ); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index fbc5a56de875..03516c80855a 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -339,6 +339,19 @@ static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls) } } +static void ff_layout_mark_devices_valid(struct nfs4_ff_layout_segment *fls) +{ + struct nfs4_deviceid_node *node; + int i; + + if (!(fls->flags & FF_FLAGS_NO_IO_THRU_MDS)) + return; + for (i = 0; i < fls->mirror_array_cnt; i++) { + node = &fls->mirror_array[i]->mirror_ds->id_node; + clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags); + } +} + static struct pnfs_layout_segment * ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, struct nfs4_layoutget_res *lgr, @@ -499,6 +512,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, rc = ff_layout_check_layout(lgr); if (rc) goto out_err_free; + ff_layout_mark_devices_valid(fls); ret = &fls->generic_hdr; dprintk("<-- %s (success)\n", __func__); @@ -741,17 +755,17 @@ ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg, } static struct nfs4_pnfs_ds * -ff_layout_choose_best_ds_for_read(struct nfs_pageio_descriptor *pgio, +ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, + int start_idx, int *best_idx) { - struct nfs4_ff_layout_segment *fls; + struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); struct nfs4_pnfs_ds *ds; int idx; - fls = FF_LAYOUT_LSEG(pgio->pg_lseg); /* mirrors are sorted by efficiency */ - for (idx = 0; idx < fls->mirror_array_cnt; idx++) { - ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, idx, false); + for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) { + ds = nfs4_ff_layout_prepare_ds(lseg, idx, false); if (ds) { *best_idx = idx; return ds; @@ -782,7 +796,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, if (pgio->pg_lseg == NULL) goto out_mds; - ds = ff_layout_choose_best_ds_for_read(pgio, &ds_idx); + ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx); if (!ds) goto out_mds; mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx); @@ -1035,7 +1049,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, rpc_wake_up(&tbl->slot_tbl_waitq); /* fall through */ default: - if (ff_layout_has_available_ds(lseg)) + if (ff_layout_no_fallback_to_mds(lseg) || + ff_layout_has_available_ds(lseg)) return -NFS4ERR_RESET_TO_PNFS; reset: dprintk("%s Retry through MDS. Error %d\n", __func__, @@ -1153,7 +1168,6 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, } /* NFS_PROTO call done callback routines */ - static int ff_layout_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) { @@ -1171,6 +1185,10 @@ static int ff_layout_read_done_cb(struct rpc_task *task, switch (err) { case -NFS4ERR_RESET_TO_PNFS: + if (ff_layout_choose_best_ds_for_read(hdr->lseg, + hdr->pgio_mirror_idx + 1, + &hdr->pgio_mirror_idx)) + goto out_eagain; set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &hdr->lseg->pls_layout->plh_flags); pnfs_read_resend_pnfs(hdr); @@ -1179,11 +1197,13 @@ static int ff_layout_read_done_cb(struct rpc_task *task, ff_layout_reset_read(hdr); return task->tk_status; case -EAGAIN: - rpc_restart_call_prepare(task); - return -EAGAIN; + goto out_eagain; } return 0; +out_eagain: + rpc_restart_call_prepare(task); + return -EAGAIN; } static bool diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 68cc0d9828f9..2bb08bc6aaf0 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -10,6 +10,7 @@ #define FS_NFS_NFS4FLEXFILELAYOUT_H #define FF_FLAGS_NO_LAYOUTCOMMIT 1 +#define FF_FLAGS_NO_IO_THRU_MDS 2 #include "../pnfs.h" @@ -146,6 +147,12 @@ FF_LAYOUT_MIRROR_COUNT(struct pnfs_layout_segment *lseg) } static inline bool +ff_layout_no_fallback_to_mds(struct pnfs_layout_segment *lseg) +{ + return FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_IO_THRU_MDS; +} + +static inline bool ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node) { return nfs4_test_deviceid_unavailable(node); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 326d9e10d833..31b0a52223a7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -618,7 +618,10 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); nfs_vmtruncate(inode, attr->ia_size); } - nfs_update_inode(inode, fattr); + if (fattr->valid) + nfs_update_inode(inode, fattr); + else + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR; spin_unlock(&inode->i_lock); } EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); @@ -1824,7 +1827,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0) nfsi->attr_gencount = fattr->gencount; } - invalid &= ~NFS_INO_INVALID_ATTR; + + /* Don't declare attrcache up to date if there were no attrs! */ + if (fattr->valid != 0) + invalid &= ~NFS_INO_INVALID_ATTR; + /* Don't invalidate the data if we were to blame */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 99a45283b9ee..09b190015df4 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -16,9 +16,7 @@ #include <linux/nfs_fs.h> #include "internal.h" -#ifdef NFS_DEBUG -# define NFSDBG_FACILITY NFSDBG_MOUNT -#endif +#define NFSDBG_FACILITY NFSDBG_MOUNT /* * Defined by RFC 1094, section A.3; and RFC 1813, section 5.1.4 diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index 814c1255f1d2..b587ccd31083 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -17,5 +17,6 @@ int nfs42_proc_deallocate(struct file *, loff_t, loff_t); loff_t nfs42_proc_llseek(struct file *, loff_t, int); int nfs42_proc_layoutstats_generic(struct nfs_server *, struct nfs42_layoutstat_data *); +int nfs42_proc_clone(struct file *, struct file *, loff_t, loff_t, loff_t); #endif /* __LINUX_FS_NFS_NFS4_2_H */ diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 0f020e4d8421..6b1ce9825430 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -14,7 +14,7 @@ #include "pnfs.h" #include "internal.h" -#define NFSDBG_FACILITY NFSDBG_PNFS +#define NFSDBG_FACILITY NFSDBG_PROC static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file, fmode_t fmode) @@ -271,3 +271,75 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server, return PTR_ERR(task); return 0; } + +static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, + struct file *dst_f, loff_t src_offset, + loff_t dst_offset, loff_t count) +{ + struct inode *src_inode = file_inode(src_f); + struct inode *dst_inode = file_inode(dst_f); + struct nfs_server *server = NFS_SERVER(dst_inode); + struct nfs42_clone_args args = { + .src_fh = NFS_FH(src_inode), + .dst_fh = NFS_FH(dst_inode), + .src_offset = src_offset, + .dst_offset = dst_offset, + .count = count, + .dst_bitmask = server->cache_consistency_bitmask, + }; + struct nfs42_clone_res res = { + .server = server, + }; + int status; + + msg->rpc_argp = &args; + msg->rpc_resp = &res; + + status = nfs42_set_rw_stateid(&args.src_stateid, src_f, FMODE_READ); + if (status) + return status; + + status = nfs42_set_rw_stateid(&args.dst_stateid, dst_f, FMODE_WRITE); + if (status) + return status; + + res.dst_fattr = nfs_alloc_fattr(); + if (!res.dst_fattr) + return -ENOMEM; + + status = nfs4_call_sync(server->client, server, msg, + &args.seq_args, &res.seq_res, 0); + if (status == 0) + status = nfs_post_op_update_inode(dst_inode, res.dst_fattr); + + kfree(res.dst_fattr); + return status; +} + +int nfs42_proc_clone(struct file *src_f, struct file *dst_f, + loff_t src_offset, loff_t dst_offset, loff_t count) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLONE], + }; + struct inode *inode = file_inode(src_f); + struct nfs_server *server = NFS_SERVER(file_inode(src_f)); + struct nfs4_exception exception = { }; + int err; + + if (!nfs_server_capable(inode, NFS_CAP_CLONE)) + return -EOPNOTSUPP; + + do { + err = _nfs42_proc_clone(&msg, src_f, dst_f, src_offset, + dst_offset, count); + if (err == -ENOTSUPP || err == -EOPNOTSUPP) { + NFS_SERVER(inode)->caps &= ~NFS_CAP_CLONE; + return -EOPNOTSUPP; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + + return err; + +} diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 0eb29e14070d..0ca482a51e53 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -34,6 +34,12 @@ 1 /* opaque devaddr4 length */ + \ XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE)) #define decode_layoutstats_maxsz (op_decode_hdr_maxsz) +#define encode_clone_maxsz (encode_stateid_maxsz + \ + encode_stateid_maxsz + \ + 2 /* src offset */ + \ + 2 /* dst offset */ + \ + 2 /* count */) +#define decode_clone_maxsz (op_decode_hdr_maxsz) #define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -65,7 +71,20 @@ decode_sequence_maxsz + \ decode_putfh_maxsz + \ PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz) - +#define NFS4_enc_clone_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_savefh_maxsz + \ + encode_putfh_maxsz + \ + encode_clone_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_clone_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_savefh_maxsz + \ + decode_putfh_maxsz + \ + decode_clone_maxsz + \ + decode_getattr_maxsz) static void encode_fallocate(struct xdr_stream *xdr, struct nfs42_falloc_args *args) @@ -128,6 +147,21 @@ static void encode_layoutstats(struct xdr_stream *xdr, encode_uint32(xdr, 0); } +static void encode_clone(struct xdr_stream *xdr, + struct nfs42_clone_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + encode_op_hdr(xdr, OP_CLONE, decode_clone_maxsz, hdr); + encode_nfs4_stateid(xdr, &args->src_stateid); + encode_nfs4_stateid(xdr, &args->dst_stateid); + p = reserve_space(xdr, 3*8); + p = xdr_encode_hyper(p, args->src_offset); + p = xdr_encode_hyper(p, args->dst_offset); + xdr_encode_hyper(p, args->count); +} + /* * Encode ALLOCATE request */ @@ -206,6 +240,27 @@ static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode CLONE request + */ +static void nfs4_xdr_enc_clone(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_clone_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->src_fh, &hdr); + encode_savefh(xdr, &hdr); + encode_putfh(xdr, args->dst_fh, &hdr); + encode_clone(xdr, args, &hdr); + encode_getfattr(xdr, args->dst_bitmask, &hdr); + encode_nops(&hdr); +} + static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) { return decode_op_hdr(xdr, OP_ALLOCATE); @@ -243,6 +298,11 @@ static int decode_layoutstats(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_LAYOUTSTATS); } +static int decode_clone(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_CLONE); +} + /* * Decode ALLOCATE request */ @@ -351,4 +411,39 @@ out: return status; } +/* + * Decode CLONE request + */ +static int nfs4_xdr_dec_clone(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_clone_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_savefh(xdr); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_clone(xdr); + if (status) + goto out; + status = decode_getfattr(xdr, res->dst_fattr, res->server); + +out: + res->rpc_status = status; + return status; +} + #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 50cfc4ca7a02..4afdee420d25 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -183,10 +183,12 @@ struct nfs4_state { struct nfs4_exception { - long timeout; - int retry; struct nfs4_state *state; struct inode *inode; + long timeout; + unsigned char delay : 1, + recovering : 1, + retry : 1; }; struct nfs4_state_recovery_ops { diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 223bedda64ae..10410e8b5853 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -33,7 +33,7 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) return ret; idr_preload(GFP_KERNEL); spin_lock(&nn->nfs_client_lock); - ret = idr_alloc(&nn->cb_ident_idr, clp, 0, 0, GFP_NOWAIT); + ret = idr_alloc(&nn->cb_ident_idr, clp, 1, 0, GFP_NOWAIT); if (ret >= 0) clp->cl_cb_ident = ret; spin_unlock(&nn->nfs_client_lock); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index b0dbe0abed53..db9b5fea5b3e 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -4,8 +4,10 @@ * Copyright (C) 1992 Rick Sladkey */ #include <linux/fs.h> +#include <linux/file.h> #include <linux/falloc.h> #include <linux/nfs_fs.h> +#include <uapi/linux/btrfs.h> /* BTRFS_IOC_CLONE/BTRFS_IOC_CLONE_RANGE */ #include "delegation.h" #include "internal.h" #include "iostat.h" @@ -192,14 +194,138 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t return nfs42_proc_deallocate(filep, offset, len); return nfs42_proc_allocate(filep, offset, len); } + +static noinline long +nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd, + u64 src_off, u64 dst_off, u64 count) +{ + struct inode *dst_inode = file_inode(dst_file); + struct nfs_server *server = NFS_SERVER(dst_inode); + struct fd src_file; + struct inode *src_inode; + unsigned int bs = server->clone_blksize; + bool same_inode = false; + int ret; + + /* dst file must be opened for writing */ + if (!(dst_file->f_mode & FMODE_WRITE)) + return -EINVAL; + + ret = mnt_want_write_file(dst_file); + if (ret) + return ret; + + src_file = fdget(srcfd); + if (!src_file.file) { + ret = -EBADF; + goto out_drop_write; + } + + src_inode = file_inode(src_file.file); + + if (src_inode == dst_inode) + same_inode = true; + + /* src file must be opened for reading */ + if (!(src_file.file->f_mode & FMODE_READ)) + goto out_fput; + + /* src and dst must be regular files */ + ret = -EISDIR; + if (!S_ISREG(src_inode->i_mode) || !S_ISREG(dst_inode->i_mode)) + goto out_fput; + + ret = -EXDEV; + if (src_file.file->f_path.mnt != dst_file->f_path.mnt || + src_inode->i_sb != dst_inode->i_sb) + goto out_fput; + + /* check alignment w.r.t. clone_blksize */ + ret = -EINVAL; + if (bs) { + if (!IS_ALIGNED(src_off, bs) || !IS_ALIGNED(dst_off, bs)) + goto out_fput; + if (!IS_ALIGNED(count, bs) && i_size_read(src_inode) != (src_off + count)) + goto out_fput; + } + + /* verify if ranges are overlapped within the same file */ + if (same_inode) { + if (dst_off + count > src_off && dst_off < src_off + count) + goto out_fput; + } + + /* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */ + if (same_inode) { + mutex_lock(&src_inode->i_mutex); + } else if (dst_inode < src_inode) { + mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD); + } else { + mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_CHILD); + } + + /* flush all pending writes on both src and dst so that server + * has the latest data */ + ret = nfs_sync_inode(src_inode); + if (ret) + goto out_unlock; + ret = nfs_sync_inode(dst_inode); + if (ret) + goto out_unlock; + + ret = nfs42_proc_clone(src_file.file, dst_file, src_off, dst_off, count); + + /* truncate inode page cache of the dst range so that future reads can fetch + * new data from server */ + if (!ret) + truncate_inode_pages_range(&dst_inode->i_data, dst_off, dst_off + count - 1); + +out_unlock: + if (same_inode) { + mutex_unlock(&src_inode->i_mutex); + } else if (dst_inode < src_inode) { + mutex_unlock(&src_inode->i_mutex); + mutex_unlock(&dst_inode->i_mutex); + } else { + mutex_unlock(&dst_inode->i_mutex); + mutex_unlock(&src_inode->i_mutex); + } +out_fput: + fdput(src_file); +out_drop_write: + mnt_drop_write_file(dst_file); + return ret; +} + +static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp) +{ + struct btrfs_ioctl_clone_range_args args; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_offset, + args.dest_offset, args.src_length); +} + +long nfs4_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + + switch (cmd) { + case BTRFS_IOC_CLONE: + return nfs42_ioctl_clone(file, arg, 0, 0, 0); + case BTRFS_IOC_CLONE_RANGE: + return nfs42_ioctl_clone_range(file, argp); + } + + return -ENOTTY; +} #endif /* CONFIG_NFS_V4_2 */ const struct file_operations nfs4_file_operations = { -#ifdef CONFIG_NFS_V4_2 - .llseek = nfs4_file_llseek, -#else - .llseek = nfs_file_llseek, -#endif .read_iter = nfs_file_read, .write_iter = nfs_file_write, .mmap = nfs_file_mmap, @@ -211,9 +337,14 @@ const struct file_operations nfs4_file_operations = { .flock = nfs_flock, .splice_read = nfs_file_splice_read, .splice_write = iter_file_splice_write, -#ifdef CONFIG_NFS_V4_2 - .fallocate = nfs42_fallocate, -#endif /* CONFIG_NFS_V4_2 */ .check_flags = nfs_check_flags, .setlease = simple_nosetlease, +#ifdef CONFIG_NFS_V4_2 + .llseek = nfs4_file_llseek, + .fallocate = nfs42_fallocate, + .unlocked_ioctl = nfs4_ioctl, + .compat_ioctl = nfs4_ioctl, +#else + .llseek = nfs_file_llseek, +#endif }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0e5ff69455c7..89818036f035 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -78,7 +78,6 @@ struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); -static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *, long *); static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label); static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); @@ -239,6 +238,7 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_FS_LAYOUT_TYPES, FATTR4_WORD2_LAYOUT_BLKSIZE + | FATTR4_WORD2_CLONE_BLKSIZE }; const u32 nfs4_fs_locations_bitmap[3] = { @@ -344,13 +344,16 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) /* This is the error handling routine for processes that are allowed * to sleep. */ -int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +static int nfs4_do_handle_exception(struct nfs_server *server, + int errorcode, struct nfs4_exception *exception) { struct nfs_client *clp = server->nfs_client; struct nfs4_state *state = exception->state; struct inode *inode = exception->inode; int ret = errorcode; + exception->delay = 0; + exception->recovering = 0; exception->retry = 0; switch(errorcode) { case 0: @@ -359,11 +362,9 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - if (inode && nfs4_have_delegation(inode, FMODE_READ)) { - nfs4_inode_return_delegation(inode); - exception->retry = 1; - return 0; - } + if (inode && nfs_async_inode_return_delegation(inode, + NULL) == 0) + goto wait_on_recovery; if (state == NULL) break; ret = nfs4_schedule_stateid_recovery(server, state); @@ -409,11 +410,12 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ ret = -EBUSY; break; } - case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: - ret = nfs4_delay(server->client, &exception->timeout); - if (ret != 0) - break; + nfs_inc_server_stats(server, NFSIOS_DELAY); + case -NFS4ERR_GRACE: + exception->delay = 1; + return 0; + case -NFS4ERR_RETRY_UNCACHED_REP: case -NFS4ERR_OLD_STATEID: exception->retry = 1; @@ -434,14 +436,85 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ /* We failed to handle the error */ return nfs4_map_errors(ret); wait_on_recovery: - ret = nfs4_wait_clnt_recover(clp); + exception->recovering = 1; + return 0; +} + +/* This is the error handling routine for processes that are allowed + * to sleep. + */ +int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +{ + struct nfs_client *clp = server->nfs_client; + int ret; + + ret = nfs4_do_handle_exception(server, errorcode, exception); + if (exception->delay) { + ret = nfs4_delay(server->client, &exception->timeout); + goto out_retry; + } + if (exception->recovering) { + ret = nfs4_wait_clnt_recover(clp); + if (test_bit(NFS_MIG_FAILED, &server->mig_status)) + return -EIO; + goto out_retry; + } + return ret; +out_retry: + if (ret == 0) + exception->retry = 1; + return ret; +} + +static int +nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server, + int errorcode, struct nfs4_exception *exception) +{ + struct nfs_client *clp = server->nfs_client; + int ret; + + ret = nfs4_do_handle_exception(server, errorcode, exception); + if (exception->delay) { + rpc_delay(task, nfs4_update_delay(&exception->timeout)); + goto out_retry; + } + if (exception->recovering) { + rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); + if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) + rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); + goto out_retry; + } if (test_bit(NFS_MIG_FAILED, &server->mig_status)) - return -EIO; + ret = -EIO; + return ret; +out_retry: if (ret == 0) exception->retry = 1; return ret; } +static int +nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server, + struct nfs4_state *state, long *timeout) +{ + struct nfs4_exception exception = { + .state = state, + }; + + if (task->tk_status >= 0) + return 0; + if (timeout) + exception.timeout = *timeout; + task->tk_status = nfs4_async_handle_exception(task, server, + task->tk_status, + &exception); + if (exception.delay && timeout) + *timeout = exception.timeout; + if (exception.retry) + return -EAGAIN; + return 0; +} + /* * Return 'true' if 'clp' is using an rpc_client that is integrity protected * or 'false' otherwise. @@ -4530,7 +4603,7 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server) #define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE) static int buf_to_pages_noslab(const void *buf, size_t buflen, - struct page **pages, unsigned int *pgbase) + struct page **pages) { struct page *newpage, **spages; int rc = 0; @@ -4674,7 +4747,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu goto out_free; args.acl_len = npages * PAGE_SIZE; - args.acl_pgbase = 0; dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n", __func__, buf, buflen, npages, args.acl_len); @@ -4766,7 +4838,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl return -EOPNOTSUPP; if (npages > ARRAY_SIZE(pages)) return -ERANGE; - i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase); + i = buf_to_pages_noslab(buf, buflen, arg.acl_pages); if (i < 0) return i; nfs4_inode_return_delegation(inode); @@ -4955,79 +5027,6 @@ out: #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ -static int -nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, - struct nfs4_state *state, long *timeout) -{ - struct nfs_client *clp = server->nfs_client; - - if (task->tk_status >= 0) - return 0; - switch(task->tk_status) { - case -NFS4ERR_DELEG_REVOKED: - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_OPENMODE: - if (state == NULL) - break; - if (nfs4_schedule_stateid_recovery(server, state) < 0) - goto recovery_failed; - goto wait_on_recovery; - case -NFS4ERR_EXPIRED: - if (state != NULL) { - if (nfs4_schedule_stateid_recovery(server, state) < 0) - goto recovery_failed; - } - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_STALE_CLIENTID: - nfs4_schedule_lease_recovery(clp); - goto wait_on_recovery; - case -NFS4ERR_MOVED: - if (nfs4_schedule_migration_recovery(server) < 0) - goto recovery_failed; - goto wait_on_recovery; - case -NFS4ERR_LEASE_MOVED: - nfs4_schedule_lease_moved_recovery(clp); - goto wait_on_recovery; -#if defined(CONFIG_NFS_V4_1) - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_DEADSESSION: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - case -NFS4ERR_SEQ_FALSE_RETRY: - case -NFS4ERR_SEQ_MISORDERED: - dprintk("%s ERROR %d, Reset session\n", __func__, - task->tk_status); - nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); - goto wait_on_recovery; -#endif /* CONFIG_NFS_V4_1 */ - case -NFS4ERR_DELAY: - nfs_inc_server_stats(server, NFSIOS_DELAY); - rpc_delay(task, nfs4_update_delay(timeout)); - goto restart_call; - case -NFS4ERR_GRACE: - rpc_delay(task, NFS4_POLL_RETRY_MAX); - case -NFS4ERR_RETRY_UNCACHED_REP: - case -NFS4ERR_OLD_STATEID: - goto restart_call; - } - task->tk_status = nfs4_map_errors(task->tk_status); - return 0; -recovery_failed: - task->tk_status = -EIO; - return 0; -wait_on_recovery: - rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); - if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) - rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); - if (test_bit(NFS_MIG_FAILED, &server->mig_status)) - goto recovery_failed; -restart_call: - task->tk_status = 0; - return -EAGAIN; -} - static void nfs4_init_boot_verifier(const struct nfs_client *clp, nfs4_verifier *bootverf) { @@ -5522,7 +5521,7 @@ struct nfs4_unlockdata { struct nfs4_lock_state *lsp; struct nfs_open_context *ctx; struct file_lock fl; - const struct nfs_server *server; + struct nfs_server *server; unsigned long timestamp; }; @@ -6249,9 +6248,10 @@ nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp) #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" -static int nfs4_xattr_set_nfs4_acl(struct dentry *dentry, const char *key, +static int nfs4_xattr_set_nfs4_acl(const struct xattr_handler *handler, + struct dentry *dentry, const char *key, const void *buf, size_t buflen, - int flags, int type) + int flags) { if (strcmp(key, "") != 0) return -EINVAL; @@ -6259,8 +6259,9 @@ static int nfs4_xattr_set_nfs4_acl(struct dentry *dentry, const char *key, return nfs4_proc_set_acl(d_inode(dentry), buf, buflen); } -static int nfs4_xattr_get_nfs4_acl(struct dentry *dentry, const char *key, - void *buf, size_t buflen, int type) +static int nfs4_xattr_get_nfs4_acl(const struct xattr_handler *handler, + struct dentry *dentry, const char *key, + void *buf, size_t buflen) { if (strcmp(key, "") != 0) return -EINVAL; @@ -6268,9 +6269,10 @@ static int nfs4_xattr_get_nfs4_acl(struct dentry *dentry, const char *key, return nfs4_proc_get_acl(d_inode(dentry), buf, buflen); } -static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list, +static size_t nfs4_xattr_list_nfs4_acl(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_len, const char *name, - size_t name_len, int type) + size_t name_len) { size_t len = sizeof(XATTR_NAME_NFSV4_ACL); @@ -6288,9 +6290,10 @@ static inline int nfs4_server_supports_labels(struct nfs_server *server) return server->caps & NFS_CAP_SECURITY_LABEL; } -static int nfs4_xattr_set_nfs4_label(struct dentry *dentry, const char *key, - const void *buf, size_t buflen, - int flags, int type) +static int nfs4_xattr_set_nfs4_label(const struct xattr_handler *handler, + struct dentry *dentry, const char *key, + const void *buf, size_t buflen, + int flags) { if (security_ismaclabel(key)) return nfs4_set_security_label(dentry, buf, buflen); @@ -6298,17 +6301,19 @@ static int nfs4_xattr_set_nfs4_label(struct dentry *dentry, const char *key, return -EOPNOTSUPP; } -static int nfs4_xattr_get_nfs4_label(struct dentry *dentry, const char *key, - void *buf, size_t buflen, int type) +static int nfs4_xattr_get_nfs4_label(const struct xattr_handler *handler, + struct dentry *dentry, const char *key, + void *buf, size_t buflen) { if (security_ismaclabel(key)) return nfs4_get_security_label(d_inode(dentry), buf, buflen); return -EOPNOTSUPP; } -static size_t nfs4_xattr_list_nfs4_label(struct dentry *dentry, char *list, - size_t list_len, const char *name, - size_t name_len, int type) +static size_t nfs4_xattr_list_nfs4_label(const struct xattr_handler *handler, + struct dentry *dentry, char *list, + size_t list_len, const char *name, + size_t name_len) { size_t len = 0; @@ -7861,7 +7866,7 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) spin_unlock(&inode->i_lock); goto out_restart; } - if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) + if (nfs4_async_handle_error(task, server, state, &lgp->timeout) == -EAGAIN) goto out_restart; out: dprintk("<-- %s\n", __func__); @@ -8718,7 +8723,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_ALLOCATE | NFS_CAP_DEALLOCATE | NFS_CAP_SEEK - | NFS_CAP_LAYOUTSTATS, + | NFS_CAP_LAYOUTSTATS + | NFS_CAP_CLONE, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 788adf3897c7..4e4441216804 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1659,7 +1659,7 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun *p = cpu_to_be32(FATTR4_WORD0_ACL); p = reserve_space(xdr, 4); *p = cpu_to_be32(arg->acl_len); - xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); + xdr_write_pages(xdr, arg->acl_pages, 0, arg->acl_len); } static void @@ -2491,7 +2491,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); xdr_inline_pages(&req->rq_rcv_buf, replen << 2, - args->acl_pages, args->acl_pgbase, args->acl_len); + args->acl_pages, 0, args->acl_len); encode_nops(&hdr); } @@ -3615,6 +3615,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st status = 0; if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS))) goto out; + bitmap[0] &= ~FATTR4_WORD0_FS_LOCATIONS; status = -EIO; /* Ignore borken servers that return unrequested attrs */ if (unlikely(res == NULL)) @@ -4375,6 +4376,11 @@ static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat) goto xdr_error; if ((status = decode_attr_files_total(xdr, bitmap, &fsstat->tfiles)) != 0) goto xdr_error; + + status = -EIO; + if (unlikely(bitmap[0])) + goto xdr_error; + if ((status = decode_attr_space_avail(xdr, bitmap, &fsstat->abytes)) != 0) goto xdr_error; if ((status = decode_attr_space_free(xdr, bitmap, &fsstat->fbytes)) != 0) @@ -4574,6 +4580,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, goto xdr_error; fattr->valid |= status; + status = -EIO; + if (unlikely(bitmap[0])) + goto xdr_error; + status = decode_attr_mode(xdr, bitmap, &fmode); if (status < 0) goto xdr_error; @@ -4627,6 +4637,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, goto xdr_error; fattr->valid |= status; + status = -EIO; + if (unlikely(bitmap[1])) + goto xdr_error; + status = decode_attr_mdsthreshold(xdr, bitmap, fattr->mdsthreshold); if (status < 0) goto xdr_error; @@ -4764,6 +4778,28 @@ static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap, return 0; } +/* + * The granularity of a CLONE operation. + */ +static int decode_attr_clone_blksize(struct xdr_stream *xdr, uint32_t *bitmap, + uint32_t *res) +{ + __be32 *p; + + dprintk("%s: bitmap is %x\n", __func__, bitmap[2]); + *res = 0; + if (bitmap[2] & FATTR4_WORD2_CLONE_BLKSIZE) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) { + print_overflow_msg(__func__, xdr); + return -EIO; + } + *res = be32_to_cpup(p); + bitmap[2] &= ~FATTR4_WORD2_CLONE_BLKSIZE; + } + return 0; +} + static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) { unsigned int savep; @@ -4789,15 +4825,28 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0) goto xdr_error; fsinfo->wtpref = fsinfo->wtmax; + + status = -EIO; + if (unlikely(bitmap[0])) + goto xdr_error; + status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta); if (status != 0) goto xdr_error; status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype); if (status != 0) goto xdr_error; + + status = -EIO; + if (unlikely(bitmap[1])) + goto xdr_error; + status = decode_attr_layout_blksize(xdr, bitmap, &fsinfo->blksize); if (status) goto xdr_error; + status = decode_attr_clone_blksize(xdr, bitmap, &fsinfo->clone_blksize); + if (status) + goto xdr_error; status = verify_attr_len(xdr, savep, attrlen); xdr_error: @@ -7465,6 +7514,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(ALLOCATE, enc_allocate, dec_allocate), PROC(DEALLOCATE, enc_deallocate, dec_deallocate), PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats), + PROC(CLONE, enc_clone, dec_clone), #endif /* CONFIG_NFS_V4_2 */ }; diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 9bc9f04fb7f6..89a15dbe5efc 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -90,7 +90,7 @@ #define NFS_DEF_OPTIONS "vers=2,udp,rsize=4096,wsize=4096" /* Parameters passed from the kernel command line */ -static char nfs_root_parms[256] __initdata = ""; +static char nfs_root_parms[NFS_MAXPATHLEN + 1] __initdata = ""; /* Text-based mount options passed to super.c */ static char nfs_root_options[256] __initdata = NFS_DEF_OPTIONS; diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 5aaed363556a..5c0c6b58157f 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -124,7 +124,7 @@ objio_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, retry_lookup: od = osduld_info_lookup(&odi); - if (unlikely(IS_ERR(od))) { + if (IS_ERR(od)) { err = PTR_ERR(od); dprintk("%s: osduld_info_lookup => %d\n", __func__, err); if (err == -ENODEV && retry_flag) { diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 8abe27165ad0..5a8ae2125b50 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -872,33 +872,38 @@ send_layoutget(struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); - lgp = kzalloc(sizeof(*lgp), gfp_flags); - if (lgp == NULL) - return NULL; + /* + * Synchronously retrieve layout information from server and + * store in lseg. If we race with a concurrent seqid morphing + * op, then re-send the LAYOUTGET. + */ + do { + lgp = kzalloc(sizeof(*lgp), gfp_flags); + if (lgp == NULL) + return NULL; + + i_size = i_size_read(ino); + + lgp->args.minlength = PAGE_CACHE_SIZE; + if (lgp->args.minlength > range->length) + lgp->args.minlength = range->length; + if (range->iomode == IOMODE_READ) { + if (range->offset >= i_size) + lgp->args.minlength = 0; + else if (i_size - range->offset < lgp->args.minlength) + lgp->args.minlength = i_size - range->offset; + } + lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; + lgp->args.range = *range; + lgp->args.type = server->pnfs_curr_ld->id; + lgp->args.inode = ino; + lgp->args.ctx = get_nfs_open_context(ctx); + lgp->gfp_flags = gfp_flags; + lgp->cred = lo->plh_lc_cred; - i_size = i_size_read(ino); + lseg = nfs4_proc_layoutget(lgp, gfp_flags); + } while (lseg == ERR_PTR(-EAGAIN)); - lgp->args.minlength = PAGE_CACHE_SIZE; - if (lgp->args.minlength > range->length) - lgp->args.minlength = range->length; - if (range->iomode == IOMODE_READ) { - if (range->offset >= i_size) - lgp->args.minlength = 0; - else if (i_size - range->offset < lgp->args.minlength) - lgp->args.minlength = i_size - range->offset; - } - lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; - lgp->args.range = *range; - lgp->args.type = server->pnfs_curr_ld->id; - lgp->args.inode = ino; - lgp->args.ctx = get_nfs_open_context(ctx); - lgp->gfp_flags = gfp_flags; - lgp->cred = lo->plh_lc_cred; - - /* Synchronously retrieve layout information from server and - * store in lseg. - */ - lseg = nfs4_proc_layoutget(lgp, gfp_flags); if (IS_ERR(lseg)) { switch (PTR_ERR(lseg)) { case -ENOMEM: @@ -1687,6 +1692,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) /* existing state ID, make sure the sequence number matches. */ if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { dprintk("%s forget reply due to sequence\n", __func__); + status = -EAGAIN; goto out_forget_reply; } pnfs_set_layout_stateid(lo, &res->stateid, false); @@ -1912,12 +1918,13 @@ static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr) */ void pnfs_ld_write_done(struct nfs_pgio_header *hdr) { - trace_nfs4_pnfs_write(hdr, hdr->pnfs_error); - if (!hdr->pnfs_error) { + if (likely(!hdr->pnfs_error)) { pnfs_set_layoutcommit(hdr->inode, hdr->lseg, hdr->mds_offset + hdr->res.count); hdr->mds_ops->rpc_call_done(&hdr->task, hdr); - } else + } + trace_nfs4_pnfs_write(hdr, hdr->pnfs_error); + if (unlikely(hdr->pnfs_error)) pnfs_ld_handle_write_error(hdr); hdr->mds_ops->rpc_release(hdr); } @@ -2028,11 +2035,12 @@ static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr) */ void pnfs_ld_read_done(struct nfs_pgio_header *hdr) { - trace_nfs4_pnfs_read(hdr, hdr->pnfs_error); if (likely(!hdr->pnfs_error)) { __nfs4_read_done_cb(hdr); hdr->mds_ops->rpc_call_done(&hdr->task, hdr); - } else + } + trace_nfs4_pnfs_read(hdr, hdr->pnfs_error); + if (unlikely(hdr->pnfs_error)) pnfs_ld_handle_read_error(hdr); hdr->mds_ops->rpc_release(hdr); } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 01b8cc8e8cfc..0a5e33f33b5c 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -246,6 +246,13 @@ static void nfs_readpage_retry(struct rpc_task *task, nfs_set_pgio_error(hdr, -EIO, argp->offset); return; } + + /* For non rpc-based layout drivers, retry-through-MDS */ + if (!task->tk_ops) { + hdr->pnfs_error = -EAGAIN; + return; + } + /* Yes, so retry the read at the end of the hdr */ hdr->mds_offset += resp->count; argp->offset += resp->count; @@ -268,7 +275,7 @@ static void nfs_readpage_result(struct rpc_task *task, hdr->good_bytes = bound - hdr->io_start; } spin_unlock(&hdr->lock); - } else if (hdr->res.count != hdr->args.count) + } else if (hdr->res.count < hdr->args.count) nfs_readpage_retry(task, hdr); } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 383a027de452..f1268280244e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2816,7 +2816,6 @@ out_invalid_transport_udp: * NFS client for backwards compatibility */ unsigned int nfs_callback_set_tcpport; -unsigned short nfs_callback_tcpport; /* Default cache timeout is 10 minutes */ unsigned int nfs_idmap_cache_timeout = 600; /* Turn off NFSv4 uid/gid mapping when using AUTH_SYS */ @@ -2827,7 +2826,6 @@ char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN] = ""; bool recover_lost_locks = false; EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport); -EXPORT_SYMBOL_GPL(nfs_callback_tcpport); EXPORT_SYMBOL_GPL(nfs_idmap_cache_timeout); EXPORT_SYMBOL_GPL(nfs4_disable_idmapping); EXPORT_SYMBOL_GPL(max_session_slots); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 75ab7622e0cc..7b9316406930 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1505,6 +1505,13 @@ static void nfs_writeback_result(struct rpc_task *task, task->tk_status = -EIO; return; } + + /* For non rpc-based layout drivers, retry-through-MDS */ + if (!task->tk_ops) { + hdr->pnfs_error = -EAGAIN; + return; + } + /* Was this an NFSv2 write or an NFSv3 stable write? */ if (resp->verf->committed != NFS_UNSTABLE) { /* Resend from where the server left off */ diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index f6e7cbabac5a..00575d776d91 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -262,11 +262,11 @@ void fill_post_wcc(struct svc_fh *fhp) err = fh_getattr(fhp, &fhp->fh_post_attr); fhp->fh_post_change = d_inode(fhp->fh_dentry)->i_version; if (err) { - fhp->fh_post_saved = 0; + fhp->fh_post_saved = false; /* Grab the ctime anyway - set_change_info might use it */ fhp->fh_post_attr.ctime = d_inode(fhp->fh_dentry)->i_ctime; } else - fhp->fh_post_saved = 1; + fhp->fh_post_saved = true; } /* diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index ebf90e487c75..9ffef06b30d5 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -201,6 +201,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate, INIT_LIST_HEAD(&ls->ls_perfile); spin_lock_init(&ls->ls_lock); INIT_LIST_HEAD(&ls->ls_layouts); + mutex_init(&ls->ls_mutex); ls->ls_layout_type = layout_type; nfsd4_init_cb(&ls->ls_recall, clp, &nfsd4_cb_layout_ops, NFSPROC4_CLNT_CB_LAYOUT); @@ -262,19 +263,23 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp, status = nfserr_jukebox; if (!ls) goto out; + mutex_lock(&ls->ls_mutex); } else { ls = container_of(stid, struct nfs4_layout_stateid, ls_stid); status = nfserr_bad_stateid; + mutex_lock(&ls->ls_mutex); if (stateid->si_generation > stid->sc_stateid.si_generation) - goto out_put_stid; + goto out_unlock_stid; if (layout_type != ls->ls_layout_type) - goto out_put_stid; + goto out_unlock_stid; } *lsp = ls; return 0; +out_unlock_stid: + mutex_unlock(&ls->ls_mutex); out_put_stid: nfs4_put_stid(stid); out: @@ -296,8 +301,6 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls) trace_layout_recall(&ls->ls_stid.sc_stateid); atomic_inc(&ls->ls_stid.sc_count); - update_stateid(&ls->ls_stid.sc_stateid); - memcpy(&ls->ls_recall_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t)); nfsd4_run_cb(&ls->ls_recall); out_unlock: @@ -406,8 +409,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls) list_add_tail(&new->lo_perstate, &ls->ls_layouts); new = NULL; done: - update_stateid(&ls->ls_stid.sc_stateid); - memcpy(&lgp->lg_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&lgp->lg_sid, &ls->ls_stid); spin_unlock(&ls->ls_lock); out: spin_unlock(&fp->fi_lock); @@ -481,11 +483,8 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp, } } if (!list_empty(&ls->ls_layouts)) { - if (found) { - update_stateid(&ls->ls_stid.sc_stateid); - memcpy(&lrp->lr_sid, &ls->ls_stid.sc_stateid, - sizeof(stateid_t)); - } + if (found) + nfs4_inc_and_copy_stateid(&lrp->lr_sid, &ls->ls_stid); lrp->lrs_present = 1; } else { trace_layoutstate_unhash(&ls->ls_stid.sc_stateid); @@ -494,6 +493,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp, } spin_unlock(&ls->ls_lock); + mutex_unlock(&ls->ls_mutex); nfs4_put_stid(&ls->ls_stid); nfsd4_free_layouts(&reaplist); return nfs_ok; @@ -608,6 +608,16 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls) } } +static void +nfsd4_cb_layout_prepare(struct nfsd4_callback *cb) +{ + struct nfs4_layout_stateid *ls = + container_of(cb, struct nfs4_layout_stateid, ls_recall); + + mutex_lock(&ls->ls_mutex); + nfs4_inc_and_copy_stateid(&ls->ls_recall_sid, &ls->ls_stid); +} + static int nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) { @@ -649,12 +659,14 @@ nfsd4_cb_layout_release(struct nfsd4_callback *cb) trace_layout_recall_release(&ls->ls_stid.sc_stateid); + mutex_unlock(&ls->ls_mutex); nfsd4_return_all_layouts(ls, &reaplist); nfsd4_free_layouts(&reaplist); nfs4_put_stid(&ls->ls_stid); } static struct nfsd4_callback_ops nfsd4_cb_layout_ops = { + .prepare = nfsd4_cb_layout_prepare, .done = nfsd4_cb_layout_done, .release = nfsd4_cb_layout_release, }; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 4ce6b97b31ad..a9f096c7e99f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1309,6 +1309,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp, nfserr = nfsd4_insert_layout(lgp, ls); out_put_stid: + mutex_unlock(&ls->ls_mutex); nfs4_put_stid(&ls->ls_stid); out: return nfserr; @@ -1362,6 +1363,9 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp, goto out; } + /* LAYOUTCOMMIT does not require any serialization */ + mutex_unlock(&ls->ls_mutex); + if (new_size > i_size_read(inode)) { lcp->lc_size_chg = 1; lcp->lc_newsize = new_size; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0f1d5691b795..6b800b5b8fed 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -575,6 +575,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; /* Will be incremented before return to client: */ atomic_set(&stid->sc_count, 1); + spin_lock_init(&stid->sc_lock); /* * It shouldn't be a problem to reuse an opaque stateid value. @@ -745,6 +746,18 @@ nfs4_put_stid(struct nfs4_stid *s) put_nfs4_file(fp); } +void +nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid) +{ + stateid_t *src = &stid->sc_stateid; + + spin_lock(&stid->sc_lock); + if (unlikely(++src->si_generation == 0)) + src->si_generation = 1; + memcpy(dst, src, sizeof(*dst)); + spin_unlock(&stid->sc_lock); +} + static void nfs4_put_deleg_lease(struct nfs4_file *fp) { struct file *filp = NULL; @@ -765,16 +778,68 @@ void nfs4_unhash_stid(struct nfs4_stid *s) s->sc_type = 0; } -static void +/** + * nfs4_get_existing_delegation - Discover if this delegation already exists + * @clp: a pointer to the nfs4_client we're granting a delegation to + * @fp: a pointer to the nfs4_file we're granting a delegation on + * + * Return: + * On success: NULL if an existing delegation was not found. + * + * On error: -EAGAIN if one was previously granted to this nfs4_client + * for this nfs4_file. + * + */ + +static int +nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp) +{ + struct nfs4_delegation *searchdp = NULL; + struct nfs4_client *searchclp = NULL; + + lockdep_assert_held(&state_lock); + lockdep_assert_held(&fp->fi_lock); + + list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) { + searchclp = searchdp->dl_stid.sc_client; + if (clp == searchclp) { + return -EAGAIN; + } + } + return 0; +} + +/** + * hash_delegation_locked - Add a delegation to the appropriate lists + * @dp: a pointer to the nfs4_delegation we are adding. + * @fp: a pointer to the nfs4_file we're granting a delegation on + * + * Return: + * On success: NULL if the delegation was successfully hashed. + * + * On error: -EAGAIN if one was previously granted to this + * nfs4_client for this nfs4_file. Delegation is not hashed. + * + */ + +static int hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) { + int status; + struct nfs4_client *clp = dp->dl_stid.sc_client; + lockdep_assert_held(&state_lock); lockdep_assert_held(&fp->fi_lock); + status = nfs4_get_existing_delegation(clp, fp); + if (status) + return status; + ++fp->fi_delegees; atomic_inc(&dp->dl_stid.sc_count); dp->dl_stid.sc_type = NFS4_DELEG_STID; list_add(&dp->dl_perfile, &fp->fi_delegations); - list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); + list_add(&dp->dl_perclnt, &clp->cl_delegations); + return 0; } static bool @@ -2256,15 +2321,20 @@ nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid) clid->flags = new->cl_exchange_flags; } +static bool client_has_openowners(struct nfs4_client *clp) +{ + struct nfs4_openowner *oo; + + list_for_each_entry(oo, &clp->cl_openowners, oo_perclient) { + if (!list_empty(&oo->oo_owner.so_stateids)) + return true; + } + return false; +} + static bool client_has_state(struct nfs4_client *clp) { - /* - * Note clp->cl_openowners check isn't quite right: there's no - * need to count owners without stateid's. - * - * Also note we should probably be using this in 4.0 case too. - */ - return !list_empty(&clp->cl_openowners) + return client_has_openowners(clp) #ifdef CONFIG_NFSD_PNFS || !list_empty(&clp->cl_lo_states) #endif @@ -3049,7 +3119,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* Cases below refer to rfc 3530 section 14.2.33: */ spin_lock(&nn->client_lock); conf = find_confirmed_client_by_name(&clname, nn); - if (conf) { + if (conf && client_has_state(conf)) { /* case 0: */ status = nfserr_clid_inuse; if (clp_used_exchangeid(conf)) @@ -3136,6 +3206,11 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, } else { /* case 3: normal case; new or rebooted client */ old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) { + status = nfserr_clid_inuse; + if (client_has_state(old) + && !same_creds(&unconf->cl_cred, + &old->cl_cred)) + goto out; status = mark_client_expired_locked(old); if (status) { old = NULL; @@ -3317,6 +3392,27 @@ static const struct nfs4_stateowner_operations openowner_ops = { .so_free = nfs4_free_openowner, }; +static struct nfs4_ol_stateid * +nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) +{ + struct nfs4_ol_stateid *local, *ret = NULL; + struct nfs4_openowner *oo = open->op_openowner; + + lockdep_assert_held(&fp->fi_lock); + + list_for_each_entry(local, &fp->fi_stateids, st_perfile) { + /* ignore lock owners */ + if (local->st_stateowner->so_is_open_owner == 0) + continue; + if (local->st_stateowner == &oo->oo_owner) { + ret = local; + atomic_inc(&ret->st_stid.sc_count); + break; + } + } + return ret; +} + static struct nfs4_openowner * alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, struct nfsd4_compound_state *cstate) @@ -3348,9 +3444,20 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, return ret; } -static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { +static struct nfs4_ol_stateid * +init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, + struct nfsd4_open *open) +{ + struct nfs4_openowner *oo = open->op_openowner; + struct nfs4_ol_stateid *retstp = NULL; + spin_lock(&oo->oo_owner.so_client->cl_lock); + spin_lock(&fp->fi_lock); + + retstp = nfsd4_find_existing_open(fp, open); + if (retstp) + goto out_unlock; atomic_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_OPEN_STID; INIT_LIST_HEAD(&stp->st_locks); @@ -3360,12 +3467,14 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, stp->st_access_bmap = 0; stp->st_deny_bmap = 0; stp->st_openstp = NULL; - spin_lock(&oo->oo_owner.so_client->cl_lock); + init_rwsem(&stp->st_rwsem); list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); - spin_lock(&fp->fi_lock); list_add(&stp->st_perfile, &fp->fi_stateids); + +out_unlock: spin_unlock(&fp->fi_lock); spin_unlock(&oo->oo_owner.so_client->cl_lock); + return retstp; } /* @@ -3776,27 +3885,6 @@ out: return nfs_ok; } -static struct nfs4_ol_stateid * -nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) -{ - struct nfs4_ol_stateid *local, *ret = NULL; - struct nfs4_openowner *oo = open->op_openowner; - - spin_lock(&fp->fi_lock); - list_for_each_entry(local, &fp->fi_stateids, st_perfile) { - /* ignore lock owners */ - if (local->st_stateowner->so_is_open_owner == 0) - continue; - if (local->st_stateowner == &oo->oo_owner) { - ret = local; - atomic_inc(&ret->st_stid.sc_count); - break; - } - } - spin_unlock(&fp->fi_lock); - return ret; -} - static inline int nfs4_access_to_access(u32 nfs4_access) { int flags = 0; @@ -3945,6 +4033,18 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) return fl; } +/** + * nfs4_setlease - Obtain a delegation by requesting lease from vfs layer + * @dp: a pointer to the nfs4_delegation we're adding. + * + * Return: + * On success: Return code will be 0 on success. + * + * On error: -EAGAIN if there was an existing delegation. + * nonzero if there is an error in other cases. + * + */ + static int nfs4_setlease(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_stid.sc_file; @@ -3976,16 +4076,19 @@ static int nfs4_setlease(struct nfs4_delegation *dp) goto out_unlock; /* Race breaker */ if (fp->fi_deleg_file) { - status = 0; - ++fp->fi_delegees; - hash_delegation_locked(dp, fp); + status = hash_delegation_locked(dp, fp); goto out_unlock; } fp->fi_deleg_file = filp; - fp->fi_delegees = 1; - hash_delegation_locked(dp, fp); + fp->fi_delegees = 0; + status = hash_delegation_locked(dp, fp); spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); + if (status) { + /* Should never happen, this is a new fi_deleg_file */ + WARN_ON_ONCE(1); + goto out_fput; + } return 0; out_unlock: spin_unlock(&fp->fi_lock); @@ -4005,6 +4108,15 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, if (fp->fi_had_conflict) return ERR_PTR(-EAGAIN); + spin_lock(&state_lock); + spin_lock(&fp->fi_lock); + status = nfs4_get_existing_delegation(clp, fp); + spin_unlock(&fp->fi_lock); + spin_unlock(&state_lock); + + if (status) + return ERR_PTR(status); + dp = alloc_init_deleg(clp, fh, odstate); if (!dp) return ERR_PTR(-ENOMEM); @@ -4023,9 +4135,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, status = -EAGAIN; goto out_unlock; } - ++fp->fi_delegees; - hash_delegation_locked(dp, fp); - status = 0; + status = hash_delegation_locked(dp, fp); out_unlock: spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); @@ -4160,6 +4270,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf struct nfs4_client *cl = open->op_openowner->oo_owner.so_client; struct nfs4_file *fp = NULL; struct nfs4_ol_stateid *stp = NULL; + struct nfs4_ol_stateid *swapstp = NULL; struct nfs4_delegation *dp = NULL; __be32 status; @@ -4173,7 +4284,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf status = nfs4_check_deleg(cl, open, &dp); if (status) goto out; + spin_lock(&fp->fi_lock); stp = nfsd4_find_existing_open(fp, open); + spin_unlock(&fp->fi_lock); } else { open->op_file = NULL; status = nfserr_bad_stateid; @@ -4187,15 +4300,32 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf */ if (stp) { /* Stateid was found, this is an OPEN upgrade */ + down_read(&stp->st_rwsem); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); - if (status) + if (status) { + up_read(&stp->st_rwsem); goto out; + } } else { stp = open->op_stp; open->op_stp = NULL; - init_open_stateid(stp, fp, open); + swapstp = init_open_stateid(stp, fp, open); + if (swapstp) { + nfs4_put_stid(&stp->st_stid); + stp = swapstp; + down_read(&stp->st_rwsem); + status = nfs4_upgrade_open(rqstp, fp, current_fh, + stp, open); + if (status) { + up_read(&stp->st_rwsem); + goto out; + } + goto upgrade_out; + } + down_read(&stp->st_rwsem); status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); if (status) { + up_read(&stp->st_rwsem); release_open_stateid(stp); goto out; } @@ -4205,8 +4335,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (stp->st_clnt_odstate == open->op_odstate) open->op_odstate = NULL; } - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); +upgrade_out: + nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid); + up_read(&stp->st_rwsem); if (nfsd4_has_session(&resp->cstate)) { if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { @@ -4819,10 +4950,13 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ * revoked delegations are kept only for free_stateid. */ return nfserr_bad_stateid; + down_write(&stp->st_rwsem); status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); - if (status) - return status; - return nfs4_check_fh(current_fh, &stp->st_stid); + if (status == nfs_ok) + status = nfs4_check_fh(current_fh, &stp->st_stid); + if (status != nfs_ok) + up_write(&stp->st_rwsem); + return status; } /* @@ -4869,6 +5003,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs return status; oo = openowner(stp->st_stateowner); if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); return nfserr_bad_stateid; } @@ -4899,11 +5034,13 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; oo = openowner(stp->st_stateowner); status = nfserr_bad_stateid; - if (oo->oo_flags & NFS4_OO_CONFIRMED) + if (oo->oo_flags & NFS4_OO_CONFIRMED) { + up_write(&stp->st_rwsem); goto put_stateid; + } oo->oo_flags |= NFS4_OO_CONFIRMED; - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid); + up_write(&stp->st_rwsem); dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); @@ -4975,13 +5112,11 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, goto put_stateid; } nfs4_stateid_downgrade(stp, od->od_share_access); - reset_union_bmap_deny(od->od_share_deny, stp); - - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&od->od_stateid, &stp->st_stid); status = nfs_ok; put_stateid: + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); @@ -5033,8 +5168,8 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_bump_seqid(cstate, status); if (status) goto out; - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid); + up_write(&stp->st_rwsem); nfsd4_close_open_stateid(stp); @@ -5260,6 +5395,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; + init_rwsem(&stp->st_rwsem); list_add(&stp->st_locks, &open_stp->st_locks); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); spin_lock(&fp->fi_lock); @@ -5428,6 +5564,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &open_stp, nn); if (status) goto out; + up_write(&open_stp->st_rwsem); open_sop = openowner(open_stp->st_stateowner); status = nfserr_bad_stateid; if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, @@ -5435,6 +5572,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; status = lookup_or_create_lock_state(cstate, open_stp, lock, &lock_stp, &new); + if (status == nfs_ok) + down_write(&lock_stp->st_rwsem); } else { status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, @@ -5512,9 +5651,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, err = vfs_lock_file(filp, F_SETLK, file_lock, conflock); switch (-err) { case 0: /* success! */ - update_stateid(&lock_stp->st_stid.sc_stateid); - memcpy(&lock->lk_resp_stateid, &lock_stp->st_stid.sc_stateid, - sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid); status = 0; break; case (EAGAIN): /* conflock holds conflicting lock */ @@ -5540,6 +5677,8 @@ out: seqid_mutating_err(ntohl(status))) lock_sop->lo_owner.so_seqid++; + up_write(&lock_stp->st_rwsem); + /* * If this is a new, never-before-used stateid, and we are * returning an error, then just go ahead and release it. @@ -5704,11 +5843,11 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n"); goto out_nfserr; } - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + nfs4_inc_and_copy_stateid(&locku->lu_stateid, &stp->st_stid); fput: fput(filp); put_stateid: + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 46ec934f5dee..54cde9a5864e 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -63,7 +63,6 @@ static unsigned int longest_chain; static unsigned int longest_chain_cachesize; static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); -static void cache_cleaner_func(struct work_struct *unused); static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc); static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, @@ -76,13 +75,6 @@ static struct shrinker nfsd_reply_cache_shrinker = { }; /* - * locking for the reply cache: - * A cache entry is "single use" if c_state == RC_INPROG - * Otherwise, it when accessing _prev or _next, the lock must be held. - */ -static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func); - -/* * Put a cap on the size of the DRC based on the amount of available * low memory in the machine. * @@ -203,7 +195,6 @@ void nfsd_reply_cache_shutdown(void) unsigned int i; unregister_shrinker(&nfsd_reply_cache_shrinker); - cancel_delayed_work_sync(&cache_cleaner); for (i = 0; i < drc_hashsize; i++) { struct list_head *head = &drc_hashtbl[i].lru_head; @@ -217,10 +208,8 @@ void nfsd_reply_cache_shutdown(void) drc_hashtbl = NULL; drc_hashsize = 0; - if (drc_slab) { - kmem_cache_destroy(drc_slab); - drc_slab = NULL; - } + kmem_cache_destroy(drc_slab); + drc_slab = NULL; } /* @@ -232,7 +221,6 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) { rp->c_timestamp = jiffies; list_move_tail(&rp->c_lru, &b->lru_head); - schedule_delayed_work(&cache_cleaner, RC_EXPIRE); } static long @@ -266,7 +254,6 @@ prune_cache_entries(void) { unsigned int i; long freed = 0; - bool cancel = true; for (i = 0; i < drc_hashsize; i++) { struct nfsd_drc_bucket *b = &drc_hashtbl[i]; @@ -275,26 +262,11 @@ prune_cache_entries(void) continue; spin_lock(&b->cache_lock); freed += prune_bucket(b); - if (!list_empty(&b->lru_head)) - cancel = false; spin_unlock(&b->cache_lock); } - - /* - * Conditionally rearm the job to run in RC_EXPIRE since we just - * ran the pruner. - */ - if (!cancel) - mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); return freed; } -static void -cache_cleaner_func(struct work_struct *unused) -{ - prune_cache_entries(); -} - static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) { diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 350041a40fe5..c1681ce894c5 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -631,10 +631,7 @@ fh_put(struct svc_fh *fhp) fh_unlock(fhp); fhp->fh_dentry = NULL; dput(dentry); -#ifdef CONFIG_NFSD_V3 - fhp->fh_pre_saved = 0; - fhp->fh_post_saved = 0; -#endif + fh_clear_wcc(fhp); } fh_drop_write(fhp); if (exp) { diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 1e90dad4926b..2087bae17582 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -26,16 +26,16 @@ static inline ino_t u32_to_ino_t(__u32 uino) */ typedef struct svc_fh { struct knfsd_fh fh_handle; /* FH data */ + int fh_maxsize; /* max size for fh_handle */ struct dentry * fh_dentry; /* validated dentry */ struct svc_export * fh_export; /* export pointer */ - int fh_maxsize; /* max size for fh_handle */ - unsigned char fh_locked; /* inode locked by us */ - unsigned char fh_want_write; /* remount protection taken */ + bool fh_locked; /* inode locked by us */ + bool fh_want_write; /* remount protection taken */ #ifdef CONFIG_NFSD_V3 - unsigned char fh_post_saved; /* post-op attrs saved */ - unsigned char fh_pre_saved; /* pre-op attrs saved */ + bool fh_post_saved; /* post-op attrs saved */ + bool fh_pre_saved; /* pre-op attrs saved */ /* Pre-op attributes saved during fh_lock */ __u64 fh_pre_size; /* size before operation */ @@ -213,8 +213,8 @@ static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) static inline void fh_clear_wcc(struct svc_fh *fhp) { - fhp->fh_post_saved = 0; - fhp->fh_pre_saved = 0; + fhp->fh_post_saved = false; + fhp->fh_pre_saved = false; } /* @@ -231,7 +231,7 @@ fill_pre_wcc(struct svc_fh *fhp) fhp->fh_pre_ctime = inode->i_ctime; fhp->fh_pre_size = inode->i_size; fhp->fh_pre_change = inode->i_version; - fhp->fh_pre_saved = 1; + fhp->fh_pre_saved = true; } } @@ -267,7 +267,7 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass) inode = d_inode(dentry); mutex_lock_nested(&inode->i_mutex, subclass); fill_pre_wcc(fhp); - fhp->fh_locked = 1; + fhp->fh_locked = true; } static inline void @@ -285,7 +285,7 @@ fh_unlock(struct svc_fh *fhp) if (fhp->fh_locked) { fill_post_wcc(fhp); mutex_unlock(&d_inode(fhp->fh_dentry)->i_mutex); - fhp->fh_locked = 0; + fhp->fh_locked = false; } } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 583ffc13cae2..77fdf4de91ba 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -84,7 +84,7 @@ struct nfsd4_callback_ops { * fields that are of general use to any stateid. */ struct nfs4_stid { - atomic_t sc_count; + atomic_t sc_count; #define NFS4_OPEN_STID 1 #define NFS4_LOCK_STID 2 #define NFS4_DELEG_STID 4 @@ -94,11 +94,12 @@ struct nfs4_stid { #define NFS4_REVOKED_DELEG_STID 16 #define NFS4_CLOSED_DELEG_STID 32 #define NFS4_LAYOUT_STID 64 - unsigned char sc_type; - stateid_t sc_stateid; - struct nfs4_client *sc_client; - struct nfs4_file *sc_file; - void (*sc_free)(struct nfs4_stid *); + unsigned char sc_type; + stateid_t sc_stateid; + spinlock_t sc_lock; + struct nfs4_client *sc_client; + struct nfs4_file *sc_file; + void (*sc_free)(struct nfs4_stid *); }; /* @@ -364,15 +365,6 @@ struct nfs4_client_reclaim { char cr_recdir[HEXDIR_LEN]; /* recover dir */ }; -static inline void -update_stateid(stateid_t *stateid) -{ - stateid->si_generation++; - /* Wraparound recommendation from 3530bis-13 9.1.3.2: */ - if (stateid->si_generation == 0) - stateid->si_generation = 1; -} - /* A reasonable value for REPLAY_ISIZE was estimated as follows: * The OPEN response, typically the largest, requires * 4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) + 8(verifier) + @@ -534,15 +526,16 @@ struct nfs4_file { * Better suggestions welcome. */ struct nfs4_ol_stateid { - struct nfs4_stid st_stid; /* must be first field */ - struct list_head st_perfile; - struct list_head st_perstateowner; - struct list_head st_locks; - struct nfs4_stateowner * st_stateowner; - struct nfs4_clnt_odstate * st_clnt_odstate; - unsigned char st_access_bmap; - unsigned char st_deny_bmap; - struct nfs4_ol_stateid * st_openstp; + struct nfs4_stid st_stid; + struct list_head st_perfile; + struct list_head st_perstateowner; + struct list_head st_locks; + struct nfs4_stateowner *st_stateowner; + struct nfs4_clnt_odstate *st_clnt_odstate; + unsigned char st_access_bmap; + unsigned char st_deny_bmap; + struct nfs4_ol_stateid *st_openstp; + struct rw_semaphore st_rwsem; }; static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) @@ -561,6 +554,7 @@ struct nfs4_layout_stateid { struct nfsd4_callback ls_recall; stateid_t ls_recall_sid; bool ls_recalled; + struct mutex ls_mutex; }; static inline struct nfs4_layout_stateid *layoutstateid(struct nfs4_stid *s) @@ -593,6 +587,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab); void nfs4_unhash_stid(struct nfs4_stid *s); void nfs4_put_stid(struct nfs4_stid *s); +void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid); void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); extern void nfs4_release_reclaim(struct nfsd_net *); extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, diff --git a/fs/nfsd/trace.c b/fs/nfsd/trace.c index 82f89070594c..90967466a1e5 100644 --- a/fs/nfsd/trace.c +++ b/fs/nfsd/trace.c @@ -1,5 +1,3 @@ -#include "state.h" - #define CREATE_TRACE_POINTS #include "trace.h" diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index c668520c344b..0befe762762b 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -9,6 +9,8 @@ #include <linux/tracepoint.h> +#include "state.h" + DECLARE_EVENT_CLASS(nfsd_stateid_class, TP_PROTO(stateid_t *stp), TP_ARGS(stp), diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 45c04979e7b3..994d66fbb446 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1631,7 +1631,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, /* cannot use fh_lock as we need deadlock protective ordering * so do it by hand */ trap = lock_rename(tdentry, fdentry); - ffhp->fh_locked = tfhp->fh_locked = 1; + ffhp->fh_locked = tfhp->fh_locked = true; fill_pre_wcc(ffhp); fill_pre_wcc(tfhp); @@ -1681,7 +1681,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, fill_post_wcc(ffhp); fill_post_wcc(tfhp); unlock_rename(tdentry, fdentry); - ffhp->fh_locked = tfhp->fh_locked = 0; + ffhp->fh_locked = tfhp->fh_locked = false; fh_drop_write(ffhp); out: diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index fee2451ae248..fcfc48cbe136 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -112,14 +112,14 @@ static inline int fh_want_write(struct svc_fh *fh) int ret = mnt_want_write(fh->fh_export->ex_path.mnt); if (!ret) - fh->fh_want_write = 1; + fh->fh_want_write = true; return ret; } static inline void fh_drop_write(struct svc_fh *fh) { if (fh->fh_want_write) { - fh->fh_want_write = 0; + fh->fh_want_write = false; mnt_drop_write(fh->fh_export->ex_path.mnt); } } diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 9f991007a578..ce7362c88b48 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -632,7 +632,7 @@ static inline void set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) { BUG_ON(!fhp->fh_pre_saved); - cinfo->atomic = fhp->fh_post_saved; + cinfo->atomic = (u32)fhp->fh_post_saved; cinfo->change_supported = IS_I_VERSION(d_inode(fhp->fh_dentry)); cinfo->before_change = fhp->fh_pre_change; diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index 8df0f3b7839b..2ccbf5531554 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -133,38 +133,38 @@ nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group) /** * nilfs_palloc_group_desc_nfrees - get the number of free entries in a group - * @inode: inode of metadata file using this allocator - * @group: group number * @desc: pointer to descriptor structure for the group + * @lock: spin lock protecting @desc */ static unsigned long -nilfs_palloc_group_desc_nfrees(struct inode *inode, unsigned long group, - const struct nilfs_palloc_group_desc *desc) +nilfs_palloc_group_desc_nfrees(const struct nilfs_palloc_group_desc *desc, + spinlock_t *lock) { unsigned long nfree; - spin_lock(nilfs_mdt_bgl_lock(inode, group)); + spin_lock(lock); nfree = le32_to_cpu(desc->pg_nfrees); - spin_unlock(nilfs_mdt_bgl_lock(inode, group)); + spin_unlock(lock); return nfree; } /** * nilfs_palloc_group_desc_add_entries - adjust count of free entries - * @inode: inode of metadata file using this allocator - * @group: group number * @desc: pointer to descriptor structure for the group + * @lock: spin lock protecting @desc * @n: delta to be added */ -static void -nilfs_palloc_group_desc_add_entries(struct inode *inode, - unsigned long group, - struct nilfs_palloc_group_desc *desc, - u32 n) +static u32 +nilfs_palloc_group_desc_add_entries(struct nilfs_palloc_group_desc *desc, + spinlock_t *lock, u32 n) { - spin_lock(nilfs_mdt_bgl_lock(inode, group)); + u32 nfree; + + spin_lock(lock); le32_add_cpu(&desc->pg_nfrees, n); - spin_unlock(nilfs_mdt_bgl_lock(inode, group)); + nfree = le32_to_cpu(desc->pg_nfrees); + spin_unlock(lock); + return nfree; } /** @@ -240,6 +240,26 @@ static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff, } /** + * nilfs_palloc_delete_block - delete a block on the persistent allocator file + * @inode: inode of metadata file using this allocator + * @blkoff: block offset + * @prev: nilfs_bh_assoc struct of the last used buffer + * @lock: spin lock protecting @prev + */ +static int nilfs_palloc_delete_block(struct inode *inode, unsigned long blkoff, + struct nilfs_bh_assoc *prev, + spinlock_t *lock) +{ + spin_lock(lock); + if (prev->bh && blkoff == prev->blkoff) { + brelse(prev->bh); + prev->bh = NULL; + } + spin_unlock(lock); + return nilfs_mdt_delete_block(inode, blkoff); +} + +/** * nilfs_palloc_get_desc_block - get buffer head of a group descriptor block * @inode: inode of metadata file using this allocator * @group: group number @@ -278,6 +298,22 @@ static int nilfs_palloc_get_bitmap_block(struct inode *inode, } /** + * nilfs_palloc_delete_bitmap_block - delete a bitmap block + * @inode: inode of metadata file using this allocator + * @group: group number + */ +static int nilfs_palloc_delete_bitmap_block(struct inode *inode, + unsigned long group) +{ + struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; + + return nilfs_palloc_delete_block(inode, + nilfs_palloc_bitmap_blkoff(inode, + group), + &cache->prev_bitmap, &cache->lock); +} + +/** * nilfs_palloc_get_entry_block - get buffer head of an entry block * @inode: inode of metadata file using this allocator * @nr: serial number of the entry (e.g. inode number) @@ -296,6 +332,20 @@ int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr, } /** + * nilfs_palloc_delete_entry_block - delete an entry block + * @inode: inode of metadata file using this allocator + * @nr: serial number of the entry + */ +static int nilfs_palloc_delete_entry_block(struct inode *inode, __u64 nr) +{ + struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; + + return nilfs_palloc_delete_block(inode, + nilfs_palloc_entry_blkoff(inode, nr), + &cache->prev_entry, &cache->lock); +} + +/** * nilfs_palloc_block_get_group_desc - get kernel address of a group descriptor * @inode: inode of metadata file using this allocator * @group: group number @@ -332,51 +382,40 @@ void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr, /** * nilfs_palloc_find_available_slot - find available slot in a group - * @inode: inode of metadata file using this allocator - * @group: group number - * @target: offset number of an entry in the group (start point) * @bitmap: bitmap of the group + * @target: offset number of an entry in the group (start point) * @bsize: size in bits + * @lock: spin lock protecting @bitmap */ -static int nilfs_palloc_find_available_slot(struct inode *inode, - unsigned long group, +static int nilfs_palloc_find_available_slot(unsigned char *bitmap, unsigned long target, - unsigned char *bitmap, - int bsize) -{ - int curr, pos, end, i; - - if (target > 0) { - end = (target + BITS_PER_LONG - 1) & ~(BITS_PER_LONG - 1); - if (end > bsize) - end = bsize; - pos = nilfs_find_next_zero_bit(bitmap, end, target); - if (pos < end && - !nilfs_set_bit_atomic( - nilfs_mdt_bgl_lock(inode, group), pos, bitmap)) - return pos; - } else - end = 0; - - for (i = 0, curr = end; - i < bsize; - i += BITS_PER_LONG, curr += BITS_PER_LONG) { - /* wrap around */ - if (curr >= bsize) - curr = 0; - while (*((unsigned long *)bitmap + curr / BITS_PER_LONG) - != ~0UL) { - end = curr + BITS_PER_LONG; - if (end > bsize) - end = bsize; - pos = nilfs_find_next_zero_bit(bitmap, end, curr); - if ((pos < end) && - !nilfs_set_bit_atomic( - nilfs_mdt_bgl_lock(inode, group), pos, - bitmap)) + unsigned bsize, + spinlock_t *lock) +{ + int pos, end = bsize; + + if (likely(target < bsize)) { + pos = target; + do { + pos = nilfs_find_next_zero_bit(bitmap, end, pos); + if (pos >= end) + break; + if (!nilfs_set_bit_atomic(lock, pos, bitmap)) return pos; - } + } while (++pos < end); + + end = target; + } + + /* wrap around */ + for (pos = 0; pos < end; pos++) { + pos = nilfs_find_next_zero_bit(bitmap, end, pos); + if (pos >= end) + break; + if (!nilfs_set_bit_atomic(lock, pos, bitmap)) + return pos; } + return -ENOSPC; } @@ -475,15 +514,15 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, void *desc_kaddr, *bitmap_kaddr; unsigned long group, maxgroup, ngroups; unsigned long group_offset, maxgroup_offset; - unsigned long n, entries_per_group, groups_per_desc_block; + unsigned long n, entries_per_group; unsigned long i, j; + spinlock_t *lock; int pos, ret; ngroups = nilfs_palloc_groups_count(inode); maxgroup = ngroups - 1; group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); entries_per_group = nilfs_palloc_entries_per_group(inode); - groups_per_desc_block = nilfs_palloc_groups_per_desc_block(inode); for (i = 0; i < ngroups; i += n) { if (group >= ngroups) { @@ -501,8 +540,8 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, n = nilfs_palloc_rest_groups_in_desc_block(inode, group, maxgroup); for (j = 0; j < n; j++, desc++, group++) { - if (nilfs_palloc_group_desc_nfrees(inode, group, desc) - > 0) { + lock = nilfs_mdt_bgl_lock(inode, group); + if (nilfs_palloc_group_desc_nfrees(desc, lock) > 0) { ret = nilfs_palloc_get_bitmap_block( inode, group, 1, &bitmap_bh); if (ret < 0) @@ -510,12 +549,12 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, bitmap_kaddr = kmap(bitmap_bh->b_page); bitmap = bitmap_kaddr + bh_offset(bitmap_bh); pos = nilfs_palloc_find_available_slot( - inode, group, group_offset, bitmap, - entries_per_group); + bitmap, group_offset, + entries_per_group, lock); if (pos >= 0) { /* found a free entry */ nilfs_palloc_group_desc_add_entries( - inode, group, desc, -1); + desc, lock, -1); req->pr_entry_nr = entries_per_group * group + pos; kunmap(desc_bh->b_page); @@ -573,6 +612,7 @@ void nilfs_palloc_commit_free_entry(struct inode *inode, unsigned long group, group_offset; unsigned char *bitmap; void *desc_kaddr, *bitmap_kaddr; + spinlock_t *lock; group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); desc_kaddr = kmap(req->pr_desc_bh->b_page); @@ -580,13 +620,15 @@ void nilfs_palloc_commit_free_entry(struct inode *inode, req->pr_desc_bh, desc_kaddr); bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); + lock = nilfs_mdt_bgl_lock(inode, group); - if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), - group_offset, bitmap)) - printk(KERN_WARNING "%s: entry number %llu already freed\n", - __func__, (unsigned long long)req->pr_entry_nr); + if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap)) + nilfs_warning(inode->i_sb, __func__, + "entry number %llu already freed: ino=%lu\n", + (unsigned long long)req->pr_entry_nr, + (unsigned long)inode->i_ino); else - nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); + nilfs_palloc_group_desc_add_entries(desc, lock, 1); kunmap(req->pr_bitmap_bh->b_page); kunmap(req->pr_desc_bh->b_page); @@ -611,6 +653,7 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, void *desc_kaddr, *bitmap_kaddr; unsigned char *bitmap; unsigned long group, group_offset; + spinlock_t *lock; group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); desc_kaddr = kmap(req->pr_desc_bh->b_page); @@ -618,12 +661,15 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, req->pr_desc_bh, desc_kaddr); bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); - if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), - group_offset, bitmap)) - printk(KERN_WARNING "%s: entry number %llu already freed\n", - __func__, (unsigned long long)req->pr_entry_nr); + lock = nilfs_mdt_bgl_lock(inode, group); + + if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap)) + nilfs_warning(inode->i_sb, __func__, + "entry number %llu already freed: ino=%lu\n", + (unsigned long long)req->pr_entry_nr, + (unsigned long)inode->i_ino); else - nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); + nilfs_palloc_group_desc_add_entries(desc, lock, 1); kunmap(req->pr_bitmap_bh->b_page); kunmap(req->pr_desc_bh->b_page); @@ -680,22 +726,6 @@ void nilfs_palloc_abort_free_entry(struct inode *inode, } /** - * nilfs_palloc_group_is_in - judge if an entry is in a group - * @inode: inode of metadata file using this allocator - * @group: group number - * @nr: serial number of the entry (e.g. inode number) - */ -static int -nilfs_palloc_group_is_in(struct inode *inode, unsigned long group, __u64 nr) -{ - __u64 first, last; - - first = group * nilfs_palloc_entries_per_group(inode); - last = first + nilfs_palloc_entries_per_group(inode) - 1; - return (nr >= first) && (nr <= last); -} - -/** * nilfs_palloc_freev - deallocate a set of persistent objects * @inode: inode of metadata file using this allocator * @entry_nrs: array of entry numbers to be deallocated @@ -708,9 +738,18 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) unsigned char *bitmap; void *desc_kaddr, *bitmap_kaddr; unsigned long group, group_offset; - int i, j, n, ret; + __u64 group_min_nr, last_nrs[8]; + const unsigned long epg = nilfs_palloc_entries_per_group(inode); + const unsigned epb = NILFS_MDT(inode)->mi_entries_per_block; + unsigned entry_start, end, pos; + spinlock_t *lock; + int i, j, k, ret; + u32 nfree; for (i = 0; i < nitems; i = j) { + int change_group = false; + int nempties = 0, n = 0; + group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset); ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh); if (ret < 0) @@ -721,38 +760,89 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) brelse(desc_bh); return ret; } - desc_kaddr = kmap(desc_bh->b_page); - desc = nilfs_palloc_block_get_group_desc( - inode, group, desc_bh, desc_kaddr); + + /* Get the first entry number of the group */ + group_min_nr = (__u64)group * epg; + bitmap_kaddr = kmap(bitmap_bh->b_page); bitmap = bitmap_kaddr + bh_offset(bitmap_bh); - for (j = i, n = 0; - (j < nitems) && nilfs_palloc_group_is_in(inode, group, - entry_nrs[j]); - j++) { - nilfs_palloc_group(inode, entry_nrs[j], &group_offset); - if (!nilfs_clear_bit_atomic( - nilfs_mdt_bgl_lock(inode, group), - group_offset, bitmap)) { - printk(KERN_WARNING - "%s: entry number %llu already freed\n", - __func__, - (unsigned long long)entry_nrs[j]); + lock = nilfs_mdt_bgl_lock(inode, group); + + j = i; + entry_start = rounddown(group_offset, epb); + do { + if (!nilfs_clear_bit_atomic(lock, group_offset, + bitmap)) { + nilfs_warning(inode->i_sb, __func__, + "entry number %llu already freed: ino=%lu\n", + (unsigned long long)entry_nrs[j], + (unsigned long)inode->i_ino); } else { n++; } - } - nilfs_palloc_group_desc_add_entries(inode, group, desc, n); + + j++; + if (j >= nitems || entry_nrs[j] < group_min_nr || + entry_nrs[j] >= group_min_nr + epg) { + change_group = true; + } else { + group_offset = entry_nrs[j] - group_min_nr; + if (group_offset >= entry_start && + group_offset < entry_start + epb) { + /* This entry is in the same block */ + continue; + } + } + + /* Test if the entry block is empty or not */ + end = entry_start + epb; + pos = nilfs_find_next_bit(bitmap, end, entry_start); + if (pos >= end) { + last_nrs[nempties++] = entry_nrs[j - 1]; + if (nempties >= ARRAY_SIZE(last_nrs)) + break; + } + + if (change_group) + break; + + /* Go on to the next entry block */ + entry_start = rounddown(group_offset, epb); + } while (true); kunmap(bitmap_bh->b_page); - kunmap(desc_bh->b_page); + mark_buffer_dirty(bitmap_bh); + brelse(bitmap_bh); + for (k = 0; k < nempties; k++) { + ret = nilfs_palloc_delete_entry_block(inode, + last_nrs[k]); + if (ret && ret != -ENOENT) { + nilfs_warning(inode->i_sb, __func__, + "failed to delete block of entry %llu: ino=%lu, err=%d\n", + (unsigned long long)last_nrs[k], + (unsigned long)inode->i_ino, ret); + } + } + + desc_kaddr = kmap_atomic(desc_bh->b_page); + desc = nilfs_palloc_block_get_group_desc( + inode, group, desc_bh, desc_kaddr); + nfree = nilfs_palloc_group_desc_add_entries(desc, lock, n); + kunmap_atomic(desc_kaddr); mark_buffer_dirty(desc_bh); - mark_buffer_dirty(bitmap_bh); nilfs_mdt_mark_dirty(inode); - - brelse(bitmap_bh); brelse(desc_bh); + + if (nfree == nilfs_palloc_entries_per_group(inode)) { + ret = nilfs_palloc_delete_bitmap_block(inode, group); + if (ret && ret != -ENOENT) { + nilfs_warning(inode->i_sb, __func__, + "failed to delete bitmap block of group %lu: ino=%lu, err=%d\n", + group, + (unsigned long)inode->i_ino, ret); + } + } } return 0; } diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index 4bd6451b5703..6e6f49aa53df 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h @@ -77,6 +77,7 @@ int nilfs_palloc_freev(struct inode *, __u64 *, size_t); #define nilfs_set_bit_atomic ext2_set_bit_atomic #define nilfs_clear_bit_atomic ext2_clear_bit_atomic #define nilfs_find_next_zero_bit find_next_zero_bit_le +#define nilfs_find_next_bit find_next_bit_le /** * struct nilfs_bh_assoc - block offset and buffer head association diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 919fd5bb14a8..3a3821b00486 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -919,8 +919,6 @@ static void nilfs_btree_split(struct nilfs_bmap *btree, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *right; - __u64 newkey; - __u64 newptr; int nchildren, n, move, ncblk; node = nilfs_btree_get_nonroot_node(path, level); @@ -942,9 +940,6 @@ static void nilfs_btree_split(struct nilfs_bmap *btree, if (!buffer_dirty(path[level].bp_sib_bh)) mark_buffer_dirty(path[level].bp_sib_bh); - newkey = nilfs_btree_node_get_key(right, 0); - newptr = path[level].bp_newreq.bpr_ptr; - if (move) { path[level].bp_index -= nilfs_btree_node_get_nchildren(node); nilfs_btree_node_insert(right, path[level].bp_index, @@ -1856,7 +1851,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr, const __u64 *keys, const __u64 *ptrs, int n) { - struct buffer_head *bh; + struct buffer_head *bh = NULL; union nilfs_bmap_ptr_req dreq, nreq, *di, *ni; struct nilfs_bmap_stats stats; int ret; diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 0d5fada91191..7dc23f100e57 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -155,7 +155,6 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) { struct nilfs_dat_entry *entry; - __u64 start; sector_t blocknr; void *kaddr; int ret; @@ -169,7 +168,6 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); - start = le64_to_cpu(entry->de_start); blocknr = le64_to_cpu(entry->de_blocknr); kunmap_atomic(kaddr); diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 54575e3cc1a2..088ba001c6ef 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -109,7 +109,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) goto out; file_update_time(vma->vm_file); - ret = __block_page_mkwrite(vma, vmf, nilfs_get_block); + ret = block_page_mkwrite(vma, vmf, nilfs_get_block); if (ret) { nilfs_transaction_abort(inode->i_sb); goto out; diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 4a73d6dffabf..ac2f64943ff4 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -356,7 +356,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) goto failed; mapping_set_gfp_mask(inode->i_mapping, - mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); + mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS)); root = NILFS_I(dir)->i_root; ii = NILFS_I(inode); @@ -522,7 +522,7 @@ static int __nilfs_read_inode(struct super_block *sb, up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); nilfs_set_inode_flags(inode); mapping_set_gfp_mask(inode->i_mapping, - mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); + mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS)); return 0; failed_unmap: diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index dee34d990281..1125f40233ff 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -33,6 +33,7 @@ #include "page.h" #include "mdt.h" +#include <trace/events/nilfs2.h> #define NILFS_MDT_MAX_RA_BLOCKS (16 - 1) @@ -68,6 +69,9 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, set_buffer_uptodate(bh); mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(inode); + + trace_nilfs2_mdt_insert_new_block(inode, inode->i_ino, block); + return 0; } @@ -158,6 +162,8 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, get_bh(bh); submit_bh(mode, bh); ret = 0; + + trace_nilfs2_mdt_submit_block(inode, inode->i_ino, blkoff, mode); out: get_bh(bh); *out_bh = bh; diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index fe529a87a208..03246cac3338 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -72,7 +72,7 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode) } /* Default GFP flags using highmem */ -#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM) +#define NILFS_MDT_GFP (__GFP_RECLAIM | __GFP_IO | __GFP_HIGHMEM) int nilfs_mdt_get_block(struct inode *, unsigned long, int, void (*init_block)(struct inode *, diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 37dd6b05b1b5..c9a1a491aa91 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -120,9 +120,6 @@ nilfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) struct nilfs_transaction_info ti; int err; - if (!new_valid_dev(rdev)) - return -EINVAL; - err = nilfs_transaction_begin(dir->i_sb, &ti, 1); if (err) return err; diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index ff00a0b7acb9..9b4f205d1173 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -582,7 +582,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, struct nilfs_recovery_info *ri) { struct buffer_head *bh_sum = NULL; - struct nilfs_segment_summary *sum; + struct nilfs_segment_summary *sum = NULL; sector_t pseg_start; sector_t seg_start, seg_end; /* Starting/ending DBN of full segment */ unsigned long nsalvaged_blocks = 0; @@ -814,7 +814,7 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_recovery_info *ri) { struct buffer_head *bh_sum = NULL; - struct nilfs_segment_summary *sum; + struct nilfs_segment_summary *sum = NULL; sector_t pseg_start, pseg_end, sr_pseg_start = 0; sector_t seg_start, seg_end; /* range of full segment (block number) */ sector_t b, end; diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index c6abbad9b8e3..3b65adaae7e4 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -77,6 +77,36 @@ enum { NILFS_ST_DONE, }; +#define CREATE_TRACE_POINTS +#include <trace/events/nilfs2.h> + +/* + * nilfs_sc_cstage_inc(), nilfs_sc_cstage_set(), nilfs_sc_cstage_get() are + * wrapper functions of stage count (nilfs_sc_info->sc_stage.scnt). Users of + * the variable must use them because transition of stage count must involve + * trace events (trace_nilfs2_collection_stage_transition). + * + * nilfs_sc_cstage_get() isn't required for the above purpose because it doesn't + * produce tracepoint events. It is provided just for making the intention + * clear. + */ +static inline void nilfs_sc_cstage_inc(struct nilfs_sc_info *sci) +{ + sci->sc_stage.scnt++; + trace_nilfs2_collection_stage_transition(sci); +} + +static inline void nilfs_sc_cstage_set(struct nilfs_sc_info *sci, int next_scnt) +{ + sci->sc_stage.scnt = next_scnt; + trace_nilfs2_collection_stage_transition(sci); +} + +static inline int nilfs_sc_cstage_get(struct nilfs_sc_info *sci) +{ + return sci->sc_stage.scnt; +} + /* State flags of collection */ #define NILFS_CF_NODE 0x0001 /* Collecting node blocks */ #define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */ @@ -184,11 +214,18 @@ int nilfs_transaction_begin(struct super_block *sb, { struct the_nilfs *nilfs; int ret = nilfs_prepare_segment_lock(ti); + struct nilfs_transaction_info *trace_ti; if (unlikely(ret < 0)) return ret; - if (ret > 0) + if (ret > 0) { + trace_ti = current->journal_info; + + trace_nilfs2_transaction_transition(sb, trace_ti, + trace_ti->ti_count, trace_ti->ti_flags, + TRACE_NILFS2_TRANSACTION_BEGIN); return 0; + } sb_start_intwrite(sb); @@ -199,6 +236,11 @@ int nilfs_transaction_begin(struct super_block *sb, ret = -ENOSPC; goto failed; } + + trace_ti = current->journal_info; + trace_nilfs2_transaction_transition(sb, trace_ti, trace_ti->ti_count, + trace_ti->ti_flags, + TRACE_NILFS2_TRANSACTION_BEGIN); return 0; failed: @@ -231,6 +273,8 @@ int nilfs_transaction_commit(struct super_block *sb) ti->ti_flags |= NILFS_TI_COMMIT; if (ti->ti_count > 0) { ti->ti_count--; + trace_nilfs2_transaction_transition(sb, ti, ti->ti_count, + ti->ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT); return 0; } if (nilfs->ns_writer) { @@ -242,6 +286,9 @@ int nilfs_transaction_commit(struct super_block *sb) nilfs_segctor_do_flush(sci, 0); } up_read(&nilfs->ns_segctor_sem); + trace_nilfs2_transaction_transition(sb, ti, ti->ti_count, + ti->ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT); + current->journal_info = ti->ti_save; if (ti->ti_flags & NILFS_TI_SYNC) @@ -260,10 +307,15 @@ void nilfs_transaction_abort(struct super_block *sb) BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); if (ti->ti_count > 0) { ti->ti_count--; + trace_nilfs2_transaction_transition(sb, ti, ti->ti_count, + ti->ti_flags, TRACE_NILFS2_TRANSACTION_ABORT); return; } up_read(&nilfs->ns_segctor_sem); + trace_nilfs2_transaction_transition(sb, ti, ti->ti_count, + ti->ti_flags, TRACE_NILFS2_TRANSACTION_ABORT); + current->journal_info = ti->ti_save; if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) kmem_cache_free(nilfs_transaction_cachep, ti); @@ -309,6 +361,9 @@ static void nilfs_transaction_lock(struct super_block *sb, current->journal_info = ti; for (;;) { + trace_nilfs2_transaction_transition(sb, ti, ti->ti_count, + ti->ti_flags, TRACE_NILFS2_TRANSACTION_TRYLOCK); + down_write(&nilfs->ns_segctor_sem); if (!test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) break; @@ -320,6 +375,9 @@ static void nilfs_transaction_lock(struct super_block *sb, } if (gcflag) ti->ti_flags |= NILFS_TI_GC; + + trace_nilfs2_transaction_transition(sb, ti, ti->ti_count, + ti->ti_flags, TRACE_NILFS2_TRANSACTION_LOCK); } static void nilfs_transaction_unlock(struct super_block *sb) @@ -332,6 +390,9 @@ static void nilfs_transaction_unlock(struct super_block *sb) up_write(&nilfs->ns_segctor_sem); current->journal_info = ti->ti_save; + + trace_nilfs2_transaction_transition(sb, ti, ti->ti_count, + ti->ti_flags, TRACE_NILFS2_TRANSACTION_UNLOCK); } static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci, @@ -1062,7 +1123,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) size_t ndone; int err = 0; - switch (sci->sc_stage.scnt) { + switch (nilfs_sc_cstage_get(sci)) { case NILFS_ST_INIT: /* Pre-processes */ sci->sc_stage.flags = 0; @@ -1071,7 +1132,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) sci->sc_nblk_inc = 0; sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN; if (mode == SC_LSEG_DSYNC) { - sci->sc_stage.scnt = NILFS_ST_DSYNC; + nilfs_sc_cstage_set(sci, NILFS_ST_DSYNC); goto dsync_mode; } } @@ -1079,10 +1140,10 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) sci->sc_stage.dirty_file_ptr = NULL; sci->sc_stage.gc_inode_ptr = NULL; if (mode == SC_FLUSH_DAT) { - sci->sc_stage.scnt = NILFS_ST_DAT; + nilfs_sc_cstage_set(sci, NILFS_ST_DAT); goto dat_stage; } - sci->sc_stage.scnt++; /* Fall through */ + nilfs_sc_cstage_inc(sci); /* Fall through */ case NILFS_ST_GC: if (nilfs_doing_gc()) { head = &sci->sc_gc_inodes; @@ -1103,7 +1164,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) } sci->sc_stage.gc_inode_ptr = NULL; } - sci->sc_stage.scnt++; /* Fall through */ + nilfs_sc_cstage_inc(sci); /* Fall through */ case NILFS_ST_FILE: head = &sci->sc_dirty_files; ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head, @@ -1125,10 +1186,10 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) } sci->sc_stage.dirty_file_ptr = NULL; if (mode == SC_FLUSH_FILE) { - sci->sc_stage.scnt = NILFS_ST_DONE; + nilfs_sc_cstage_set(sci, NILFS_ST_DONE); return 0; } - sci->sc_stage.scnt++; + nilfs_sc_cstage_inc(sci); sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED; /* Fall through */ case NILFS_ST_IFILE: @@ -1136,7 +1197,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) &nilfs_sc_file_ops); if (unlikely(err)) break; - sci->sc_stage.scnt++; + nilfs_sc_cstage_inc(sci); /* Creating a checkpoint */ err = nilfs_segctor_create_checkpoint(sci); if (unlikely(err)) @@ -1147,7 +1208,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) &nilfs_sc_file_ops); if (unlikely(err)) break; - sci->sc_stage.scnt++; /* Fall through */ + nilfs_sc_cstage_inc(sci); /* Fall through */ case NILFS_ST_SUFILE: err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs, sci->sc_nfreesegs, &ndone); @@ -1163,7 +1224,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) &nilfs_sc_file_ops); if (unlikely(err)) break; - sci->sc_stage.scnt++; /* Fall through */ + nilfs_sc_cstage_inc(sci); /* Fall through */ case NILFS_ST_DAT: dat_stage: err = nilfs_segctor_scan_file(sci, nilfs->ns_dat, @@ -1171,10 +1232,10 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) if (unlikely(err)) break; if (mode == SC_FLUSH_DAT) { - sci->sc_stage.scnt = NILFS_ST_DONE; + nilfs_sc_cstage_set(sci, NILFS_ST_DONE); return 0; } - sci->sc_stage.scnt++; /* Fall through */ + nilfs_sc_cstage_inc(sci); /* Fall through */ case NILFS_ST_SR: if (mode == SC_LSEG_SR) { /* Appending a super root */ @@ -1184,7 +1245,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) } /* End of a logical segment */ sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND; - sci->sc_stage.scnt = NILFS_ST_DONE; + nilfs_sc_cstage_set(sci, NILFS_ST_DONE); return 0; case NILFS_ST_DSYNC: dsync_mode: @@ -1197,7 +1258,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) if (unlikely(err)) break; sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND; - sci->sc_stage.scnt = NILFS_ST_DONE; + nilfs_sc_cstage_set(sci, NILFS_ST_DONE); return 0; case NILFS_ST_DONE: return 0; @@ -1442,7 +1503,8 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci, goto failed; /* The current segment is filled up */ - if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE) + if (mode != SC_LSEG_SR || + nilfs_sc_cstage_get(sci) < NILFS_ST_CPFILE) break; nilfs_clear_logs(&sci->sc_segbufs); @@ -1946,7 +2008,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) struct the_nilfs *nilfs = sci->sc_super->s_fs_info; int err; - sci->sc_stage.scnt = NILFS_ST_INIT; + nilfs_sc_cstage_set(sci, NILFS_ST_INIT); sci->sc_cno = nilfs->ns_cno; err = nilfs_segctor_collect_dirty_files(sci, nilfs); @@ -1974,7 +2036,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) goto failed; /* Avoid empty segment */ - if (sci->sc_stage.scnt == NILFS_ST_DONE && + if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE && nilfs_segbuf_empty(sci->sc_curseg)) { nilfs_segctor_abort_construction(sci, nilfs, 1); goto out; @@ -1988,7 +2050,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) nilfs_segctor_fill_in_file_bmap(sci); if (mode == SC_LSEG_SR && - sci->sc_stage.scnt >= NILFS_ST_CPFILE) { + nilfs_sc_cstage_get(sci) >= NILFS_ST_CPFILE) { err = nilfs_segctor_fill_in_checkpoint(sci); if (unlikely(err)) goto failed_to_write; @@ -2007,7 +2069,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) if (unlikely(err)) goto failed_to_write; - if (sci->sc_stage.scnt == NILFS_ST_DONE || + if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE || nilfs->ns_blocksize_bits != PAGE_CACHE_SHIFT) { /* * At this point, we avoid double buffering @@ -2020,7 +2082,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) if (err) goto failed_to_write; } - } while (sci->sc_stage.scnt != NILFS_ST_DONE); + } while (nilfs_sc_cstage_get(sci) != NILFS_ST_DONE); out: nilfs_segctor_drop_written_files(sci, nilfs); @@ -2430,7 +2492,6 @@ static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode) static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci) { int mode = 0; - int err; spin_lock(&sci->sc_state_lock); mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ? @@ -2438,7 +2499,7 @@ static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci) spin_unlock(&sci->sc_state_lock); if (mode) { - err = nilfs_segctor_do_construct(sci, mode); + nilfs_segctor_do_construct(sci, mode); spin_lock(&sci->sc_state_lock); sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ? diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index a48d6de1e02c..0408b9b2814b 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -67,7 +67,8 @@ struct nilfs_recovery_info { /** * struct nilfs_cstage - Context of collection stage - * @scnt: Stage count + * @scnt: Stage count, must be accessed via wrappers: + * nilfs_sc_cstage_inc(), nilfs_sc_cstage_set(), nilfs_sc_cstage_get() * @flags: State flags * @dirty_file_ptr: Pointer on dirty_files list, or inode of a target file * @gc_inode_ptr: Pointer on the list of gc-inodes diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 2a869c35c362..52821ffc11f4 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -30,6 +30,8 @@ #include "mdt.h" #include "sufile.h" +#include <trace/events/nilfs2.h> + /** * struct nilfs_sufile_info - on-memory private data of sufile * @mi: on-memory private data of metadata file @@ -317,7 +319,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) size_t susz = NILFS_MDT(sufile)->mi_entry_size; __u64 segnum, maxsegnum, last_alloc; void *kaddr; - unsigned long nsegments, ncleansegs, nsus, cnt; + unsigned long nsegments, nsus, cnt; int ret, j; down_write(&NILFS_MDT(sufile)->mi_sem); @@ -327,7 +329,6 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) goto out_sem; kaddr = kmap_atomic(header_bh->b_page); header = kaddr + bh_offset(header_bh); - ncleansegs = le64_to_cpu(header->sh_ncleansegs); last_alloc = le64_to_cpu(header->sh_last_alloc); kunmap_atomic(kaddr); @@ -358,6 +359,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) break; /* never happens */ } } + trace_nilfs2_segment_usage_check(sufile, segnum, cnt); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, &su_bh); if (ret < 0) @@ -388,6 +390,9 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) nilfs_mdt_mark_dirty(sufile); brelse(su_bh); *segnump = segnum; + + trace_nilfs2_segment_usage_allocated(sufile, segnum); + goto out_header; } @@ -490,6 +495,8 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, NILFS_SUI(sufile)->ncleansegs++; nilfs_mdt_mark_dirty(sufile); + + trace_nilfs2_segment_usage_freed(sufile, segnum); } /** diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index f47585bfeb01..354013ea22ec 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -361,7 +361,7 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off) struct nilfs_super_block *nsbp; sector_t blocknr, newblocknr; unsigned long offset; - int sb2i = -1; /* array index of the secondary superblock */ + int sb2i; /* array index of the secondary superblock */ int ret = 0; /* nilfs->ns_sem must be locked by the caller. */ @@ -372,6 +372,9 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off) } else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) { sb2i = 0; blocknr = nilfs->ns_sbh[0]->b_blocknr; + } else { + sb2i = -1; + blocknr = 0; } if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off) goto out; /* super block location is unchanged */ @@ -1405,14 +1408,10 @@ static void nilfs_destroy_cachep(void) */ rcu_barrier(); - if (nilfs_inode_cachep) - kmem_cache_destroy(nilfs_inode_cachep); - if (nilfs_transaction_cachep) - kmem_cache_destroy(nilfs_transaction_cachep); - if (nilfs_segbuf_cachep) - kmem_cache_destroy(nilfs_segbuf_cachep); - if (nilfs_btree_path_cache) - kmem_cache_destroy(nilfs_btree_path_cache); + kmem_cache_destroy(nilfs_inode_cachep); + kmem_cache_destroy(nilfs_transaction_cachep); + kmem_cache_destroy(nilfs_segbuf_cachep); + kmem_cache_destroy(nilfs_btree_path_cache); } static int __init nilfs_init_cachep(void) diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 262561fea923..9d383e5eff0e 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -525,8 +525,8 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping, } } err = add_to_page_cache_lru(*cached_page, mapping, - index, - GFP_KERNEL & mapping_gfp_mask(mapping)); + index, + mapping_gfp_constraint(mapping, GFP_KERNEL)); if (unlikely(err)) { if (err == -EEXIST) continue; diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index ddddef0021a0..709fbbd44c65 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1480,16 +1480,17 @@ static int o2hb_read_block_input(struct o2hb_region *reg, return 0; } -static ssize_t o2hb_region_block_bytes_read(struct o2hb_region *reg, +static ssize_t o2hb_region_block_bytes_show(struct config_item *item, char *page) { - return sprintf(page, "%u\n", reg->hr_block_bytes); + return sprintf(page, "%u\n", to_o2hb_region(item)->hr_block_bytes); } -static ssize_t o2hb_region_block_bytes_write(struct o2hb_region *reg, +static ssize_t o2hb_region_block_bytes_store(struct config_item *item, const char *page, size_t count) { + struct o2hb_region *reg = to_o2hb_region(item); int status; unsigned long block_bytes; unsigned int block_bits; @@ -1508,16 +1509,17 @@ static ssize_t o2hb_region_block_bytes_write(struct o2hb_region *reg, return count; } -static ssize_t o2hb_region_start_block_read(struct o2hb_region *reg, +static ssize_t o2hb_region_start_block_show(struct config_item *item, char *page) { - return sprintf(page, "%llu\n", reg->hr_start_block); + return sprintf(page, "%llu\n", to_o2hb_region(item)->hr_start_block); } -static ssize_t o2hb_region_start_block_write(struct o2hb_region *reg, +static ssize_t o2hb_region_start_block_store(struct config_item *item, const char *page, size_t count) { + struct o2hb_region *reg = to_o2hb_region(item); unsigned long long tmp; char *p = (char *)page; @@ -1533,16 +1535,16 @@ static ssize_t o2hb_region_start_block_write(struct o2hb_region *reg, return count; } -static ssize_t o2hb_region_blocks_read(struct o2hb_region *reg, - char *page) +static ssize_t o2hb_region_blocks_show(struct config_item *item, char *page) { - return sprintf(page, "%d\n", reg->hr_blocks); + return sprintf(page, "%d\n", to_o2hb_region(item)->hr_blocks); } -static ssize_t o2hb_region_blocks_write(struct o2hb_region *reg, +static ssize_t o2hb_region_blocks_store(struct config_item *item, const char *page, size_t count) { + struct o2hb_region *reg = to_o2hb_region(item); unsigned long tmp; char *p = (char *)page; @@ -1561,13 +1563,12 @@ static ssize_t o2hb_region_blocks_write(struct o2hb_region *reg, return count; } -static ssize_t o2hb_region_dev_read(struct o2hb_region *reg, - char *page) +static ssize_t o2hb_region_dev_show(struct config_item *item, char *page) { unsigned int ret = 0; - if (reg->hr_bdev) - ret = sprintf(page, "%s\n", reg->hr_dev_name); + if (to_o2hb_region(item)->hr_bdev) + ret = sprintf(page, "%s\n", to_o2hb_region(item)->hr_dev_name); return ret; } @@ -1677,10 +1678,11 @@ out: } /* this is acting as commit; we set up all of hr_bdev and hr_task or nothing */ -static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, +static ssize_t o2hb_region_dev_store(struct config_item *item, const char *page, size_t count) { + struct o2hb_region *reg = to_o2hb_region(item); struct task_struct *hb_task; long fd; int sectsize; @@ -1841,9 +1843,9 @@ out: return ret; } -static ssize_t o2hb_region_pid_read(struct o2hb_region *reg, - char *page) +static ssize_t o2hb_region_pid_show(struct config_item *item, char *page) { + struct o2hb_region *reg = to_o2hb_region(item); pid_t pid = 0; spin_lock(&o2hb_live_lock); @@ -1857,92 +1859,23 @@ static ssize_t o2hb_region_pid_read(struct o2hb_region *reg, return sprintf(page, "%u\n", pid); } -struct o2hb_region_attribute { - struct configfs_attribute attr; - ssize_t (*show)(struct o2hb_region *, char *); - ssize_t (*store)(struct o2hb_region *, const char *, size_t); -}; - -static struct o2hb_region_attribute o2hb_region_attr_block_bytes = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "block_bytes", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2hb_region_block_bytes_read, - .store = o2hb_region_block_bytes_write, -}; - -static struct o2hb_region_attribute o2hb_region_attr_start_block = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "start_block", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2hb_region_start_block_read, - .store = o2hb_region_start_block_write, -}; - -static struct o2hb_region_attribute o2hb_region_attr_blocks = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "blocks", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2hb_region_blocks_read, - .store = o2hb_region_blocks_write, -}; - -static struct o2hb_region_attribute o2hb_region_attr_dev = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "dev", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2hb_region_dev_read, - .store = o2hb_region_dev_write, -}; - -static struct o2hb_region_attribute o2hb_region_attr_pid = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "pid", - .ca_mode = S_IRUGO | S_IRUSR }, - .show = o2hb_region_pid_read, -}; +CONFIGFS_ATTR(o2hb_region_, block_bytes); +CONFIGFS_ATTR(o2hb_region_, start_block); +CONFIGFS_ATTR(o2hb_region_, blocks); +CONFIGFS_ATTR(o2hb_region_, dev); +CONFIGFS_ATTR_RO(o2hb_region_, pid); static struct configfs_attribute *o2hb_region_attrs[] = { - &o2hb_region_attr_block_bytes.attr, - &o2hb_region_attr_start_block.attr, - &o2hb_region_attr_blocks.attr, - &o2hb_region_attr_dev.attr, - &o2hb_region_attr_pid.attr, + &o2hb_region_attr_block_bytes, + &o2hb_region_attr_start_block, + &o2hb_region_attr_blocks, + &o2hb_region_attr_dev, + &o2hb_region_attr_pid, NULL, }; -static ssize_t o2hb_region_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - struct o2hb_region *reg = to_o2hb_region(item); - struct o2hb_region_attribute *o2hb_region_attr = - container_of(attr, struct o2hb_region_attribute, attr); - ssize_t ret = 0; - - if (o2hb_region_attr->show) - ret = o2hb_region_attr->show(reg, page); - return ret; -} - -static ssize_t o2hb_region_store(struct config_item *item, - struct configfs_attribute *attr, - const char *page, size_t count) -{ - struct o2hb_region *reg = to_o2hb_region(item); - struct o2hb_region_attribute *o2hb_region_attr = - container_of(attr, struct o2hb_region_attribute, attr); - ssize_t ret = -EINVAL; - - if (o2hb_region_attr->store) - ret = o2hb_region_attr->store(reg, page, count); - return ret; -} - static struct configfs_item_operations o2hb_region_item_ops = { .release = o2hb_region_release, - .show_attribute = o2hb_region_show, - .store_attribute = o2hb_region_store, }; static struct config_item_type o2hb_region_type = { @@ -2137,49 +2070,14 @@ unlock: spin_unlock(&o2hb_live_lock); } -struct o2hb_heartbeat_group_attribute { - struct configfs_attribute attr; - ssize_t (*show)(struct o2hb_heartbeat_group *, char *); - ssize_t (*store)(struct o2hb_heartbeat_group *, const char *, size_t); -}; - -static ssize_t o2hb_heartbeat_group_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - struct o2hb_heartbeat_group *reg = to_o2hb_heartbeat_group(to_config_group(item)); - struct o2hb_heartbeat_group_attribute *o2hb_heartbeat_group_attr = - container_of(attr, struct o2hb_heartbeat_group_attribute, attr); - ssize_t ret = 0; - - if (o2hb_heartbeat_group_attr->show) - ret = o2hb_heartbeat_group_attr->show(reg, page); - return ret; -} - -static ssize_t o2hb_heartbeat_group_store(struct config_item *item, - struct configfs_attribute *attr, - const char *page, size_t count) -{ - struct o2hb_heartbeat_group *reg = to_o2hb_heartbeat_group(to_config_group(item)); - struct o2hb_heartbeat_group_attribute *o2hb_heartbeat_group_attr = - container_of(attr, struct o2hb_heartbeat_group_attribute, attr); - ssize_t ret = -EINVAL; - - if (o2hb_heartbeat_group_attr->store) - ret = o2hb_heartbeat_group_attr->store(reg, page, count); - return ret; -} - -static ssize_t o2hb_heartbeat_group_threshold_show(struct o2hb_heartbeat_group *group, - char *page) +static ssize_t o2hb_heartbeat_group_threshold_show(struct config_item *item, + char *page) { return sprintf(page, "%u\n", o2hb_dead_threshold); } -static ssize_t o2hb_heartbeat_group_threshold_store(struct o2hb_heartbeat_group *group, - const char *page, - size_t count) +static ssize_t o2hb_heartbeat_group_threshold_store(struct config_item *item, + const char *page, size_t count) { unsigned long tmp; char *p = (char *)page; @@ -2194,17 +2092,15 @@ static ssize_t o2hb_heartbeat_group_threshold_store(struct o2hb_heartbeat_group return count; } -static -ssize_t o2hb_heartbeat_group_mode_show(struct o2hb_heartbeat_group *group, - char *page) +static ssize_t o2hb_heartbeat_group_mode_show(struct config_item *item, + char *page) { return sprintf(page, "%s\n", o2hb_heartbeat_mode_desc[o2hb_heartbeat_mode]); } -static -ssize_t o2hb_heartbeat_group_mode_store(struct o2hb_heartbeat_group *group, - const char *page, size_t count) +static ssize_t o2hb_heartbeat_group_mode_store(struct config_item *item, + const char *page, size_t count) { unsigned int i; int ret; @@ -2229,33 +2125,15 @@ ssize_t o2hb_heartbeat_group_mode_store(struct o2hb_heartbeat_group *group, } -static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "dead_threshold", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2hb_heartbeat_group_threshold_show, - .store = o2hb_heartbeat_group_threshold_store, -}; - -static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_mode = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "mode", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2hb_heartbeat_group_mode_show, - .store = o2hb_heartbeat_group_mode_store, -}; +CONFIGFS_ATTR(o2hb_heartbeat_group_, threshold); +CONFIGFS_ATTR(o2hb_heartbeat_group_, mode); static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { - &o2hb_heartbeat_group_attr_threshold.attr, - &o2hb_heartbeat_group_attr_mode.attr, + &o2hb_heartbeat_group_attr_threshold, + &o2hb_heartbeat_group_attr_mode, NULL, }; -static struct configfs_item_operations o2hb_heartbeat_group_item_ops = { - .show_attribute = o2hb_heartbeat_group_show, - .store_attribute = o2hb_heartbeat_group_store, -}; - static struct configfs_group_operations o2hb_heartbeat_group_group_ops = { .make_item = o2hb_heartbeat_group_make_item, .drop_item = o2hb_heartbeat_group_drop_item, @@ -2263,7 +2141,6 @@ static struct configfs_group_operations o2hb_heartbeat_group_group_ops = { static struct config_item_type o2hb_heartbeat_group_type = { .ct_group_ops = &o2hb_heartbeat_group_group_ops, - .ct_item_ops = &o2hb_heartbeat_group_item_ops, .ct_attrs = o2hb_heartbeat_group_attrs, .ct_owner = THIS_MODULE, }; diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index 441c84e169e6..72afdca3cea7 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -172,9 +172,9 @@ static void o2nm_node_release(struct config_item *item) kfree(node); } -static ssize_t o2nm_node_num_read(struct o2nm_node *node, char *page) +static ssize_t o2nm_node_num_show(struct config_item *item, char *page) { - return sprintf(page, "%d\n", node->nd_num); + return sprintf(page, "%d\n", to_o2nm_node(item)->nd_num); } static struct o2nm_cluster *to_o2nm_cluster_from_node(struct o2nm_node *node) @@ -188,15 +188,16 @@ enum { O2NM_NODE_ATTR_NUM = 0, O2NM_NODE_ATTR_PORT, O2NM_NODE_ATTR_ADDRESS, - O2NM_NODE_ATTR_LOCAL, }; -static ssize_t o2nm_node_num_write(struct o2nm_node *node, const char *page, +static ssize_t o2nm_node_num_store(struct config_item *item, const char *page, size_t count) { + struct o2nm_node *node = to_o2nm_node(item); struct o2nm_cluster *cluster = to_o2nm_cluster_from_node(node); unsigned long tmp; char *p = (char *)page; + int ret = 0; tmp = simple_strtoul(p, &p, 0); if (!p || (*p && (*p != '\n'))) @@ -215,26 +216,30 @@ static ssize_t o2nm_node_num_write(struct o2nm_node *node, const char *page, write_lock(&cluster->cl_nodes_lock); if (cluster->cl_nodes[tmp]) - p = NULL; + ret = -EEXIST; + else if (test_and_set_bit(O2NM_NODE_ATTR_NUM, + &node->nd_set_attributes)) + ret = -EBUSY; else { cluster->cl_nodes[tmp] = node; node->nd_num = tmp; set_bit(tmp, cluster->cl_nodes_bitmap); } write_unlock(&cluster->cl_nodes_lock); - if (p == NULL) - return -EEXIST; + if (ret) + return ret; return count; } -static ssize_t o2nm_node_ipv4_port_read(struct o2nm_node *node, char *page) +static ssize_t o2nm_node_ipv4_port_show(struct config_item *item, char *page) { - return sprintf(page, "%u\n", ntohs(node->nd_ipv4_port)); + return sprintf(page, "%u\n", ntohs(to_o2nm_node(item)->nd_ipv4_port)); } -static ssize_t o2nm_node_ipv4_port_write(struct o2nm_node *node, +static ssize_t o2nm_node_ipv4_port_store(struct config_item *item, const char *page, size_t count) { + struct o2nm_node *node = to_o2nm_node(item); unsigned long tmp; char *p = (char *)page; @@ -247,20 +252,23 @@ static ssize_t o2nm_node_ipv4_port_write(struct o2nm_node *node, if (tmp >= (u16)-1) return -ERANGE; + if (test_and_set_bit(O2NM_NODE_ATTR_PORT, &node->nd_set_attributes)) + return -EBUSY; node->nd_ipv4_port = htons(tmp); return count; } -static ssize_t o2nm_node_ipv4_address_read(struct o2nm_node *node, char *page) +static ssize_t o2nm_node_ipv4_address_show(struct config_item *item, char *page) { - return sprintf(page, "%pI4\n", &node->nd_ipv4_address); + return sprintf(page, "%pI4\n", &to_o2nm_node(item)->nd_ipv4_address); } -static ssize_t o2nm_node_ipv4_address_write(struct o2nm_node *node, +static ssize_t o2nm_node_ipv4_address_store(struct config_item *item, const char *page, size_t count) { + struct o2nm_node *node = to_o2nm_node(item); struct o2nm_cluster *cluster = to_o2nm_cluster_from_node(node); int ret, i; struct rb_node **p, *parent; @@ -282,6 +290,9 @@ static ssize_t o2nm_node_ipv4_address_write(struct o2nm_node *node, write_lock(&cluster->cl_nodes_lock); if (o2nm_node_ip_tree_lookup(cluster, ipv4_addr, &p, &parent)) ret = -EEXIST; + else if (test_and_set_bit(O2NM_NODE_ATTR_ADDRESS, + &node->nd_set_attributes)) + ret = -EBUSY; else { rb_link_node(&node->nd_ip_node, parent, p); rb_insert_color(&node->nd_ip_node, &cluster->cl_node_ip_tree); @@ -295,14 +306,15 @@ static ssize_t o2nm_node_ipv4_address_write(struct o2nm_node *node, return count; } -static ssize_t o2nm_node_local_read(struct o2nm_node *node, char *page) +static ssize_t o2nm_node_local_show(struct config_item *item, char *page) { - return sprintf(page, "%d\n", node->nd_local); + return sprintf(page, "%d\n", to_o2nm_node(item)->nd_local); } -static ssize_t o2nm_node_local_write(struct o2nm_node *node, const char *page, +static ssize_t o2nm_node_local_store(struct config_item *item, const char *page, size_t count) { + struct o2nm_node *node = to_o2nm_node(item); struct o2nm_cluster *cluster = to_o2nm_cluster_from_node(node); unsigned long tmp; char *p = (char *)page; @@ -349,108 +361,21 @@ static ssize_t o2nm_node_local_write(struct o2nm_node *node, const char *page, return count; } -struct o2nm_node_attribute { - struct configfs_attribute attr; - ssize_t (*show)(struct o2nm_node *, char *); - ssize_t (*store)(struct o2nm_node *, const char *, size_t); -}; - -static struct o2nm_node_attribute o2nm_node_attr_num = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "num", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2nm_node_num_read, - .store = o2nm_node_num_write, -}; - -static struct o2nm_node_attribute o2nm_node_attr_ipv4_port = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "ipv4_port", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2nm_node_ipv4_port_read, - .store = o2nm_node_ipv4_port_write, -}; - -static struct o2nm_node_attribute o2nm_node_attr_ipv4_address = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "ipv4_address", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2nm_node_ipv4_address_read, - .store = o2nm_node_ipv4_address_write, -}; - -static struct o2nm_node_attribute o2nm_node_attr_local = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "local", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2nm_node_local_read, - .store = o2nm_node_local_write, -}; +CONFIGFS_ATTR(o2nm_node_, num); +CONFIGFS_ATTR(o2nm_node_, ipv4_port); +CONFIGFS_ATTR(o2nm_node_, ipv4_address); +CONFIGFS_ATTR(o2nm_node_, local); static struct configfs_attribute *o2nm_node_attrs[] = { - [O2NM_NODE_ATTR_NUM] = &o2nm_node_attr_num.attr, - [O2NM_NODE_ATTR_PORT] = &o2nm_node_attr_ipv4_port.attr, - [O2NM_NODE_ATTR_ADDRESS] = &o2nm_node_attr_ipv4_address.attr, - [O2NM_NODE_ATTR_LOCAL] = &o2nm_node_attr_local.attr, + &o2nm_node_attr_num, + &o2nm_node_attr_ipv4_port, + &o2nm_node_attr_ipv4_address, + &o2nm_node_attr_local, NULL, }; -static int o2nm_attr_index(struct configfs_attribute *attr) -{ - int i; - for (i = 0; i < ARRAY_SIZE(o2nm_node_attrs); i++) { - if (attr == o2nm_node_attrs[i]) - return i; - } - BUG(); - return 0; -} - -static ssize_t o2nm_node_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - struct o2nm_node *node = to_o2nm_node(item); - struct o2nm_node_attribute *o2nm_node_attr = - container_of(attr, struct o2nm_node_attribute, attr); - ssize_t ret = 0; - - if (o2nm_node_attr->show) - ret = o2nm_node_attr->show(node, page); - return ret; -} - -static ssize_t o2nm_node_store(struct config_item *item, - struct configfs_attribute *attr, - const char *page, size_t count) -{ - struct o2nm_node *node = to_o2nm_node(item); - struct o2nm_node_attribute *o2nm_node_attr = - container_of(attr, struct o2nm_node_attribute, attr); - ssize_t ret; - int attr_index = o2nm_attr_index(attr); - - if (o2nm_node_attr->store == NULL) { - ret = -EINVAL; - goto out; - } - - if (test_bit(attr_index, &node->nd_set_attributes)) - return -EBUSY; - - ret = o2nm_node_attr->store(node, page, count); - if (ret < count) - goto out; - - set_bit(attr_index, &node->nd_set_attributes); -out: - return ret; -} - static struct configfs_item_operations o2nm_node_item_ops = { .release = o2nm_node_release, - .show_attribute = o2nm_node_show, - .store_attribute = o2nm_node_store, }; static struct config_item_type o2nm_node_type = { @@ -475,12 +400,6 @@ static struct o2nm_node_group *to_o2nm_node_group(struct config_group *group) } #endif -struct o2nm_cluster_attribute { - struct configfs_attribute attr; - ssize_t (*show)(struct o2nm_cluster *, char *); - ssize_t (*store)(struct o2nm_cluster *, const char *, size_t); -}; - static ssize_t o2nm_cluster_attr_write(const char *page, ssize_t count, unsigned int *val) { @@ -501,15 +420,16 @@ static ssize_t o2nm_cluster_attr_write(const char *page, ssize_t count, return count; } -static ssize_t o2nm_cluster_attr_idle_timeout_ms_read( - struct o2nm_cluster *cluster, char *page) +static ssize_t o2nm_cluster_idle_timeout_ms_show(struct config_item *item, + char *page) { - return sprintf(page, "%u\n", cluster->cl_idle_timeout_ms); + return sprintf(page, "%u\n", to_o2nm_cluster(item)->cl_idle_timeout_ms); } -static ssize_t o2nm_cluster_attr_idle_timeout_ms_write( - struct o2nm_cluster *cluster, const char *page, size_t count) +static ssize_t o2nm_cluster_idle_timeout_ms_store(struct config_item *item, + const char *page, size_t count) { + struct o2nm_cluster *cluster = to_o2nm_cluster(item); ssize_t ret; unsigned int val; @@ -536,15 +456,17 @@ static ssize_t o2nm_cluster_attr_idle_timeout_ms_write( return ret; } -static ssize_t o2nm_cluster_attr_keepalive_delay_ms_read( - struct o2nm_cluster *cluster, char *page) +static ssize_t o2nm_cluster_keepalive_delay_ms_show( + struct config_item *item, char *page) { - return sprintf(page, "%u\n", cluster->cl_keepalive_delay_ms); + return sprintf(page, "%u\n", + to_o2nm_cluster(item)->cl_keepalive_delay_ms); } -static ssize_t o2nm_cluster_attr_keepalive_delay_ms_write( - struct o2nm_cluster *cluster, const char *page, size_t count) +static ssize_t o2nm_cluster_keepalive_delay_ms_store( + struct config_item *item, const char *page, size_t count) { + struct o2nm_cluster *cluster = to_o2nm_cluster(item); ssize_t ret; unsigned int val; @@ -571,22 +493,24 @@ static ssize_t o2nm_cluster_attr_keepalive_delay_ms_write( return ret; } -static ssize_t o2nm_cluster_attr_reconnect_delay_ms_read( - struct o2nm_cluster *cluster, char *page) +static ssize_t o2nm_cluster_reconnect_delay_ms_show( + struct config_item *item, char *page) { - return sprintf(page, "%u\n", cluster->cl_reconnect_delay_ms); + return sprintf(page, "%u\n", + to_o2nm_cluster(item)->cl_reconnect_delay_ms); } -static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write( - struct o2nm_cluster *cluster, const char *page, size_t count) +static ssize_t o2nm_cluster_reconnect_delay_ms_store( + struct config_item *item, const char *page, size_t count) { return o2nm_cluster_attr_write(page, count, - &cluster->cl_reconnect_delay_ms); + &to_o2nm_cluster(item)->cl_reconnect_delay_ms); } -static ssize_t o2nm_cluster_attr_fence_method_read( - struct o2nm_cluster *cluster, char *page) +static ssize_t o2nm_cluster_fence_method_show( + struct config_item *item, char *page) { + struct o2nm_cluster *cluster = to_o2nm_cluster(item); ssize_t ret = 0; if (cluster) @@ -595,8 +519,8 @@ static ssize_t o2nm_cluster_attr_fence_method_read( return ret; } -static ssize_t o2nm_cluster_attr_fence_method_write( - struct o2nm_cluster *cluster, const char *page, size_t count) +static ssize_t o2nm_cluster_fence_method_store( + struct config_item *item, const char *page, size_t count) { unsigned int i; @@ -608,10 +532,10 @@ static ssize_t o2nm_cluster_attr_fence_method_write( continue; if (strncasecmp(page, o2nm_fence_method_desc[i], count - 1)) continue; - if (cluster->cl_fence_method != i) { + if (to_o2nm_cluster(item)->cl_fence_method != i) { printk(KERN_INFO "ocfs2: Changing fence method to %s\n", o2nm_fence_method_desc[i]); - cluster->cl_fence_method = i; + to_o2nm_cluster(item)->cl_fence_method = i; } return count; } @@ -620,79 +544,18 @@ bail: return -EINVAL; } -static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "idle_timeout_ms", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2nm_cluster_attr_idle_timeout_ms_read, - .store = o2nm_cluster_attr_idle_timeout_ms_write, -}; - -static struct o2nm_cluster_attribute o2nm_cluster_attr_keepalive_delay_ms = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "keepalive_delay_ms", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2nm_cluster_attr_keepalive_delay_ms_read, - .store = o2nm_cluster_attr_keepalive_delay_ms_write, -}; - -static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "reconnect_delay_ms", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2nm_cluster_attr_reconnect_delay_ms_read, - .store = o2nm_cluster_attr_reconnect_delay_ms_write, -}; - -static struct o2nm_cluster_attribute o2nm_cluster_attr_fence_method = { - .attr = { .ca_owner = THIS_MODULE, - .ca_name = "fence_method", - .ca_mode = S_IRUGO | S_IWUSR }, - .show = o2nm_cluster_attr_fence_method_read, - .store = o2nm_cluster_attr_fence_method_write, -}; +CONFIGFS_ATTR(o2nm_cluster_, idle_timeout_ms); +CONFIGFS_ATTR(o2nm_cluster_, keepalive_delay_ms); +CONFIGFS_ATTR(o2nm_cluster_, reconnect_delay_ms); +CONFIGFS_ATTR(o2nm_cluster_, fence_method); static struct configfs_attribute *o2nm_cluster_attrs[] = { - &o2nm_cluster_attr_idle_timeout_ms.attr, - &o2nm_cluster_attr_keepalive_delay_ms.attr, - &o2nm_cluster_attr_reconnect_delay_ms.attr, - &o2nm_cluster_attr_fence_method.attr, + &o2nm_cluster_attr_idle_timeout_ms, + &o2nm_cluster_attr_keepalive_delay_ms, + &o2nm_cluster_attr_reconnect_delay_ms, + &o2nm_cluster_attr_fence_method, NULL, }; -static ssize_t o2nm_cluster_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - struct o2nm_cluster *cluster = to_o2nm_cluster(item); - struct o2nm_cluster_attribute *o2nm_cluster_attr = - container_of(attr, struct o2nm_cluster_attribute, attr); - ssize_t ret = 0; - - if (o2nm_cluster_attr->show) - ret = o2nm_cluster_attr->show(cluster, page); - return ret; -} - -static ssize_t o2nm_cluster_store(struct config_item *item, - struct configfs_attribute *attr, - const char *page, size_t count) -{ - struct o2nm_cluster *cluster = to_o2nm_cluster(item); - struct o2nm_cluster_attribute *o2nm_cluster_attr = - container_of(attr, struct o2nm_cluster_attribute, attr); - ssize_t ret; - - if (o2nm_cluster_attr->store == NULL) { - ret = -EINVAL; - goto out; - } - - ret = o2nm_cluster_attr->store(cluster, page, count); - if (ret < count) - goto out; -out: - return ret; -} static struct config_item *o2nm_node_group_make_item(struct config_group *group, const char *name) @@ -773,8 +636,6 @@ static void o2nm_cluster_release(struct config_item *item) static struct configfs_item_operations o2nm_cluster_item_ops = { .release = o2nm_cluster_release, - .show_attribute = o2nm_cluster_show, - .store_attribute = o2nm_cluster_store, }; static struct config_item_type o2nm_cluster_type = { diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 3b48ac25d8a7..a03f6f433075 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -372,6 +372,8 @@ static int ocfs2_mknod(struct inode *dir, mlog_errno(status); goto leave; } + /* update inode->i_mode after mask with "umask". */ + inode->i_mode = mode; handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb, S_ISDIR(mode), diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index ebfdea78659b..e9164f09841b 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -7229,9 +7229,10 @@ leave: /* * 'security' attributes support */ -static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list, +static size_t ocfs2_xattr_security_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, const char *name, - size_t name_len, int type) + size_t name_len) { const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -7244,8 +7245,9 @@ static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list, return total_len; } -static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int ocfs2_xattr_security_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (strcmp(name, "") == 0) return -EINVAL; @@ -7253,8 +7255,9 @@ static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name, name, buffer, size); } -static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +static int ocfs2_xattr_security_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (strcmp(name, "") == 0) return -EINVAL; @@ -7319,9 +7322,10 @@ const struct xattr_handler ocfs2_xattr_security_handler = { /* * 'trusted' attributes support */ -static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list, +static size_t ocfs2_xattr_trusted_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, const char *name, - size_t name_len, int type) + size_t name_len) { const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -7337,8 +7341,9 @@ static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list, return total_len; } -static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { if (strcmp(name, "") == 0) return -EINVAL; @@ -7346,8 +7351,9 @@ static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name, name, buffer, size); } -static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (strcmp(name, "") == 0) return -EINVAL; @@ -7366,9 +7372,10 @@ const struct xattr_handler ocfs2_xattr_trusted_handler = { /* * 'user' attributes support */ -static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list, +static size_t ocfs2_xattr_user_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, const char *name, - size_t name_len, int type) + size_t name_len) { const size_t prefix_len = XATTR_USER_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -7385,8 +7392,9 @@ static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list, return total_len; } -static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) +static int ocfs2_xattr_user_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *buffer, size_t size) { struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); @@ -7398,8 +7406,9 @@ static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name, buffer, size); } -static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +static int ocfs2_xattr_user_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); diff --git a/fs/pipe.c b/fs/pipe.c index 8865f7963700..42cf8ddf0e55 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -366,18 +366,17 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) int offset = buf->offset + buf->len; if (ops->can_merge && offset + chars <= PAGE_SIZE) { - int error = ops->confirm(pipe, buf); - if (error) + ret = ops->confirm(pipe, buf); + if (ret) goto out; ret = copy_page_from_iter(buf->page, offset, chars, from); if (unlikely(ret < chars)) { - error = -EFAULT; + ret = -EFAULT; goto out; } do_wakeup = 1; - buf->len += chars; - ret = chars; + buf->len += ret; if (!iov_iter_count(from)) goto out; } @@ -693,17 +692,20 @@ int create_pipe_files(struct file **res, int flags) d_instantiate(path.dentry, inode); - err = -ENFILE; f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops); - if (IS_ERR(f)) + if (IS_ERR(f)) { + err = PTR_ERR(f); goto err_dentry; + } f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); f->private_data = inode->i_pipe; res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops); - if (IS_ERR(res[0])) + if (IS_ERR(res[0])) { + err = PTR_ERR(res[0]); goto err_file; + } path_get(&path); res[0]->private_data = inode->i_pipe; diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 4fb17ded7d47..4adde1e2cbec 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -762,18 +762,21 @@ posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl, EXPORT_SYMBOL (posix_acl_to_xattr); static int -posix_acl_xattr_get(struct dentry *dentry, const char *name, - void *value, size_t size, int type) +posix_acl_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + void *value, size_t size) { struct posix_acl *acl; int error; + if (strcmp(name, "") != 0) + return -EINVAL; if (!IS_POSIXACL(d_backing_inode(dentry))) return -EOPNOTSUPP; if (d_is_symlink(dentry)) return -EOPNOTSUPP; - acl = get_acl(d_backing_inode(dentry), type); + acl = get_acl(d_backing_inode(dentry), handler->flags); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl == NULL) @@ -786,19 +789,22 @@ posix_acl_xattr_get(struct dentry *dentry, const char *name, } static int -posix_acl_xattr_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) +posix_acl_xattr_set(const struct xattr_handler *handler, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { struct inode *inode = d_backing_inode(dentry); struct posix_acl *acl = NULL; int ret; + if (strcmp(name, "") != 0) + return -EINVAL; if (!IS_POSIXACL(inode)) return -EOPNOTSUPP; if (!inode->i_op->set_acl) return -EOPNOTSUPP; - if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) + if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) return value ? -EACCES : 0; if (!inode_owner_or_capable(inode)) return -EPERM; @@ -815,28 +821,22 @@ posix_acl_xattr_set(struct dentry *dentry, const char *name, } } - ret = inode->i_op->set_acl(inode, acl, type); + ret = inode->i_op->set_acl(inode, acl, handler->flags); out: posix_acl_release(acl); return ret; } static size_t -posix_acl_xattr_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) +posix_acl_xattr_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { - const char *xname; + const char *xname = handler->prefix; size_t size; if (!IS_POSIXACL(d_backing_inode(dentry))) - return -EOPNOTSUPP; - if (d_is_symlink(dentry)) - return -EOPNOTSUPP; - - if (type == ACL_TYPE_ACCESS) - xname = POSIX_ACL_XATTR_ACCESS; - else - xname = POSIX_ACL_XATTR_DEFAULT; + return 0; size = strlen(xname) + 1; if (list && size <= list_size) diff --git a/fs/proc/array.c b/fs/proc/array.c index eed2050db9be..d73291f5f0fc 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -91,18 +91,18 @@ static inline void task_name(struct seq_file *m, struct task_struct *p) { char *buf; + size_t size; char tcomm[sizeof(p->comm)]; + int ret; get_task_comm(tcomm, p); seq_puts(m, "Name:\t"); - buf = m->buf + m->count; - /* Ignore error for now */ - buf += string_escape_str(tcomm, buf, m->size - m->count, - ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\"); + size = seq_get_buf(m, &buf); + ret = string_escape_str(tcomm, buf, size, ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\"); + seq_commit(m, ret < size ? ret : -1); - m->count = buf - m->buf; seq_putc(m, '\n'); } diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 6e5fcd00733e..3c2a915c695a 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -291,11 +291,19 @@ static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, */ int proc_fd_permission(struct inode *inode, int mask) { - int rv = generic_permission(inode, mask); + struct task_struct *p; + int rv; + + rv = generic_permission(inode, mask); if (rv == 0) - return 0; - if (task_tgid(current) == proc_pid(inode)) + return rv; + + rcu_read_lock(); + p = pid_task(proc_pid(inode), PIDTYPE_PID); + if (p && same_thread_group(p, current)) rv = 0; + rcu_read_unlock(); + return rv; } diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index fdda62e6115e..fe5b6e6c4671 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -948,7 +948,7 @@ static struct ctl_dir *get_subdir(struct ctl_dir *dir, found: subdir->header.nreg++; failed: - if (unlikely(IS_ERR(subdir))) { + if (IS_ERR(subdir)) { pr_err("sysctl could not get directory: "); sysctl_print_dir(dir); pr_cont("/%*.*s %ld\n", diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 5f1c9c29eb8c..47f96988fdd4 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -712,9 +712,6 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); - if (!new_valid_dev(rdev)) - return -EINVAL; - retval = dquot_initialize(dir); if (retval) return retval; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index e87f9b52bf06..66b26fdfff8d 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -778,7 +778,7 @@ reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, if (!handler || get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1) return -EOPNOTSUPP; - return handler->get(dentry, name, buffer, size, handler->flags); + return handler->get(handler, dentry, name, buffer, size); } /* @@ -797,7 +797,7 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, if (!handler || get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1) return -EOPNOTSUPP; - return handler->set(dentry, name, value, size, flags, handler->flags); + return handler->set(handler, dentry, name, value, size, flags); } /* @@ -814,7 +814,7 @@ int reiserfs_removexattr(struct dentry *dentry, const char *name) if (!handler || get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1) return -EOPNOTSUPP; - return handler->set(dentry, name, NULL, 0, XATTR_REPLACE, handler->flags); + return handler->set(handler, dentry, name, NULL, 0, XATTR_REPLACE); } struct listxattr_buf { @@ -842,14 +842,14 @@ static int listxattr_filler(struct dir_context *ctx, const char *name, if (!handler) /* Unsupported xattr name */ return 0; if (b->buf) { - size = handler->list(b->dentry, b->buf + b->pos, - b->size, name, namelen, - handler->flags); + size = handler->list(handler, b->dentry, + b->buf + b->pos, b->size, name, + namelen); if (size > b->size) return -ERANGE; } else { - size = handler->list(b->dentry, NULL, 0, name, - namelen, handler->flags); + size = handler->list(handler, b->dentry, + NULL, 0, name, namelen); } b->pos += size; diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c index 9a3b0616f283..ac659af431ae 100644 --- a/fs/reiserfs/xattr_security.c +++ b/fs/reiserfs/xattr_security.c @@ -9,8 +9,8 @@ #include <linux/uaccess.h> static int -security_get(struct dentry *dentry, const char *name, void *buffer, size_t size, - int handler_flags) +security_get(const struct xattr_handler *handler, struct dentry *dentry, + const char *name, void *buffer, size_t size) { if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) return -EINVAL; @@ -22,8 +22,8 @@ security_get(struct dentry *dentry, const char *name, void *buffer, size_t size, } static int -security_set(struct dentry *dentry, const char *name, const void *buffer, - size_t size, int flags, int handler_flags) +security_set(const struct xattr_handler *handler, struct dentry *dentry, + const char *name, const void *buffer, size_t size, int flags) { if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) return -EINVAL; @@ -34,8 +34,9 @@ security_set(struct dentry *dentry, const char *name, const void *buffer, return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags); } -static size_t security_list(struct dentry *dentry, char *list, size_t list_len, - const char *name, size_t namelen, int handler_flags) +static size_t security_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_len, + const char *name, size_t namelen) { const size_t len = namelen + 1; diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c index e4f1343714e0..a338adf1b8b4 100644 --- a/fs/reiserfs/xattr_trusted.c +++ b/fs/reiserfs/xattr_trusted.c @@ -8,8 +8,8 @@ #include <linux/uaccess.h> static int -trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size, - int handler_flags) +trusted_get(const struct xattr_handler *handler, struct dentry *dentry, + const char *name, void *buffer, size_t size) { if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) return -EINVAL; @@ -21,8 +21,8 @@ trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size, } static int -trusted_set(struct dentry *dentry, const char *name, const void *buffer, - size_t size, int flags, int handler_flags) +trusted_set(const struct xattr_handler *handler, struct dentry *dentry, + const char *name, const void *buffer, size_t size, int flags) { if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) return -EINVAL; @@ -33,8 +33,9 @@ trusted_set(struct dentry *dentry, const char *name, const void *buffer, return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags); } -static size_t trusted_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int handler_flags) +static size_t trusted_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { const size_t len = name_len + 1; diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c index d0b08d3e5689..39c9667191c5 100644 --- a/fs/reiserfs/xattr_user.c +++ b/fs/reiserfs/xattr_user.c @@ -7,8 +7,8 @@ #include <linux/uaccess.h> static int -user_get(struct dentry *dentry, const char *name, void *buffer, size_t size, - int handler_flags) +user_get(const struct xattr_handler *handler, struct dentry *dentry, + const char *name, void *buffer, size_t size) { if (strlen(name) < sizeof(XATTR_USER_PREFIX)) @@ -19,8 +19,8 @@ user_get(struct dentry *dentry, const char *name, void *buffer, size_t size, } static int -user_set(struct dentry *dentry, const char *name, const void *buffer, - size_t size, int flags, int handler_flags) +user_set(const struct xattr_handler *handler, struct dentry *dentry, + const char *name, const void *buffer, size_t size, int flags) { if (strlen(name) < sizeof(XATTR_USER_PREFIX)) return -EINVAL; @@ -30,8 +30,9 @@ user_set(struct dentry *dentry, const char *name, const void *buffer, return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags); } -static size_t user_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int handler_flags) +static size_t user_list(const struct xattr_handler *handler, + struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len) { const size_t len = name_len + 1; diff --git a/fs/seq_file.c b/fs/seq_file.c index 225586e141ca..e85664b7c7d9 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -13,6 +13,7 @@ #include <linux/cred.h> #include <linux/mm.h> #include <linux/printk.h> +#include <linux/string_helpers.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -25,12 +26,17 @@ static void seq_set_overflow(struct seq_file *m) static void *seq_buf_alloc(unsigned long size) { void *buf; + gfp_t gfp = GFP_KERNEL; /* - * __GFP_NORETRY to avoid oom-killings with high-order allocations - - * it's better to fall back to vmalloc() than to kill things. + * For high order allocations, use __GFP_NORETRY to avoid oom-killing - + * it's better to fall back to vmalloc() than to kill things. For small + * allocations, just use GFP_KERNEL which will oom kill, thus no need + * for vmalloc fallback. */ - buf = kmalloc(size, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); + if (size > PAGE_SIZE) + gfp |= __GFP_NORETRY | __GFP_NOWARN; + buf = kmalloc(size, gfp); if (!buf && size > PAGE_SIZE) buf = vmalloc(size); return buf; @@ -377,26 +383,12 @@ EXPORT_SYMBOL(seq_release); */ void seq_escape(struct seq_file *m, const char *s, const char *esc) { - char *end = m->buf + m->size; - char *p; - char c; + char *buf; + size_t size = seq_get_buf(m, &buf); + int ret; - for (p = m->buf + m->count; (c = *s) != '\0' && p < end; s++) { - if (!strchr(esc, c)) { - *p++ = c; - continue; - } - if (p + 3 < end) { - *p++ = '\\'; - *p++ = '0' + ((c & 0300) >> 6); - *p++ = '0' + ((c & 070) >> 3); - *p++ = '0' + (c & 07); - continue; - } - seq_set_overflow(m); - return; - } - m->count = p - m->buf; + ret = string_escape_str(s, buf, size, ESCAPE_OCTAL, esc); + seq_commit(m, ret < size ? ret : -1); } EXPORT_SYMBOL(seq_escape); @@ -773,6 +765,8 @@ void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type, { const u8 *ptr = buf; int i, linelen, remaining = len; + char *buffer; + size_t size; int ret; if (rowsize != 16 && rowsize != 32) @@ -794,15 +788,12 @@ void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type, break; } + size = seq_get_buf(m, &buffer); ret = hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, - m->buf + m->count, m->size - m->count, - ascii); - if (ret >= m->size - m->count) { - seq_set_overflow(m); - } else { - m->count += ret; - seq_putc(m, '\n'); - } + buffer, size, ascii); + seq_commit(m, ret < size ? ret : -1); + + seq_putc(m, '\n'); } } EXPORT_SYMBOL(seq_hex_dump); diff --git a/fs/splice.c b/fs/splice.c index 5fc1e50a7f30..4cf700d50b40 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -360,7 +360,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, break; error = add_to_page_cache_lru(page, mapping, index, - GFP_KERNEL & mapping_gfp_mask(mapping)); + mapping_gfp_constraint(mapping, GFP_KERNEL)); if (unlikely(error)) { page_cache_release(page); if (error == -EEXIST) @@ -809,6 +809,13 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des */ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) { + /* + * Check for signal early to make process killable when there are + * always buffers available + */ + if (signal_pending(current)) + return -ERESTARTSYS; + while (!pipe->nrbufs) { if (!pipe->writers) return 0; @@ -884,6 +891,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, splice_from_pipe_begin(sd); do { + cond_resched(); ret = splice_from_pipe_next(pipe, sd); if (ret > 0) ret = splice_from_pipe_feed(pipe, sd, actor); diff --git a/fs/squashfs/xattr.c b/fs/squashfs/xattr.c index e5e0ddf5b143..6a4cc344085c 100644 --- a/fs/squashfs/xattr.c +++ b/fs/squashfs/xattr.c @@ -68,8 +68,8 @@ ssize_t squashfs_listxattr(struct dentry *d, char *buffer, name_size = le16_to_cpu(entry.size); handler = squashfs_xattr_handler(le16_to_cpu(entry.type)); if (handler) - prefix_size = handler->list(d, buffer, rest, NULL, - name_size, handler->flags); + prefix_size = handler->list(handler, d, buffer, rest, + NULL, name_size); if (prefix_size) { if (buffer) { if (prefix_size + name_size + 1 > rest) { @@ -212,88 +212,68 @@ failed: } -/* - * User namespace support - */ -static size_t squashfs_user_list(struct dentry *d, char *list, size_t list_size, - const char *name, size_t name_len, int type) +static size_t squashfs_xattr_handler_list(const struct xattr_handler *handler, + struct dentry *d, char *list, + size_t list_size, const char *name, + size_t name_len) { - if (list && XATTR_USER_PREFIX_LEN <= list_size) - memcpy(list, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); - return XATTR_USER_PREFIX_LEN; + int len = strlen(handler->prefix); + + if (list && len <= list_size) + memcpy(list, handler->prefix, len); + return len; } -static int squashfs_user_get(struct dentry *d, const char *name, void *buffer, - size_t size, int type) +static int squashfs_xattr_handler_get(const struct xattr_handler *handler, + struct dentry *d, const char *name, + void *buffer, size_t size) { if (name[0] == '\0') return -EINVAL; - return squashfs_xattr_get(d_inode(d), SQUASHFS_XATTR_USER, name, + return squashfs_xattr_get(d_inode(d), handler->flags, name, buffer, size); } +/* + * User namespace support + */ static const struct xattr_handler squashfs_xattr_user_handler = { .prefix = XATTR_USER_PREFIX, - .list = squashfs_user_list, - .get = squashfs_user_get + .flags = SQUASHFS_XATTR_USER, + .list = squashfs_xattr_handler_list, + .get = squashfs_xattr_handler_get }; /* * Trusted namespace support */ -static size_t squashfs_trusted_list(struct dentry *d, char *list, - size_t list_size, const char *name, size_t name_len, int type) +static size_t squashfs_trusted_xattr_handler_list(const struct xattr_handler *handler, + struct dentry *d, char *list, + size_t list_size, const char *name, + size_t name_len) { if (!capable(CAP_SYS_ADMIN)) return 0; - - if (list && XATTR_TRUSTED_PREFIX_LEN <= list_size) - memcpy(list, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); - return XATTR_TRUSTED_PREFIX_LEN; -} - -static int squashfs_trusted_get(struct dentry *d, const char *name, - void *buffer, size_t size, int type) -{ - if (name[0] == '\0') - return -EINVAL; - - return squashfs_xattr_get(d_inode(d), SQUASHFS_XATTR_TRUSTED, name, - buffer, size); + return squashfs_xattr_handler_list(handler, d, list, list_size, name, + name_len); } static const struct xattr_handler squashfs_xattr_trusted_handler = { .prefix = XATTR_TRUSTED_PREFIX, - .list = squashfs_trusted_list, - .get = squashfs_trusted_get + .flags = SQUASHFS_XATTR_TRUSTED, + .list = squashfs_trusted_xattr_handler_list, + .get = squashfs_xattr_handler_get }; /* * Security namespace support */ -static size_t squashfs_security_list(struct dentry *d, char *list, - size_t list_size, const char *name, size_t name_len, int type) -{ - if (list && XATTR_SECURITY_PREFIX_LEN <= list_size) - memcpy(list, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); - return XATTR_SECURITY_PREFIX_LEN; -} - -static int squashfs_security_get(struct dentry *d, const char *name, - void *buffer, size_t size, int type) -{ - if (name[0] == '\0') - return -EINVAL; - - return squashfs_xattr_get(d_inode(d), SQUASHFS_XATTR_SECURITY, name, - buffer, size); -} - static const struct xattr_handler squashfs_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, - .list = squashfs_security_list, - .get = squashfs_security_get + .flags = SQUASHFS_XATTR_SECURITY, + .list = squashfs_xattr_handler_list, + .get = squashfs_xattr_handler_get }; static const struct xattr_handler *squashfs_xattr_handler(int type) diff --git a/fs/stat.c b/fs/stat.c index cccc1aab9a8b..d4a61d8dc021 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -367,8 +367,6 @@ static long cp_new_stat64(struct kstat *stat, struct stat64 __user *statbuf) INIT_STRUCT_STAT64_PADDING(tmp); #ifdef CONFIG_MIPS /* mips has weird padding, so we don't get 64 bits there */ - if (!new_valid_dev(stat->dev) || !new_valid_dev(stat->rdev)) - return -EOVERFLOW; tmp.st_dev = new_encode_dev(stat->dev); tmp.st_rdev = new_encode_dev(stat->rdev); #else diff --git a/fs/sync.c b/fs/sync.c index 4ec430ae2b0d..dd5d1711c7ac 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -348,7 +348,8 @@ SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes, } if (flags & SYNC_FILE_RANGE_WRITE) { - ret = filemap_fdatawrite_range(mapping, offset, endbyte); + ret = __filemap_fdatawrite_range(mapping, offset, endbyte, + WB_SYNC_NONE); if (ret < 0) goto out_put; } diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index e1236594fffe..dc1358b5ec95 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -73,13 +73,26 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, } if (grp->bin_attrs) { - for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) { + for (i = 0, bin_attr = grp->bin_attrs; *bin_attr; i++, bin_attr++) { + umode_t mode = (*bin_attr)->attr.mode; + if (update) kernfs_remove_by_name(parent, (*bin_attr)->attr.name); + if (grp->is_bin_visible) { + mode = grp->is_bin_visible(kobj, *bin_attr, i); + if (!mode) + continue; + } + + WARN(mode & ~(SYSFS_PREALLOC | 0664), + "Attribute %s: Invalid permissions 0%o\n", + (*bin_attr)->attr.name, mode); + + mode &= SYSFS_PREALLOC | 0664; error = sysfs_add_file_mode_ns(parent, &(*bin_attr)->attr, true, - (*bin_attr)->attr.mode, NULL); + mode, NULL); if (error) break; } diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 590ad9206e3f..02fa1dcc5969 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -162,15 +162,8 @@ void sysv_set_inode(struct inode *inode, dev_t rdev) inode->i_fop = &sysv_dir_operations; inode->i_mapping->a_ops = &sysv_aops; } else if (S_ISLNK(inode->i_mode)) { - if (inode->i_blocks) { - inode->i_op = &sysv_symlink_inode_operations; - inode->i_mapping->a_ops = &sysv_aops; - } else { - inode->i_op = &simple_symlink_inode_operations; - inode->i_link = (char *)SYSV_I(inode)->i_data; - nd_terminate_link(inode->i_link, inode->i_size, - sizeof(SYSV_I(inode)->i_data) - 1); - } + inode->i_op = &sysv_symlink_inode_operations; + inode->i_mapping->a_ops = &sysv_aops; } else init_special_inode(inode, inode->i_mode, rdev); } diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig index ba66d508006a..7ff7712f284e 100644 --- a/fs/ubifs/Kconfig +++ b/fs/ubifs/Kconfig @@ -35,3 +35,18 @@ config UBIFS_FS_ZLIB default y help Zlib compresses better than LZO but it is slower. Say 'Y' if unsure. + +config UBIFS_ATIME_SUPPORT + bool "Access time support" if UBIFS_FS + depends on UBIFS_FS + default n + help + Originally UBIFS did not support atime, because it looked like a bad idea due + increased flash wear. This option adds atime support and it is disabled by default + to preserve the old behavior. If you enable this option, UBIFS starts updating atime, + which means that file-system read operations will cause writes (inode atime + updates). This may affect file-system performance and increase flash device wear, + so be careful. How often atime is updated depends on the selected strategy: + strictatime is the "heavy", relatime is "lighter", etc. + + If unsure, say 'N' diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 4c46a9865fa7..595ca0debe11 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2573,7 +2573,7 @@ int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, { int err, failing; - if (c->dbg->pc_happened) + if (dbg_is_power_cut(c)) return -EROFS; failing = power_cut_emulated(c, lnum, 1); @@ -2595,7 +2595,7 @@ int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, { int err; - if (c->dbg->pc_happened) + if (dbg_is_power_cut(c)) return -EROFS; if (power_cut_emulated(c, lnum, 1)) return -EROFS; @@ -2611,7 +2611,7 @@ int dbg_leb_unmap(struct ubifs_info *c, int lnum) { int err; - if (c->dbg->pc_happened) + if (dbg_is_power_cut(c)) return -EROFS; if (power_cut_emulated(c, lnum, 0)) return -EROFS; @@ -2627,7 +2627,7 @@ int dbg_leb_map(struct ubifs_info *c, int lnum) { int err; - if (c->dbg->pc_happened) + if (dbg_is_power_cut(c)) return -EROFS; if (power_cut_emulated(c, lnum, 0)) return -EROFS; diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 5c27c66c224a..e49bd2808bf3 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -449,13 +449,14 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) } out: + kfree(file->private_data); + file->private_data = NULL; + if (err != -ENOENT) { ubifs_err(c, "cannot find next direntry, error %d", err); return err; } - kfree(file->private_data); - file->private_data = NULL; /* 2 is a special value indicating that there are no more direntries */ ctx->pos = 2; return 0; @@ -787,9 +788,6 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry, dbg_gen("dent '%pd' in dir ino %lu", dentry, dir->i_ino); - if (!new_valid_dev(rdev)) - return -EINVAL; - if (S_ISBLK(mode) || S_ISCHR(mode)) { dev = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS); if (!dev) @@ -1188,6 +1186,9 @@ const struct inode_operations ubifs_dir_inode_operations = { .getxattr = ubifs_getxattr, .listxattr = ubifs_listxattr, .removexattr = ubifs_removexattr, +#ifdef CONFIG_UBIFS_ATIME_SUPPORT + .update_time = ubifs_update_time, +#endif }; const struct file_operations ubifs_dir_operations = { diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index a3dfe2ae79f2..0edc12856147 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1354,6 +1354,47 @@ static inline int mctime_update_needed(const struct inode *inode, return 0; } +#ifdef CONFIG_UBIFS_ATIME_SUPPORT +/** + * ubifs_update_time - update time of inode. + * @inode: inode to update + * + * This function updates time of the inode. + */ +int ubifs_update_time(struct inode *inode, struct timespec *time, + int flags) +{ + struct ubifs_inode *ui = ubifs_inode(inode); + struct ubifs_info *c = inode->i_sb->s_fs_info; + struct ubifs_budget_req req = { .dirtied_ino = 1, + .dirtied_ino_d = ALIGN(ui->data_len, 8) }; + int iflags = I_DIRTY_TIME; + int err, release; + + err = ubifs_budget_space(c, &req); + if (err) + return err; + + mutex_lock(&ui->ui_mutex); + if (flags & S_ATIME) + inode->i_atime = *time; + if (flags & S_CTIME) + inode->i_ctime = *time; + if (flags & S_MTIME) + inode->i_mtime = *time; + + if (!(inode->i_sb->s_flags & MS_LAZYTIME)) + iflags |= I_DIRTY_SYNC; + + release = ui->dirty; + __mark_inode_dirty(inode, iflags); + mutex_unlock(&ui->ui_mutex); + if (release) + ubifs_release_budget(c, &req); + return 0; +} +#endif + /** * update_ctime - update mtime and ctime of an inode. * @inode: inode to update @@ -1537,6 +1578,9 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) if (err) return err; vma->vm_ops = &ubifs_file_vm_ops; +#ifdef CONFIG_UBIFS_ATIME_SUPPORT + file_accessed(file); +#endif return 0; } @@ -1557,6 +1601,9 @@ const struct inode_operations ubifs_file_inode_operations = { .getxattr = ubifs_getxattr, .listxattr = ubifs_listxattr, .removexattr = ubifs_removexattr, +#ifdef CONFIG_UBIFS_ATIME_SUPPORT + .update_time = ubifs_update_time, +#endif }; const struct inode_operations ubifs_symlink_inode_operations = { @@ -1568,6 +1615,9 @@ const struct inode_operations ubifs_symlink_inode_operations = { .getxattr = ubifs_getxattr, .listxattr = ubifs_listxattr, .removexattr = ubifs_removexattr, +#ifdef CONFIG_UBIFS_ATIME_SUPPORT + .update_time = ubifs_update_time, +#endif }; const struct file_operations ubifs_file_operations = { diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index dc9f27e9d61b..9a517109da0f 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -1498,11 +1498,10 @@ static struct ubifs_nnode *dirty_cow_nnode(struct ubifs_info *c, } /* nnode is being committed, so copy it */ - n = kmalloc(sizeof(struct ubifs_nnode), GFP_NOFS); + n = kmemdup(nnode, sizeof(struct ubifs_nnode), GFP_NOFS); if (unlikely(!n)) return ERR_PTR(-ENOMEM); - memcpy(n, nnode, sizeof(struct ubifs_nnode)); n->cnext = NULL; __set_bit(DIRTY_CNODE, &n->flags); __clear_bit(COW_CNODE, &n->flags); @@ -1549,11 +1548,10 @@ static struct ubifs_pnode *dirty_cow_pnode(struct ubifs_info *c, } /* pnode is being committed, so copy it */ - p = kmalloc(sizeof(struct ubifs_pnode), GFP_NOFS); + p = kmemdup(pnode, sizeof(struct ubifs_pnode), GFP_NOFS); if (unlikely(!p)) return ERR_PTR(-ENOMEM); - memcpy(p, pnode, sizeof(struct ubifs_pnode)); p->cnext = NULL; __set_bit(DIRTY_CNODE, &p->flags); __clear_bit(COW_CNODE, &p->flags); diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index ee7cb5ebb6e8..8ece6ca58c0b 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h @@ -155,13 +155,8 @@ static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf) */ static inline int ubifs_encode_dev(union ubifs_dev_desc *dev, dev_t rdev) { - if (new_valid_dev(rdev)) { - dev->new = cpu_to_le32(new_encode_dev(rdev)); - return sizeof(dev->new); - } else { - dev->huge = cpu_to_le64(huge_encode_dev(rdev)); - return sizeof(dev->huge); - } + dev->new = cpu_to_le32(new_encode_dev(rdev)); + return sizeof(dev->new); } /** diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 695fc71d5244..586d59347fff 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -789,7 +789,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, corrupted_rescan: /* Re-scan the corrupted data with verbose messages */ ubifs_err(c, "corruption %d", ret); - ubifs_scan_a_node(c, buf, len, lnum, offs, 1); + ubifs_scan_a_node(c, buf, len, lnum, offs, 0); corrupted: ubifs_scanned_corruption(c, lnum, offs, buf); err = -EUCLEAN; @@ -1331,8 +1331,7 @@ void ubifs_destroy_size_tree(struct ubifs_info *c) struct size_entry *e, *n; rbtree_postorder_for_each_entry_safe(e, n, &c->size_tree, rb) { - if (e->inode) - iput(e->inode); + iput(e->inode); kfree(e); } @@ -1533,8 +1532,7 @@ int ubifs_recover_size(struct ubifs_info *c) err = fix_size_in_place(c, e); if (err) return err; - if (e->inode) - iput(e->inode); + iput(e->inode); } } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 9547a27868ad..1fd90c079537 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -128,7 +128,10 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) if (err) goto out_ino; - inode->i_flags |= (S_NOCMTIME | S_NOATIME); + inode->i_flags |= S_NOCMTIME; +#ifndef CONFIG_UBIFS_ATIME_SUPPORT + inode->i_flags |= S_NOATIME; +#endif set_nlink(inode, le32_to_cpu(ino->nlink)); i_uid_write(inode, le32_to_cpu(ino->uid)); i_gid_write(inode, le32_to_cpu(ino->gid)); @@ -2037,7 +2040,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) if (c->max_inode_sz > MAX_LFS_FILESIZE) sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE; sb->s_op = &ubifs_super_operations; - sb->s_xattr = ubifs_xattr_handlers; mutex_lock(&c->umount_mutex); err = mount_ubifs(c); @@ -2139,7 +2141,12 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, if (err) goto out_deact; /* We do not support atime */ - sb->s_flags |= MS_ACTIVE | MS_NOATIME; + sb->s_flags |= MS_ACTIVE; +#ifndef CONFIG_UBIFS_ATIME_SUPPORT + sb->s_flags |= MS_NOATIME; +#else + ubifs_msg(c, "full atime support is enabled."); +#endif } /* 'fill_super()' opens ubi again so we must close it here */ diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 957f5757f374..fa9a20cc60d6 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -198,11 +198,10 @@ static struct ubifs_znode *copy_znode(struct ubifs_info *c, { struct ubifs_znode *zn; - zn = kmalloc(c->max_znode_sz, GFP_NOFS); + zn = kmemdup(znode, c->max_znode_sz, GFP_NOFS); if (unlikely(!zn)) return ERR_PTR(-ENOMEM); - memcpy(zn, znode, c->max_znode_sz); zn->cnext = NULL; __set_bit(DIRTY_ZNODE, &zn->flags); __clear_bit(COW_ZNODE, &zn->flags); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index de759022f3d6..a5697de763f5 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -858,9 +858,9 @@ struct ubifs_compressor { * @mod_dent: non-zero if the operation removes or modifies an existing * directory entry * @new_ino: non-zero if the operation adds a new inode - * @new_ino_d: now much data newly created inode contains + * @new_ino_d: how much data newly created inode contains * @dirtied_ino: how many inodes the operation makes dirty - * @dirtied_ino_d: now much data dirtied inode contains + * @dirtied_ino_d: how much data dirtied inode contains * @idx_growth: how much the index will supposedly grow * @data_growth: how much new data the operation will supposedly add * @dd_growth: how much data that makes other data dirty the operation will @@ -1470,7 +1470,6 @@ extern spinlock_t ubifs_infos_lock; extern atomic_long_t ubifs_clean_zn_cnt; extern struct kmem_cache *ubifs_inode_slab; extern const struct super_operations ubifs_super_operations; -extern const struct xattr_handler *ubifs_xattr_handlers[]; extern const struct address_space_operations ubifs_file_address_operations; extern const struct file_operations ubifs_file_operations; extern const struct inode_operations ubifs_file_inode_operations; @@ -1746,6 +1745,9 @@ int ubifs_calc_dark(const struct ubifs_info *c, int spc); /* file.c */ int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync); int ubifs_setattr(struct dentry *dentry, struct iattr *attr); +#ifdef CONFIG_UBIFS_ATIME_SUPPORT +int ubifs_update_time(struct inode *inode, struct timespec *time, int flags); +#endif /* dir.c */ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index fd65b3f1923c..e8b01b721e99 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -200,6 +200,7 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, int err; struct ubifs_inode *host_ui = ubifs_inode(host); struct ubifs_inode *ui = ubifs_inode(inode); + void *buf = NULL; struct ubifs_budget_req req = { .dirtied_ino = 2, .dirtied_ino_d = ALIGN(size, 8) + ALIGN(host_ui->data_len, 8) }; @@ -208,14 +209,17 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, if (err) return err; - kfree(ui->data); - ui->data = kmemdup(value, size, GFP_NOFS); - if (!ui->data) { + buf = kmemdup(value, size, GFP_NOFS); + if (!buf) { err = -ENOMEM; goto out_free; } + mutex_lock(&ui->ui_mutex); + kfree(ui->data); + ui->data = buf; inode->i_size = ui->ui_size = size; ui->data_len = size; + mutex_unlock(&ui->ui_mutex); mutex_lock(&host_ui->ui_mutex); host->i_ctime = ubifs_current_time(host); @@ -409,6 +413,7 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, ubifs_assert(inode->i_size == ui->data_len); ubifs_assert(ubifs_inode(host)->xattr_size > ui->data_len); + mutex_lock(&ui->ui_mutex); if (buf) { /* If @buf is %NULL we are supposed to return the length */ if (ui->data_len > size) { @@ -423,6 +428,7 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, err = ui->data_len; out_iput: + mutex_unlock(&ui->ui_mutex); iput(inode); out_unlock: kfree(xent); @@ -582,46 +588,6 @@ out_free: return err; } -static size_t security_listxattr(struct dentry *d, char *list, size_t list_size, - const char *name, size_t name_len, int flags) -{ - const int prefix_len = XATTR_SECURITY_PREFIX_LEN; - const size_t total_len = prefix_len + name_len + 1; - - if (list && total_len <= list_size) { - memcpy(list, XATTR_SECURITY_PREFIX, prefix_len); - memcpy(list + prefix_len, name, name_len); - list[prefix_len + name_len] = '\0'; - } - - return total_len; -} - -static int security_getxattr(struct dentry *d, const char *name, void *buffer, - size_t size, int flags) -{ - return ubifs_getxattr(d, name, buffer, size); -} - -static int security_setxattr(struct dentry *d, const char *name, - const void *value, size_t size, int flags, - int handler_flags) -{ - return ubifs_setxattr(d, name, value, size, flags); -} - -static const struct xattr_handler ubifs_xattr_security_handler = { - .prefix = XATTR_SECURITY_PREFIX, - .list = security_listxattr, - .get = security_getxattr, - .set = security_setxattr, -}; - -const struct xattr_handler *ubifs_xattr_handlers[] = { - &ubifs_xattr_security_handler, - NULL, -}; - static int init_xattrs(struct inode *inode, const struct xattr *xattr_array, void *fs_info) { diff --git a/fs/xattr.c b/fs/xattr.c index 072fee1258dd..9b932b95d74e 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -720,7 +720,7 @@ generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t s handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); if (!handler) return -EOPNOTSUPP; - return handler->get(dentry, name, buffer, size, handler->flags); + return handler->get(handler, dentry, name, buffer, size); } /* @@ -735,15 +735,15 @@ generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) if (!buffer) { for_each_xattr_handler(handlers, handler) { - size += handler->list(dentry, NULL, 0, NULL, 0, - handler->flags); + size += handler->list(handler, dentry, NULL, 0, + NULL, 0); } } else { char *buf = buffer; for_each_xattr_handler(handlers, handler) { - size = handler->list(dentry, buf, buffer_size, - NULL, 0, handler->flags); + size = handler->list(handler, dentry, buf, buffer_size, + NULL, 0); if (size > buffer_size) return -ERANGE; buf += size; @@ -767,7 +767,7 @@ generic_setxattr(struct dentry *dentry, const char *name, const void *value, siz handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); if (!handler) return -EOPNOTSUPP; - return handler->set(dentry, name, value, size, flags, handler->flags); + return handler->set(handler, dentry, name, value, size, flags); } /* @@ -782,8 +782,7 @@ generic_removexattr(struct dentry *dentry, const char *name) handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); if (!handler) return -EOPNOTSUPP; - return handler->set(dentry, name, NULL, 0, - XATTR_REPLACE, handler->flags); + return handler->set(handler, dentry, name, NULL, 0, XATTR_REPLACE); } EXPORT_SYMBOL(generic_getxattr); @@ -791,6 +790,30 @@ EXPORT_SYMBOL(generic_listxattr); EXPORT_SYMBOL(generic_setxattr); EXPORT_SYMBOL(generic_removexattr); +/** + * xattr_full_name - Compute full attribute name from suffix + * + * @handler: handler of the xattr_handler operation + * @name: name passed to the xattr_handler operation + * + * The get and set xattr handler operations are called with the remainder of + * the attribute name after skipping the handler's prefix: for example, "foo" + * is passed to the get operation of a handler with prefix "user." to get + * attribute "user.foo". The full name is still "there" in the name though. + * + * Note: the list xattr handler operation when called from the vfs is passed a + * NULL name; some file systems use this operation internally, with varying + * semantics. + */ +const char *xattr_full_name(const struct xattr_handler *handler, + const char *name) +{ + size_t prefix_len = strlen(handler->prefix); + + return name - prefix_len; +} +EXPORT_SYMBOL(xattr_full_name); + /* * Allocate new xattr and copy in the value; but leave the name to callers. */ diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index a096841bd06c..f64639176670 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -84,6 +84,7 @@ xfs-y += xfs_aops.o \ xfs_message.o \ xfs_mount.o \ xfs_mru_cache.o \ + xfs_stats.o \ xfs_super.o \ xfs_symlink.o \ xfs_sysfs.o \ @@ -118,7 +119,6 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \ xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o -xfs-$(CONFIG_PROC_FS) += xfs_stats.o xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o xfs-$(CONFIG_NFSD_PNFS) += xfs_pnfs.o diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index a7a3a63bb360..686ba6fb20dd 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -55,8 +55,9 @@ kmem_alloc(size_t size, xfs_km_flags_t flags) return ptr; if (!(++retries % 100)) xfs_err(NULL, - "possible memory allocation deadlock in %s (mode:0x%x)", - __func__, lflags); + "%s(%u) possible memory allocation deadlock size %u in %s (mode:0x%x)", + current->comm, current->pid, + (unsigned int)size, __func__, lflags); congestion_wait(BLK_RW_ASYNC, HZ/50); } while (1); } @@ -120,8 +121,9 @@ kmem_zone_alloc(kmem_zone_t *zone, xfs_km_flags_t flags) return ptr; if (!(++retries % 100)) xfs_err(NULL, - "possible memory allocation deadlock in %s (mode:0x%x)", - __func__, lflags); + "%s(%u) possible memory allocation deadlock in %s (mode:0x%x)", + current->comm, current->pid, + __func__, lflags); congestion_wait(BLK_RW_ASYNC, HZ/50); } while (1); } diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index ffad7f20342f..3479294c1d58 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -482,7 +482,9 @@ xfs_agfl_verify( be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) return false; } - return true; + + return xfs_log_check_lsn(mp, + be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)); } static void @@ -651,8 +653,8 @@ xfs_alloc_ag_vextent( -((long)(args->len))); } - XFS_STATS_INC(xs_allocx); - XFS_STATS_ADD(xs_allocb, args->len); + XFS_STATS_INC(args->mp, xs_allocx); + XFS_STATS_ADD(args->mp, xs_allocb, args->len); return error; } @@ -1808,8 +1810,8 @@ xfs_free_ag_extent( if (!isfl) xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); - XFS_STATS_INC(xs_freex); - XFS_STATS_ADD(xs_freeb, len); + XFS_STATS_INC(mp, xs_freex); + XFS_STATS_ADD(mp, xs_freeb, len); trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); @@ -2259,9 +2261,13 @@ xfs_agf_verify( { struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); - if (xfs_sb_version_hascrc(&mp->m_sb) && - !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) + if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) return false; + if (!xfs_log_check_lsn(mp, + be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn))) + return false; + } if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && @@ -2503,7 +2509,7 @@ xfs_alloc_vextent( * Try near allocation first, then anywhere-in-ag after * the first a.g. fails. */ - if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) && + if ((args->userdata & XFS_ALLOC_INITIAL_USER_DATA) && (mp->m_flags & XFS_MOUNT_32BITINODES)) { args->fsbno = XFS_AGB_TO_FSB(mp, ((mp->m_agfrotor / rotorstep) % @@ -2634,6 +2640,14 @@ xfs_alloc_vextent( XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), args->len); #endif + + /* Zero the extent if we were asked to do so */ + if (args->userdata & XFS_ALLOC_USERDATA_ZERO) { + error = xfs_zero_extent(args->ip, args->fsbno, args->len); + if (error) + goto error0; + } + } xfs_perag_put(args->pag); return 0; diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index ca1c8168373a..0ecde4d5cac8 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -101,6 +101,7 @@ typedef struct xfs_alloc_arg { struct xfs_mount *mp; /* file system mount point */ struct xfs_buf *agbp; /* buffer for a.g. freelist header */ struct xfs_perag *pag; /* per-ag struct for this agno */ + struct xfs_inode *ip; /* for userdata zeroing method */ xfs_fsblock_t fsbno; /* file system block number */ xfs_agnumber_t agno; /* allocation group number */ xfs_agblock_t agbno; /* allocation group-relative block # */ @@ -120,15 +121,16 @@ typedef struct xfs_alloc_arg { char wasdel; /* set if allocation was prev delayed */ char wasfromfl; /* set if allocation is from freelist */ char isfl; /* set if is freelist blocks - !acctg */ - char userdata; /* set if this is user data */ + char userdata; /* mask defining userdata treatment */ xfs_fsblock_t firstblock; /* io first block allocated */ } xfs_alloc_arg_t; /* * Defines for userdata */ -#define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/ -#define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */ +#define XFS_ALLOC_USERDATA (1 << 0)/* allocation is for user data*/ +#define XFS_ALLOC_INITIAL_USER_DATA (1 << 1)/* special case start of file */ +#define XFS_ALLOC_USERDATA_ZERO (1 << 2)/* zero extent on allocation */ xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp, struct xfs_perag *pag, xfs_extlen_t need); diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index ff065578969f..f949818fa1c7 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -125,7 +125,7 @@ xfs_attr_get( uint lock_mode; int error; - XFS_STATS_INC(xs_attr_get); + XFS_STATS_INC(ip->i_mount, xs_attr_get); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; @@ -209,7 +209,7 @@ xfs_attr_set( int rsvd = (flags & ATTR_ROOT) != 0; int error, err2, committed, local; - XFS_STATS_INC(xs_attr_set); + XFS_STATS_INC(mp, xs_attr_set); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; @@ -412,7 +412,7 @@ xfs_attr_remove( xfs_fsblock_t firstblock; int error; - XFS_STATS_INC(xs_attr_remove); + XFS_STATS_INC(mp, xs_attr_remove); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 33df52d97ec7..aa187f7ba2dd 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -41,6 +41,7 @@ #include "xfs_buf_item.h" #include "xfs_cksum.h" #include "xfs_dir2.h" +#include "xfs_log.h" /* @@ -266,6 +267,8 @@ xfs_attr3_leaf_verify( return false; if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) return false; + if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn))) + return false; } else { if (ichdr.magic != XFS_ATTR_LEAF_MAGIC) return false; diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index f38f9bd81557..5ab95ffa4ae9 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -107,7 +107,7 @@ xfs_attr3_rmt_verify( if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) return false; if (be32_to_cpu(rmt->rm_offset) + - be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX) + be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX) return false; if (rmt->rm_owner == 0) return false; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 8e2010d53b07..119c2422aac7 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -948,14 +948,16 @@ xfs_bmap_local_to_extents( bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); /* - * Initialise the block and copy the data + * Initialize the block, copy the data and log the remote buffer. * - * Note: init_fn must set the buffer log item type correctly! + * The callout is responsible for logging because the remote format + * might differ from the local format and thus we don't know how much to + * log here. Note that init_fn must also set the buffer log item type + * correctly. */ init_fn(tp, bp, ip, ifp); - /* account for the change in fork size and log everything */ - xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); + /* account for the change in fork size */ xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); xfs_bmap_local_to_extents_empty(ip, whichfork); flags |= XFS_ILOG_CORE; @@ -1435,7 +1437,7 @@ xfs_bmap_search_extents( xfs_ifork_t *ifp; /* inode fork pointer */ xfs_bmbt_rec_host_t *ep; /* extent record pointer */ - XFS_STATS_INC(xs_look_exlist); + XFS_STATS_INC(ip->i_mount, xs_look_exlist); ifp = XFS_IFORK_PTR(ip, fork); ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp); @@ -1732,7 +1734,7 @@ xfs_bmap_add_extent_delay_real( ASSERT(!bma->cur || (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); - XFS_STATS_INC(xs_add_exlist); + XFS_STATS_INC(mp, xs_add_exlist); #define LEFT r[0] #define RIGHT r[1] @@ -2286,7 +2288,7 @@ xfs_bmap_add_extent_unwritten_real( ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); ASSERT(!isnullstartblock(new->br_startblock)); - XFS_STATS_INC(xs_add_exlist); + XFS_STATS_INC(mp, xs_add_exlist); #define LEFT r[0] #define RIGHT r[1] @@ -2946,7 +2948,7 @@ xfs_bmap_add_extent_hole_real( ASSERT(!bma->cur || !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); - XFS_STATS_INC(xs_add_exlist); + XFS_STATS_INC(mp, xs_add_exlist); state = 0; if (whichfork == XFS_ATTR_FORK) @@ -3800,8 +3802,13 @@ xfs_bmap_btalloc( args.wasdel = ap->wasdel; args.isfl = 0; args.userdata = ap->userdata; - if ((error = xfs_alloc_vextent(&args))) + if (ap->userdata & XFS_ALLOC_USERDATA_ZERO) + args.ip = ap->ip; + + error = xfs_alloc_vextent(&args); + if (error) return error; + if (tryagain && args.fsbno == NULLFSBLOCK) { /* * Exact allocation failed. Now try with alignment @@ -4036,7 +4043,7 @@ xfs_bmapi_read( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - XFS_STATS_INC(xs_blk_mapr); + XFS_STATS_INC(mp, xs_blk_mapr); ifp = XFS_IFORK_PTR(ip, whichfork); @@ -4221,7 +4228,7 @@ xfs_bmapi_delay( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - XFS_STATS_INC(xs_blk_mapw); + XFS_STATS_INC(mp, xs_blk_mapw); if (!(ifp->if_flags & XFS_IFEXTENTS)) { error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); @@ -4300,11 +4307,14 @@ xfs_bmapi_allocate( /* * Indicate if this is the first user data in the file, or just any - * user data. + * user data. And if it is userdata, indicate whether it needs to + * be initialised to zero during allocation. */ if (!(bma->flags & XFS_BMAPI_METADATA)) { bma->userdata = (bma->offset == 0) ? XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA; + if (bma->flags & XFS_BMAPI_ZERO) + bma->userdata |= XFS_ALLOC_USERDATA_ZERO; } bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1; @@ -4419,6 +4429,17 @@ xfs_bmapi_convert_unwritten( mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; + /* + * Before insertion into the bmbt, zero the range being converted + * if required. + */ + if (flags & XFS_BMAPI_ZERO) { + error = xfs_zero_extent(bma->ip, mval->br_startblock, + mval->br_blockcount); + if (error) + return error; + } + error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, &bma->cur, mval, bma->firstblock, bma->flist, &tmp_logflags); @@ -4512,6 +4533,18 @@ xfs_bmapi_write( ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + /* zeroing is for currently only for data extents, not metadata */ + ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) != + (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)); + /* + * we can allocate unwritten extents or pre-zero allocated blocks, + * but it makes no sense to do both at once. This would result in + * zeroing the unwritten extent twice, but it still being an + * unwritten extent.... + */ + ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) != + (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)); + if (unlikely(XFS_TEST_ERROR( (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), @@ -4525,7 +4558,7 @@ xfs_bmapi_write( ifp = XFS_IFORK_PTR(ip, whichfork); - XFS_STATS_INC(xs_blk_mapw); + XFS_STATS_INC(mp, xs_blk_mapw); if (*firstblock == NULLFSBLOCK) { if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE) @@ -4718,12 +4751,12 @@ xfs_bmap_del_extent( xfs_filblks_t temp2; /* for indirect length calculations */ int state = 0; - XFS_STATS_INC(xs_del_exlist); + mp = ip->i_mount; + XFS_STATS_INC(mp, xs_del_exlist); if (whichfork == XFS_ATTR_FORK) state |= BMAP_ATTRFORK; - mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT((*idx >= 0) && (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); @@ -5070,7 +5103,7 @@ xfs_bunmapi( *done = 1; return 0; } - XFS_STATS_INC(xs_blk_unmap); + XFS_STATS_INC(mp, xs_blk_unmap); isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); start = bno; bno = start + len - 1; diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 6aaa0c1c7200..a160f8a5a3fc 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -52,9 +52,9 @@ struct xfs_bmalloca { xfs_extlen_t minleft; /* amount must be left after alloc */ bool eof; /* set if allocating past last extent */ bool wasdel; /* replacing a delayed allocation */ - bool userdata;/* set if is user data */ bool aeof; /* allocated space at eof */ bool conv; /* overwriting unwritten extents */ + char userdata;/* userdata mask */ int flags; }; @@ -109,6 +109,14 @@ typedef struct xfs_bmap_free */ #define XFS_BMAPI_CONVERT 0x040 +/* + * allocate zeroed extents - this requires all newly allocated user data extents + * to be initialised to zero. It will be ignored if XFS_BMAPI_METADATA is set. + * Use in conjunction with XFS_BMAPI_CONVERT to convert unwritten extents found + * during the allocation range to zeroed written extents. + */ +#define XFS_BMAPI_ZERO 0x080 + #define XFS_BMAPI_FLAGS \ { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ { XFS_BMAPI_METADATA, "METADATA" }, \ @@ -116,7 +124,8 @@ typedef struct xfs_bmap_free { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ { XFS_BMAPI_CONTIG, "CONTIG" }, \ - { XFS_BMAPI_CONVERT, "CONVERT" } + { XFS_BMAPI_CONVERT, "CONVERT" }, \ + { XFS_BMAPI_ZERO, "ZERO" } static inline int xfs_bmapi_aflag(int w) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index f7d7ee7a2607..af1bbee5586e 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -32,6 +32,7 @@ #include "xfs_trace.h" #include "xfs_cksum.h" #include "xfs_alloc.h" +#include "xfs_log.h" /* * Cursor allocation zone. @@ -222,7 +223,7 @@ xfs_btree_check_ptr( * long-form btree header. * * Prior to calculting the CRC, pull the LSN out of the buffer log item and put - * it into the buffer so recovery knows what the last modifcation was that made + * it into the buffer so recovery knows what the last modification was that made * it to disk. */ void @@ -243,8 +244,14 @@ bool xfs_btree_lblock_verify_crc( struct xfs_buf *bp) { - if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn))) + return false; return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); + } return true; } @@ -254,7 +261,7 @@ xfs_btree_lblock_verify_crc( * short-form btree header. * * Prior to calculting the CRC, pull the LSN out of the buffer log item and put - * it into the buffer so recovery knows what the last modifcation was that made + * it into the buffer so recovery knows what the last modification was that made * it to disk. */ void @@ -275,8 +282,14 @@ bool xfs_btree_sblock_verify_crc( struct xfs_buf *bp) { - if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) + return false; return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); + } return true; } diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 8f18bab73ea5..992dec0638f3 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -84,31 +84,38 @@ union xfs_btree_rec { /* * Generic stats interface */ -#define __XFS_BTREE_STATS_INC(type, stat) \ - XFS_STATS_INC(xs_ ## type ## _2_ ## stat) -#define XFS_BTREE_STATS_INC(cur, stat) \ +#define __XFS_BTREE_STATS_INC(mp, type, stat) \ + XFS_STATS_INC(mp, xs_ ## type ## _2_ ## stat) +#define XFS_BTREE_STATS_INC(cur, stat) \ do { \ + struct xfs_mount *__mp = cur->bc_mp; \ switch (cur->bc_btnum) { \ - case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(abtb, stat); break; \ - case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \ - case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \ - case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \ - case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break; \ + case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(__mp, abtb, stat); break; \ + case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(__mp, abtc, stat); break; \ + case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(__mp, bmbt, stat); break; \ + case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(__mp, ibt, stat); break; \ + case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(__mp, fibt, stat); break; \ case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ } \ } while (0) -#define __XFS_BTREE_STATS_ADD(type, stat, val) \ - XFS_STATS_ADD(xs_ ## type ## _2_ ## stat, val) +#define __XFS_BTREE_STATS_ADD(mp, type, stat, val) \ + XFS_STATS_ADD(mp, xs_ ## type ## _2_ ## stat, val) #define XFS_BTREE_STATS_ADD(cur, stat, val) \ do { \ + struct xfs_mount *__mp = cur->bc_mp; \ switch (cur->bc_btnum) { \ - case XFS_BTNUM_BNO: __XFS_BTREE_STATS_ADD(abtb, stat, val); break; \ - case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \ - case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \ - case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \ - case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \ - case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ + case XFS_BTNUM_BNO: \ + __XFS_BTREE_STATS_ADD(__mp, abtb, stat, val); break; \ + case XFS_BTNUM_CNT: \ + __XFS_BTREE_STATS_ADD(__mp, abtc, stat, val); break; \ + case XFS_BTNUM_BMAP: \ + __XFS_BTREE_STATS_ADD(__mp, bmbt, stat, val); break; \ + case XFS_BTNUM_INO: \ + __XFS_BTREE_STATS_ADD(__mp, ibt, stat, val); break; \ + case XFS_BTNUM_FINO: \ + __XFS_BTREE_STATS_ADD(__mp, fibt, stat, val); break; \ + case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ } \ } while (0) diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index be43248a5822..e89a0f8f827c 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -39,6 +39,7 @@ #include "xfs_trace.h" #include "xfs_cksum.h" #include "xfs_buf_item.h" +#include "xfs_log.h" /* * xfs_da_btree.c @@ -150,6 +151,8 @@ xfs_da3_node_verify( return false; if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) return false; + if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn))) + return false; } else { if (ichdr.magic != XFS_DA_NODE_MAGIC) return false; @@ -322,6 +325,7 @@ xfs_da3_node_create( if (xfs_sb_version_hascrc(&mp->m_sb)) { struct xfs_da3_node_hdr *hdr3 = bp->b_addr; + memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr)); ichdr.magic = XFS_DA3_NODE_MAGIC; hdr3->info.blkno = cpu_to_be64(bp->b_bn); hdr3->info.owner = cpu_to_be64(args->dp->i_ino); diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 9de401d297e5..2fb53a5c0a74 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -271,7 +271,7 @@ xfs_dir_createname( rval = xfs_dir_ino_validate(tp->t_mountp, inum); if (rval) return rval; - XFS_STATS_INC(xs_dir_create); + XFS_STATS_INC(dp->i_mount, xs_dir_create); } args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); @@ -365,7 +365,7 @@ xfs_dir_lookup( int lock_mode; ASSERT(S_ISDIR(dp->i_d.di_mode)); - XFS_STATS_INC(xs_dir_lookup); + XFS_STATS_INC(dp->i_mount, xs_dir_lookup); /* * We need to use KM_NOFS here so that lockdep will not throw false @@ -444,7 +444,7 @@ xfs_dir_removename( int v; /* type-checking value */ ASSERT(S_ISDIR(dp->i_d.di_mode)); - XFS_STATS_INC(xs_dir_remove); + XFS_STATS_INC(dp->i_mount, xs_dir_remove); args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); if (!args) diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index 4778d1dd511a..9c10e2b8cfcb 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -34,6 +34,7 @@ #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_cksum.h" +#include "xfs_log.h" /* * Local function prototypes. @@ -71,6 +72,8 @@ xfs_dir3_block_verify( return false; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) return false; + if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) + return false; } else { if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) return false; diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 824131e71bc5..af71a84f343c 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -31,6 +31,7 @@ #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_cksum.h" +#include "xfs_log.h" /* * Check the consistency of the data block. @@ -224,6 +225,8 @@ xfs_dir3_data_verify( return false; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) return false; + if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) + return false; } else { if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC)) return false; diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index f300240ebb8d..3923e1f94697 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -33,6 +33,7 @@ #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_cksum.h" +#include "xfs_log.h" /* * Local function declarations. @@ -164,6 +165,8 @@ xfs_dir3_leaf_verify( return false; if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) return false; + if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn))) + return false; } else { if (leaf->hdr.info.magic != cpu_to_be16(magic)) return false; diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index cc28e924545b..70b0cb2fd556 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -33,6 +33,7 @@ #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_cksum.h" +#include "xfs_log.h" /* * Function declarations. @@ -97,6 +98,8 @@ xfs_dir3_free_verify( return false; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) return false; + if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) + return false; } else { if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)) return false; diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 9590a069e556..8774498ce0ff 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -60,6 +60,14 @@ struct xfs_ifork; #define XFS_SB_VERSION_MOREBITSBIT 0x8000 /* + * The size of a single extended attribute on disk is limited by + * the size of index values within the attribute entries themselves. + * These are be16 fields, so we can only support attribute data + * sizes up to 2^16 bytes in length. + */ +#define XFS_XATTR_SIZE_MAX (1 << 16) + +/* * Supported feature bit list is just all bits in the versionnum field because * we've used them all up and understand them all. Except, of course, for the * shared superblock bit, which nobody knows what it does and so is unsupported. @@ -1483,13 +1491,17 @@ struct xfs_acl { */ #define XFS_ACL_MAX_ENTRIES(mp) \ (xfs_sb_version_hascrc(&mp->m_sb) \ - ? (XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \ + ? (XFS_XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \ sizeof(struct xfs_acl_entry) \ : 25) -#define XFS_ACL_MAX_SIZE(mp) \ +#define XFS_ACL_SIZE(cnt) \ (sizeof(struct xfs_acl) + \ - sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp))) + sizeof(struct xfs_acl_entry) * cnt) + +#define XFS_ACL_MAX_SIZE(mp) \ + XFS_ACL_SIZE(XFS_ACL_MAX_ENTRIES((mp))) + /* On-disk XFS extended attribute names */ #define SGI_ACL_FILE "SGI_ACL_FILE" diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 89689c6a43e2..b2b73a998d42 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -490,6 +490,16 @@ typedef struct xfs_swapext #define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ /* + * ioctl limits + */ +#ifdef XATTR_LIST_MAX +# define XFS_XATTR_LIST_MAX XATTR_LIST_MAX +#else +# define XFS_XATTR_LIST_MAX 65536 +#endif + + +/* * ioctl commands that are used by Linux filesystems */ #define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 54deb2d12ac6..70c1db99f6a7 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -38,6 +38,7 @@ #include "xfs_icreate_item.h" #include "xfs_icache.h" #include "xfs_trace.h" +#include "xfs_log.h" /* @@ -2500,9 +2501,14 @@ xfs_agi_verify( struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); - if (xfs_sb_version_hascrc(&mp->m_sb) && - !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) + if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) + return false; + if (!xfs_log_check_lsn(mp, + be64_to_cpu(XFS_BUF_TO_AGI(bp)->agi_lsn))) return false; + } + /* * Validate the magic number of the agi block. */ diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 47425140f343..a0b071d881a0 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -35,6 +35,7 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" +#include "xfs_log.h" /* * Physical superblock buffer manipulations. Shared with libxfs in userspace. @@ -163,6 +164,15 @@ xfs_mount_validate_sb( "Filesystem can not be safely mounted by this kernel."); return -EINVAL; } + } else if (xfs_sb_version_hascrc(sbp)) { + /* + * We can't read verify the sb LSN because the read verifier is + * called before the log is allocated and processed. We know the + * log is set up before write verifier (!check_version) calls, + * so just check it here. + */ + if (!xfs_log_check_lsn(mp, sbp->sb_lsn)) + return -EFSCORRUPTED; } if (xfs_sb_version_has_pquotino(sbp)) { diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index 8f8af05b3f13..cb6fd20a4d3d 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -31,6 +31,7 @@ #include "xfs_cksum.h" #include "xfs_trans.h" #include "xfs_buf_item.h" +#include "xfs_log.h" /* @@ -60,6 +61,7 @@ xfs_symlink_hdr_set( if (!xfs_sb_version_hascrc(&mp->m_sb)) return 0; + memset(dsl, 0, sizeof(struct xfs_dsymlink_hdr)); dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC); dsl->sl_offset = cpu_to_be32(offset); dsl->sl_bytes = cpu_to_be32(size); @@ -116,6 +118,8 @@ xfs_symlink_verify( return false; if (dsl->sl_owner == 0) return false; + if (!xfs_log_check_lsn(mp, be64_to_cpu(dsl->sl_lsn))) + return false; return true; } @@ -183,6 +187,7 @@ xfs_symlink_local_to_remote( if (!xfs_sb_version_hascrc(&mp->m_sb)) { bp->b_ops = NULL; memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); + xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); return; } @@ -198,4 +203,6 @@ xfs_symlink_local_to_remote( buf = bp->b_addr; buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp); memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes); + xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) + + ifp->if_bytes - 1); } diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 4b641676f258..6bb470fbb8e8 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -37,16 +37,19 @@ STATIC struct posix_acl * xfs_acl_from_disk( - struct xfs_acl *aclp, - int max_entries) + const struct xfs_acl *aclp, + int len, + int max_entries) { struct posix_acl_entry *acl_e; struct posix_acl *acl; - struct xfs_acl_entry *ace; + const struct xfs_acl_entry *ace; unsigned int count, i; + if (len < sizeof(*aclp)) + return ERR_PTR(-EFSCORRUPTED); count = be32_to_cpu(aclp->acl_cnt); - if (count > max_entries) + if (count > max_entries || XFS_ACL_SIZE(count) != len) return ERR_PTR(-EFSCORRUPTED); acl = posix_acl_alloc(count, GFP_KERNEL); @@ -160,10 +163,11 @@ xfs_get_acl(struct inode *inode, int type) */ if (error == -ENOATTR) goto out_update_cache; + acl = ERR_PTR(error); goto out; } - acl = xfs_acl_from_disk(xfs_acl, XFS_ACL_MAX_ENTRIES(ip->i_mount)); + acl = xfs_acl_from_disk(xfs_acl, len, XFS_ACL_MAX_ENTRIES(ip->i_mount)); if (IS_ERR(acl)) goto out; diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 3841b07f27bf..52f8255d6bdf 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -20,7 +20,6 @@ struct inode; struct posix_acl; -struct xfs_inode; #ifdef CONFIG_XFS_POSIX_ACL extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); @@ -36,4 +35,7 @@ static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type) # define posix_acl_access_exists(inode) 0 # define posix_acl_default_exists(inode) 0 #endif /* CONFIG_XFS_POSIX_ACL */ + +extern void xfs_forget_acl(struct inode *inode, const char *name, int xflags); + #endif /* __XFS_ACL_H__ */ diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 50ab2879b9da..29e7e5dd5178 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -172,6 +172,12 @@ xfs_setfilesize_ioend( current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); + /* we abort the update if there was an IO error */ + if (ioend->io_error) { + xfs_trans_cancel(tp); + return ioend->io_error; + } + return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); } @@ -212,14 +218,17 @@ xfs_end_io( ioend->io_error = -EIO; goto done; } - if (ioend->io_error) - goto done; /* * For unwritten extents we need to issue transactions to convert a * range to normal written extens after the data I/O has finished. + * Detecting and handling completion IO errors is done individually + * for each case as different cleanup operations need to be performed + * on error. */ if (ioend->io_type == XFS_IO_UNWRITTEN) { + if (ioend->io_error) + goto done; error = xfs_iomap_write_unwritten(ip, ioend->io_offset, ioend->io_size); } else if (ioend->io_append_trans) { @@ -1250,13 +1259,28 @@ xfs_vm_releasepage( * the DIO. There is only going to be one reference to the ioend and its life * cycle is constrained by the DIO completion code. hence we don't need * reference counting here. + * + * Note that for DIO, an IO to the highest supported file block offset (i.e. + * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64 + * bit variable. Hence if we see this overflow, we have to assume that the IO is + * extending the file size. We won't know for sure until IO completion is run + * and the actual max write offset is communicated to the IO completion + * routine. + * + * For DAX page faults, we are preparing to never see unwritten extents here, + * nor should we ever extend the inode size. Hence we will soon have nothing to + * do here for this case, ensuring we don't have to provide an IO completion + * callback to free an ioend that we don't actually need for a fault into the + * page at offset (2^63 - 1FSB) bytes. */ + static void xfs_map_direct( struct inode *inode, struct buffer_head *bh_result, struct xfs_bmbt_irec *imap, - xfs_off_t offset) + xfs_off_t offset, + bool dax_fault) { struct xfs_ioend *ioend; xfs_off_t size = bh_result->b_size; @@ -1269,6 +1293,13 @@ xfs_map_direct( trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap); + if (dax_fault) { + ASSERT(type == XFS_IO_OVERWRITE); + trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type, + imap); + return; + } + if (bh_result->b_private) { ioend = bh_result->b_private; ASSERT(ioend->io_size > 0); @@ -1283,7 +1314,8 @@ xfs_map_direct( ioend->io_size, ioend->io_type, imap); } else if (type == XFS_IO_UNWRITTEN || - offset + size > i_size_read(inode)) { + offset + size > i_size_read(inode) || + offset + size < 0) { ioend = xfs_alloc_ioend(inode, type); ioend->io_offset = offset; ioend->io_size = size; @@ -1345,7 +1377,8 @@ __xfs_get_blocks( sector_t iblock, struct buffer_head *bh_result, int create, - bool direct) + bool direct, + bool dax_fault) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; @@ -1393,18 +1426,20 @@ __xfs_get_blocks( if (error) goto out_unlock; + /* for DAX, we convert unwritten extents directly */ if (create && (!nimaps || (imap.br_startblock == HOLESTARTBLOCK || - imap.br_startblock == DELAYSTARTBLOCK))) { + imap.br_startblock == DELAYSTARTBLOCK) || + (IS_DAX(inode) && ISUNWRITTEN(&imap)))) { if (direct || xfs_get_extsz_hint(ip)) { /* - * Drop the ilock in preparation for starting the block - * allocation transaction. It will be retaken - * exclusively inside xfs_iomap_write_direct for the - * actual allocation. + * xfs_iomap_write_direct() expects the shared lock. It + * is unlocked on return. */ - xfs_iunlock(ip, lockmode); + if (lockmode == XFS_ILOCK_EXCL) + xfs_ilock_demote(ip, lockmode); + error = xfs_iomap_write_direct(ip, offset, size, &imap, nimaps); if (error) @@ -1441,6 +1476,12 @@ __xfs_get_blocks( goto out_unlock; } + if (IS_DAX(inode) && create) { + ASSERT(!ISUNWRITTEN(&imap)); + /* zeroing is not needed at a higher layer */ + new = 0; + } + /* trim mapping down to size requested */ if (direct || size > (1 << inode->i_blkbits)) xfs_map_trim_size(inode, iblock, bh_result, @@ -1458,7 +1499,8 @@ __xfs_get_blocks( set_buffer_unwritten(bh_result); /* direct IO needs special help */ if (create && direct) - xfs_map_direct(inode, bh_result, &imap, offset); + xfs_map_direct(inode, bh_result, &imap, offset, + dax_fault); } /* @@ -1505,7 +1547,7 @@ xfs_get_blocks( struct buffer_head *bh_result, int create) { - return __xfs_get_blocks(inode, iblock, bh_result, create, false); + return __xfs_get_blocks(inode, iblock, bh_result, create, false, false); } int @@ -1515,7 +1557,17 @@ xfs_get_blocks_direct( struct buffer_head *bh_result, int create) { - return __xfs_get_blocks(inode, iblock, bh_result, create, true); + return __xfs_get_blocks(inode, iblock, bh_result, create, true, false); +} + +int +xfs_get_blocks_dax_fault( + struct inode *inode, + sector_t iblock, + struct buffer_head *bh_result, + int create) +{ + return __xfs_get_blocks(inode, iblock, bh_result, create, true, true); } static void @@ -1614,45 +1666,6 @@ xfs_end_io_direct_write( __xfs_end_io_direct_write(inode, ioend, offset, size); } -/* - * For DAX we need a mapping buffer callback for unwritten extent conversion - * when page faults allocate blocks and then zero them. Note that in this - * case the mapping indicated by the ioend may extend beyond EOF. We most - * definitely do not want to extend EOF here, so we trim back the ioend size to - * EOF. - */ -#ifdef CONFIG_FS_DAX -void -xfs_end_io_dax_write( - struct buffer_head *bh, - int uptodate) -{ - struct xfs_ioend *ioend = bh->b_private; - struct inode *inode = ioend->io_inode; - ssize_t size = ioend->io_size; - - ASSERT(IS_DAX(ioend->io_inode)); - - /* if there was an error zeroing, then don't convert it */ - if (!uptodate) - ioend->io_error = -EIO; - - /* - * Trim update to EOF, so we don't extend EOF during unwritten extent - * conversion of partial EOF blocks. - */ - spin_lock(&XFS_I(inode)->i_flags_lock); - if (ioend->io_offset + size > i_size_read(inode)) - size = i_size_read(inode) - ioend->io_offset; - spin_unlock(&XFS_I(inode)->i_flags_lock); - - __xfs_end_io_direct_write(inode, ioend, ioend->io_offset, size); - -} -#else -void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate) { } -#endif - static inline ssize_t xfs_vm_do_dio( struct inode *inode, diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index 86afd1ac7895..f6ffc9ae5ceb 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -58,7 +58,8 @@ int xfs_get_blocks(struct inode *inode, sector_t offset, struct buffer_head *map_bh, int create); int xfs_get_blocks_direct(struct inode *inode, sector_t offset, struct buffer_head *map_bh, int create); -void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate); +int xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset, + struct buffer_head *map_bh, int create); extern void xfs_count_page_state(struct page *, int *, int *); diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 65fb37a18e92..0ef7c2ed3f8a 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -511,7 +511,7 @@ xfs_attr_list_int( xfs_inode_t *dp = context->dp; uint lock_mode; - XFS_STATS_INC(xs_attr_list); + XFS_STATS_INC(dp->i_mount, xs_attr_list); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 3bf4ad0d19e4..dbae6490a79a 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -57,6 +57,35 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) } /* + * Routine to zero an extent on disk allocated to the specific inode. + * + * The VFS functions take a linearised filesystem block offset, so we have to + * convert the sparse xfs fsb to the right format first. + * VFS types are real funky, too. + */ +int +xfs_zero_extent( + struct xfs_inode *ip, + xfs_fsblock_t start_fsb, + xfs_off_t count_fsb) +{ + struct xfs_mount *mp = ip->i_mount; + xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb); + sector_t block = XFS_BB_TO_FSBT(mp, sector); + ssize_t size = XFS_FSB_TO_B(mp, count_fsb); + + if (IS_DAX(VFS_I(ip))) + return dax_clear_blocks(VFS_I(ip), block, size); + + /* + * let the block layer decide on the fastest method of + * implementing the zeroing. + */ + return sb_issue_zeroout(mp->m_super, block, count_fsb, GFP_NOFS); + +} + +/* * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi * caller. Frees all the extents that need freeing, which must be done * last due to locking considerations. We never free any extents in @@ -229,6 +258,13 @@ xfs_bmap_rtalloc( xfs_trans_mod_dquot_byino(ap->tp, ap->ip, ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT : XFS_TRANS_DQ_RTBCOUNT, (long) ralen); + + /* Zero the extent if we were asked to do so */ + if (ap->userdata & XFS_ALLOC_USERDATA_ZERO) { + error = xfs_zero_extent(ap->ip, ap->blkno, ap->length); + if (error) + return error; + } } else { ap->length = 0; } @@ -1027,7 +1063,7 @@ xfs_alloc_file_space( xfs_bmap_init(&free_list, &firstfsb); error = xfs_bmapi_write(tp, ip, startoffset_fsb, allocatesize_fsb, alloc_type, &firstfsb, - 0, imapp, &nimaps, &free_list); + resblks, imapp, &nimaps, &free_list); if (error) { goto error0; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 8ecffb35935b..3243cdf97f33 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -201,7 +201,7 @@ _xfs_buf_alloc( atomic_set(&bp->b_pin_count, 0); init_waitqueue_head(&bp->b_waiters); - XFS_STATS_INC(xb_create); + XFS_STATS_INC(target->bt_mount, xb_create); trace_xfs_buf_init(bp, _RET_IP_); return bp; @@ -354,15 +354,16 @@ retry: */ if (!(++retries % 100)) xfs_err(NULL, - "possible memory allocation deadlock in %s (mode:0x%x)", + "%s(%u) possible memory allocation deadlock in %s (mode:0x%x)", + current->comm, current->pid, __func__, gfp_mask); - XFS_STATS_INC(xb_page_retries); + XFS_STATS_INC(bp->b_target->bt_mount, xb_page_retries); congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry; } - XFS_STATS_INC(xb_page_found); + XFS_STATS_INC(bp->b_target->bt_mount, xb_page_found); nbytes = min_t(size_t, size, PAGE_SIZE - offset); size -= nbytes; @@ -516,7 +517,7 @@ _xfs_buf_find( new_bp->b_pag = pag; spin_unlock(&pag->pag_buf_lock); } else { - XFS_STATS_INC(xb_miss_locked); + XFS_STATS_INC(btp->bt_mount, xb_miss_locked); spin_unlock(&pag->pag_buf_lock); xfs_perag_put(pag); } @@ -529,11 +530,11 @@ found: if (!xfs_buf_trylock(bp)) { if (flags & XBF_TRYLOCK) { xfs_buf_rele(bp); - XFS_STATS_INC(xb_busy_locked); + XFS_STATS_INC(btp->bt_mount, xb_busy_locked); return NULL; } xfs_buf_lock(bp); - XFS_STATS_INC(xb_get_locked_waited); + XFS_STATS_INC(btp->bt_mount, xb_get_locked_waited); } /* @@ -549,7 +550,7 @@ found: } trace_xfs_buf_find(bp, flags, _RET_IP_); - XFS_STATS_INC(xb_get_locked); + XFS_STATS_INC(btp->bt_mount, xb_get_locked); return bp; } @@ -603,7 +604,7 @@ found: } } - XFS_STATS_INC(xb_get); + XFS_STATS_INC(target->bt_mount, xb_get); trace_xfs_buf_get(bp, flags, _RET_IP_); return bp; } @@ -643,7 +644,7 @@ xfs_buf_read_map( trace_xfs_buf_read(bp, flags, _RET_IP_); if (!XFS_BUF_ISDONE(bp)) { - XFS_STATS_INC(xb_get_read); + XFS_STATS_INC(target->bt_mount, xb_get_read); bp->b_ops = ops; _xfs_buf_read(bp, flags); } else if (flags & XBF_ASYNC) { diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index a989a9c7edb7..642d55d10075 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -666,7 +666,7 @@ xfs_readdir( return -EIO; ASSERT(S_ISDIR(dp->i_d.di_mode)); - XFS_STATS_INC(xs_dir_getdents); + XFS_STATS_INC(dp->i_mount, xs_dir_getdents); args.dp = dp; args.geo = dp->i_mount->m_dir_geo; diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 30cb3afb67f0..7ac6c5c586cb 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -75,9 +75,9 @@ xfs_qm_dqdestroy( ASSERT(list_empty(&dqp->q_lru)); mutex_destroy(&dqp->q_qlock); - kmem_zone_free(xfs_qm_dqzone, dqp); - XFS_STATS_DEC(xs_qm_dquot); + XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot); + kmem_zone_free(xfs_qm_dqzone, dqp); } /* @@ -605,7 +605,7 @@ xfs_qm_dqread( break; } - XFS_STATS_INC(xs_qm_dquot); + XFS_STATS_INC(mp, xs_qm_dquot); trace_xfs_dqread(dqp); @@ -747,12 +747,12 @@ restart: mutex_unlock(&qi->qi_tree_lock); trace_xfs_dqget_hit(dqp); - XFS_STATS_INC(xs_qm_dqcachehits); + XFS_STATS_INC(mp, xs_qm_dqcachehits); *O_dqpp = dqp; return 0; } mutex_unlock(&qi->qi_tree_lock); - XFS_STATS_INC(xs_qm_dqcachemisses); + XFS_STATS_INC(mp, xs_qm_dqcachemisses); /* * Dquot cache miss. We don't want to keep the inode lock across @@ -806,7 +806,7 @@ restart: mutex_unlock(&qi->qi_tree_lock); trace_xfs_dqget_dup(dqp); xfs_qm_dqdestroy(dqp); - XFS_STATS_INC(xs_qm_dquot_dups); + XFS_STATS_INC(mp, xs_qm_dquot_dups); goto restart; } @@ -846,7 +846,7 @@ xfs_qm_dqput( trace_xfs_dqput_free(dqp); if (list_lru_add(&qi->qi_lru, &dqp->q_lru)) - XFS_STATS_INC(xs_qm_dquot_unused); + XFS_STATS_INC(dqp->q_mount, xs_qm_dquot_unused); } xfs_dqunlock(dqp); } diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index e78feb400e22..f5392ab2def1 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -242,19 +242,30 @@ xfs_file_fsync( } /* - * All metadata updates are logged, which means that we just have - * to flush the log up to the latest LSN that touched the inode. + * All metadata updates are logged, which means that we just have to + * flush the log up to the latest LSN that touched the inode. If we have + * concurrent fsync/fdatasync() calls, we need them to all block on the + * log force before we clear the ili_fsync_fields field. This ensures + * that we don't get a racing sync operation that does not wait for the + * metadata to hit the journal before returning. If we race with + * clearing the ili_fsync_fields, then all that will happen is the log + * force will do nothing as the lsn will already be on disk. We can't + * race with setting ili_fsync_fields because that is done under + * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared + * until after the ili_fsync_fields is cleared. */ xfs_ilock(ip, XFS_ILOCK_SHARED); if (xfs_ipincount(ip)) { if (!datasync || - (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP)) + (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP)) lsn = ip->i_itemp->ili_last_lsn; } - xfs_iunlock(ip, XFS_ILOCK_SHARED); - if (lsn) + if (lsn) { error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); + ip->i_itemp->ili_fsync_fields = 0; + } + xfs_iunlock(ip, XFS_ILOCK_SHARED); /* * If we only have a single device, and the log force about was @@ -287,7 +298,7 @@ xfs_file_read_iter( xfs_fsize_t n; loff_t pos = iocb->ki_pos; - XFS_STATS_INC(xs_read_calls); + XFS_STATS_INC(mp, xs_read_calls); if (unlikely(iocb->ki_flags & IOCB_DIRECT)) ioflags |= XFS_IO_ISDIRECT; @@ -365,7 +376,7 @@ xfs_file_read_iter( ret = generic_file_read_iter(iocb, to); if (ret > 0) - XFS_STATS_ADD(xs_read_bytes, ret); + XFS_STATS_ADD(mp, xs_read_bytes, ret); xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); return ret; @@ -383,7 +394,7 @@ xfs_file_splice_read( int ioflags = 0; ssize_t ret; - XFS_STATS_INC(xs_read_calls); + XFS_STATS_INC(ip->i_mount, xs_read_calls); if (infilp->f_mode & FMODE_NOCMTIME) ioflags |= XFS_IO_INVIS; @@ -401,7 +412,7 @@ xfs_file_splice_read( else ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); if (ret > 0) - XFS_STATS_ADD(xs_read_bytes, ret); + XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret); xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); return ret; @@ -482,6 +493,8 @@ xfs_zero_eof( ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); ASSERT(offset > isize); + trace_xfs_zero_eof(ip, isize, offset - isize); + /* * First handle zeroing the block on which isize resides. * @@ -574,6 +587,7 @@ xfs_file_aio_write_checks( struct xfs_inode *ip = XFS_I(inode); ssize_t error = 0; size_t count = iov_iter_count(from); + bool drained_dio = false; restart: error = generic_write_checks(iocb, from); @@ -611,12 +625,13 @@ restart: bool zero = false; spin_unlock(&ip->i_flags_lock); - if (*iolock == XFS_IOLOCK_SHARED) { - xfs_rw_iunlock(ip, *iolock); - *iolock = XFS_IOLOCK_EXCL; - xfs_rw_ilock(ip, *iolock); - iov_iter_reexpand(from, count); - + if (!drained_dio) { + if (*iolock == XFS_IOLOCK_SHARED) { + xfs_rw_iunlock(ip, *iolock); + *iolock = XFS_IOLOCK_EXCL; + xfs_rw_ilock(ip, *iolock); + iov_iter_reexpand(from, count); + } /* * We now have an IO submission barrier in place, but * AIO can do EOF updates during IO completion and hence @@ -626,6 +641,7 @@ restart: * no-op. */ inode_dio_wait(inode); + drained_dio = true; goto restart; } error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero); @@ -867,7 +883,7 @@ xfs_file_write_iter( ssize_t ret; size_t ocount = iov_iter_count(from); - XFS_STATS_INC(xs_write_calls); + XFS_STATS_INC(ip->i_mount, xs_write_calls); if (ocount == 0) return 0; @@ -883,7 +899,7 @@ xfs_file_write_iter( if (ret > 0) { ssize_t err; - XFS_STATS_ADD(xs_write_bytes, ret); + XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret); /* Handle various SYNC-type writes */ err = generic_write_sync(file, iocb->ki_pos - ret, ret); @@ -1477,7 +1493,7 @@ xfs_file_llseek( * * mmap_sem (MM) * sb_start_pagefault(vfs, freeze) - * i_mmap_lock (XFS - truncate serialisation) + * i_mmaplock (XFS - truncate serialisation) * page_lock (MM) * i_lock (XFS - extent map serialisation) */ @@ -1503,10 +1519,9 @@ xfs_filemap_page_mkwrite( xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); if (IS_DAX(inode)) { - ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_direct, - xfs_end_io_dax_write); + ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault, NULL); } else { - ret = __block_page_mkwrite(vma, vmf, xfs_get_blocks); + ret = block_page_mkwrite(vma, vmf, xfs_get_blocks); ret = block_page_mkwrite_return(ret); } @@ -1538,7 +1553,7 @@ xfs_filemap_fault( * changes to xfs_get_blocks_direct() to map unwritten extent * ioend for conversion on read-only mappings. */ - ret = __dax_fault(vma, vmf, xfs_get_blocks_direct, NULL); + ret = __dax_fault(vma, vmf, xfs_get_blocks_dax_fault, NULL); } else ret = filemap_fault(vma, vmf); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); @@ -1546,6 +1561,13 @@ xfs_filemap_fault( return ret; } +/* + * Similar to xfs_filemap_fault(), the DAX fault path can call into here on + * both read and write faults. Hence we need to handle both cases. There is no + * ->pmd_mkwrite callout for huge pages, so we have a single function here to + * handle both cases here. @flags carries the information on the type of fault + * occuring. + */ STATIC int xfs_filemap_pmd_fault( struct vm_area_struct *vma, @@ -1562,15 +1584,54 @@ xfs_filemap_pmd_fault( trace_xfs_filemap_pmd_fault(ip); - sb_start_pagefault(inode->i_sb); - file_update_time(vma->vm_file); + if (flags & FAULT_FLAG_WRITE) { + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + } + xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_direct, - xfs_end_io_dax_write); + ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_dax_fault, + NULL); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - sb_end_pagefault(inode->i_sb); + if (flags & FAULT_FLAG_WRITE) + sb_end_pagefault(inode->i_sb); + + return ret; +} + +/* + * pfn_mkwrite was originally inteneded to ensure we capture time stamp + * updates on write faults. In reality, it's need to serialise against + * truncate similar to page_mkwrite. Hence we open-code dax_pfn_mkwrite() + * here and cycle the XFS_MMAPLOCK_SHARED to ensure we serialise the fault + * barrier in place. + */ +static int +xfs_filemap_pfn_mkwrite( + struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + + struct inode *inode = file_inode(vma->vm_file); + struct xfs_inode *ip = XFS_I(inode); + int ret = VM_FAULT_NOPAGE; + loff_t size; + + trace_xfs_filemap_pfn_mkwrite(ip); + + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + + /* check if the faulting page hasn't raced with truncate */ + xfs_ilock(ip, XFS_MMAPLOCK_SHARED); + size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (vmf->pgoff >= size) + ret = VM_FAULT_SIGBUS; + xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); + sb_end_pagefault(inode->i_sb); return ret; + } static const struct vm_operations_struct xfs_file_vm_ops = { @@ -1578,6 +1639,7 @@ static const struct vm_operations_struct xfs_file_vm_ops = { .pmd_fault = xfs_filemap_pmd_fault, .map_pages = filemap_map_pages, .page_mkwrite = xfs_filemap_page_mkwrite, + .pfn_mkwrite = xfs_filemap_pfn_mkwrite, }; STATIC int diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 0a326bd64d4e..d7a490f24ead 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -63,7 +63,7 @@ xfs_inode_alloc( return NULL; } - XFS_STATS_INC(vn_active); + XFS_STATS_INC(mp, vn_active); ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(!xfs_isiflocked(ip)); @@ -129,7 +129,7 @@ xfs_inode_free( /* asserts to verify all state is correct here */ ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!xfs_isiflocked(ip)); - XFS_STATS_DEC(vn_active); + XFS_STATS_DEC(ip->i_mount, vn_active); call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); } @@ -159,7 +159,7 @@ xfs_iget_cache_hit( spin_lock(&ip->i_flags_lock); if (ip->i_ino != ino) { trace_xfs_iget_skip(ip); - XFS_STATS_INC(xs_ig_frecycle); + XFS_STATS_INC(mp, xs_ig_frecycle); error = -EAGAIN; goto out_error; } @@ -177,7 +177,7 @@ xfs_iget_cache_hit( */ if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { trace_xfs_iget_skip(ip); - XFS_STATS_INC(xs_ig_frecycle); + XFS_STATS_INC(mp, xs_ig_frecycle); error = -EAGAIN; goto out_error; } @@ -259,7 +259,7 @@ xfs_iget_cache_hit( xfs_ilock(ip, lock_flags); xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE); - XFS_STATS_INC(xs_ig_found); + XFS_STATS_INC(mp, xs_ig_found); return 0; @@ -342,7 +342,7 @@ xfs_iget_cache_miss( error = radix_tree_insert(&pag->pag_ici_root, agino, ip); if (unlikely(error)) { WARN_ON(error != -EEXIST); - XFS_STATS_INC(xs_ig_dup); + XFS_STATS_INC(mp, xs_ig_dup); error = -EAGAIN; goto out_preload_end; } @@ -412,7 +412,7 @@ xfs_iget( if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) return -EINVAL; - XFS_STATS_INC(xs_ig_attempts); + XFS_STATS_INC(mp, xs_ig_attempts); /* get the perag structure and ensure that it's inode capable */ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); @@ -429,7 +429,7 @@ again: goto out_error_or_again; } else { rcu_read_unlock(); - XFS_STATS_INC(xs_ig_missed); + XFS_STATS_INC(mp, xs_ig_missed); error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, flags, lock_flags); @@ -965,7 +965,7 @@ reclaim: xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); - XFS_STATS_INC(xs_ig_reclaims); + XFS_STATS_INC(ip->i_mount, xs_ig_reclaims); /* * Remove the inode from the per-AG radix tree. * diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index dc40a6d5ae0d..8ee393996b7d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2365,6 +2365,7 @@ retry: iip->ili_last_fields = iip->ili_fields; iip->ili_fields = 0; + iip->ili_fsync_fields = 0; iip->ili_logged = 1; xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, &iip->ili_item.li_lsn); @@ -3271,8 +3272,8 @@ xfs_iflush_cluster( } if (clcount) { - XFS_STATS_INC(xs_icluster_flushcnt); - XFS_STATS_ADD(xs_icluster_flushinode, clcount); + XFS_STATS_INC(mp, xs_icluster_flushcnt); + XFS_STATS_ADD(mp, xs_icluster_flushinode, clcount); } out_free: @@ -3345,7 +3346,7 @@ xfs_iflush( struct xfs_dinode *dip; int error; - XFS_STATS_INC(xs_iflush_count); + XFS_STATS_INC(mp, xs_iflush_count); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(xfs_isiflocked(ip)); @@ -3560,6 +3561,7 @@ xfs_iflush_int( */ iip->ili_last_fields = iip->ili_fields; iip->ili_fields = 0; + iip->ili_fsync_fields = 0; iip->ili_logged = 1; xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 62bd80f4edd9..d14b12b8cfef 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -719,6 +719,7 @@ xfs_iflush_abort( * attempted. */ iip->ili_fields = 0; + iip->ili_fsync_fields = 0; } /* * Release the inode's flush lock since we're done with it. diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 488d81254e28..4c7722e325b3 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -34,6 +34,7 @@ typedef struct xfs_inode_log_item { unsigned short ili_logged; /* flushed logged data */ unsigned int ili_last_fields; /* fields when flushed */ unsigned int ili_fields; /* fields to be logged */ + unsigned int ili_fsync_fields; /* logged since last fsync */ } xfs_inode_log_item_t; static inline int xfs_inode_clean(xfs_inode_t *ip) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index ea7d85af5310..d42738deec6d 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -40,6 +40,7 @@ #include "xfs_symlink.h" #include "xfs_trans.h" #include "xfs_pnfs.h" +#include "xfs_acl.h" #include <linux/capability.h> #include <linux/dcache.h> @@ -411,7 +412,7 @@ xfs_attrlist_by_handle( if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) return -EFAULT; if (al_hreq.buflen < sizeof(struct attrlist) || - al_hreq.buflen > XATTR_LIST_MAX) + al_hreq.buflen > XFS_XATTR_LIST_MAX) return -EINVAL; /* @@ -455,7 +456,7 @@ xfs_attrmulti_attr_get( unsigned char *kbuf; int error = -EFAULT; - if (*len > XATTR_SIZE_MAX) + if (*len > XFS_XATTR_SIZE_MAX) return -EINVAL; kbuf = kmem_zalloc_large(*len, KM_SLEEP); if (!kbuf) @@ -482,17 +483,22 @@ xfs_attrmulti_attr_set( __uint32_t flags) { unsigned char *kbuf; + int error; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return -EPERM; - if (len > XATTR_SIZE_MAX) + if (len > XFS_XATTR_SIZE_MAX) return -EINVAL; kbuf = memdup_user(ubuf, len); if (IS_ERR(kbuf)) return PTR_ERR(kbuf); - return xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); + error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); + if (!error) + xfs_forget_acl(inode, name, flags); + kfree(kbuf); + return error; } int @@ -501,9 +507,14 @@ xfs_attrmulti_attr_remove( unsigned char *name, __uint32_t flags) { + int error; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return -EPERM; - return xfs_attr_remove(XFS_I(inode), name, flags); + error = xfs_attr_remove(XFS_I(inode), name, flags); + if (!error) + xfs_forget_acl(inode, name, flags); + return error; } STATIC int @@ -1028,7 +1039,7 @@ xfs_ioctl_setattr_xflags( xfs_diflags_to_linux(ip); xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - XFS_STATS_INC(xs_ig_attrchg); + XFS_STATS_INC(mp, xs_ig_attrchg); return 0; } diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index b88bdc85dd3d..1a05d8ae327d 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -356,7 +356,7 @@ xfs_compat_attrlist_by_handle( sizeof(compat_xfs_fsop_attrlist_handlereq_t))) return -EFAULT; if (al_hreq.buflen < sizeof(struct attrlist) || - al_hreq.buflen > XATTR_LIST_MAX) + al_hreq.buflen > XFS_XATTR_LIST_MAX) return -EINVAL; /* diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 1f86033171c8..f4f5b43cf647 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -131,20 +131,30 @@ xfs_iomap_write_direct( uint qblocks, resblks, resrtextents; int committed; int error; - - error = xfs_qm_dqattach(ip, 0); - if (error) - return error; + int lockmode; + int bmapi_flags = XFS_BMAPI_PREALLOC; rt = XFS_IS_REALTIME_INODE(ip); extsz = xfs_get_extsz_hint(ip); + lockmode = XFS_ILOCK_SHARED; /* locked by caller */ + + ASSERT(xfs_isilocked(ip, lockmode)); offset_fsb = XFS_B_TO_FSBT(mp, offset); last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); if ((offset + count) > XFS_ISIZE(ip)) { + /* + * Assert that the in-core extent list is present since this can + * call xfs_iread_extents() and we only have the ilock shared. + * This should be safe because the lock was held around a bmapi + * call in the caller and we only need it to access the in-core + * list. + */ + ASSERT(XFS_IFORK_PTR(ip, XFS_DATA_FORK)->if_flags & + XFS_IFEXTENTS); error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); if (error) - return error; + goto out_unlock; } else { if (nmaps && (imap->br_startblock == HOLESTARTBLOCK)) last_fsb = MIN(last_fsb, (xfs_fileoff_t) @@ -174,9 +184,35 @@ xfs_iomap_write_direct( } /* + * Drop the shared lock acquired by the caller, attach the dquot if + * necessary and move on to transaction setup. + */ + xfs_iunlock(ip, lockmode); + error = xfs_qm_dqattach(ip, 0); + if (error) + return error; + + /* * Allocate and setup the transaction */ tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); + + /* + * For DAX, we do not allocate unwritten extents, but instead we zero + * the block before we commit the transaction. Ideally we'd like to do + * this outside the transaction context, but if we commit and then crash + * we may not have zeroed the blocks and this will be exposed on + * recovery of the allocation. Hence we must zero before commit. + * Further, if we are mapping unwritten extents here, we need to zero + * and convert them to written so that we don't need an unwritten extent + * callback for DAX. This also means that we need to be able to dip into + * the reserve block pool if there is no space left but we need to do + * unwritten extent conversion. + */ + if (IS_DAX(VFS_I(ip))) { + bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO; + tp->t_flags |= XFS_TRANS_RESERVE; + } error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, resrtextents); /* @@ -187,7 +223,8 @@ xfs_iomap_write_direct( return error; } - xfs_ilock(ip, XFS_ILOCK_EXCL); + lockmode = XFS_ILOCK_EXCL; + xfs_ilock(ip, lockmode); error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); if (error) @@ -202,8 +239,8 @@ xfs_iomap_write_direct( xfs_bmap_init(&free_list, &firstfsb); nimaps = 1; error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, - XFS_BMAPI_PREALLOC, &firstfsb, 0, - imap, &nimaps, &free_list); + bmapi_flags, &firstfsb, resblks, imap, + &nimaps, &free_list); if (error) goto out_bmap_cancel; @@ -213,6 +250,7 @@ xfs_iomap_write_direct( error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) goto out_bmap_cancel; + error = xfs_trans_commit(tp); if (error) goto out_unlock; @@ -229,7 +267,7 @@ xfs_iomap_write_direct( error = xfs_alert_fsblock_zero(ip, imap); out_unlock: - xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_iunlock(ip, lockmode); return error; out_bmap_cancel: @@ -670,7 +708,7 @@ xfs_iomap_write_allocate( count_fsb = imap->br_blockcount; map_start_fsb = imap->br_startoff; - XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); + XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); while (count_fsb != 0) { /* @@ -750,9 +788,9 @@ xfs_iomap_write_allocate( * pointer that the caller gave to us. */ error = xfs_bmapi_write(tp, ip, map_start_fsb, - count_fsb, 0, - &first_block, 1, - imap, &nimaps, &free_list); + count_fsb, 0, &first_block, + nres, imap, &nimaps, + &free_list); if (error) goto trans_cancel; @@ -777,7 +815,7 @@ xfs_iomap_write_allocate( if ((offset_fsb >= imap->br_startoff) && (offset_fsb < (imap->br_startoff + imap->br_blockcount))) { - XFS_STATS_INC(xs_xstrat_quick); + XFS_STATS_INC(mp, xs_xstrat_quick); return 0; } @@ -866,8 +904,8 @@ xfs_iomap_write_unwritten( xfs_bmap_init(&free_list, &firstfsb); nimaps = 1; error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, - XFS_BMAPI_CONVERT, &firstfsb, - 1, &imap, &nimaps, &free_list); + XFS_BMAPI_CONVERT, &firstfsb, resblks, + &imap, &nimaps, &free_list); if (error) goto error_on_bmapi_transaction; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 8294132e6a3c..245268a0cdf0 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -695,7 +695,7 @@ xfs_setattr_nonsize( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - XFS_STATS_INC(xs_ig_attrchg); + XFS_STATS_INC(mp, xs_ig_attrchg); if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); @@ -922,7 +922,7 @@ xfs_setattr_size( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - XFS_STATS_INC(xs_ig_attrchg); + XFS_STATS_INC(mp, xs_ig_attrchg); if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 85f883dd6207..ec0e239a0fa9 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -171,6 +171,13 @@ struct xfs_kobj { struct completion complete; }; +struct xstats { + struct xfsstats __percpu *xs_stats; + struct xfs_kobj xs_kobj; +}; + +extern struct xstats xfsstats; + /* Kernel uid/gid conversion. These are used to convert to/from the on disk * uid_t/gid_t types to the kuid_t/kgid_t types that the kernel uses internally. * The conversion here is type only, the value will remain the same since we diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index aaadee0969c9..f52c72a1a06f 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -268,7 +268,7 @@ xlog_grant_head_wait( __set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock(&head->lock); - XFS_STATS_INC(xs_sleep_logspace); + XFS_STATS_INC(log->l_mp, xs_sleep_logspace); trace_xfs_log_grant_sleep(log, tic); schedule(); @@ -379,7 +379,7 @@ xfs_log_regrant( if (XLOG_FORCED_SHUTDOWN(log)) return -EIO; - XFS_STATS_INC(xs_try_logspace); + XFS_STATS_INC(mp, xs_try_logspace); /* * This is a new transaction on the ticket, so we need to change the @@ -448,7 +448,7 @@ xfs_log_reserve( if (XLOG_FORCED_SHUTDOWN(log)) return -EIO; - XFS_STATS_INC(xs_try_logspace); + XFS_STATS_INC(mp, xs_try_logspace); ASSERT(*ticp == NULL); tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, @@ -1768,7 +1768,7 @@ xlog_sync( int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); int size; - XFS_STATS_INC(xs_log_writes); + XFS_STATS_INC(log->l_mp, xs_log_writes); ASSERT(atomic_read(&iclog->ic_refcnt) == 0); /* Add for LR header */ @@ -1805,7 +1805,7 @@ xlog_sync( bp = iclog->ic_bp; XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); - XFS_STATS_ADD(xs_log_blocks, BTOBB(count)); + XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count)); /* Do we need to split this write into 2 parts? */ if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) { @@ -2422,11 +2422,20 @@ xlog_write( &partial_copy_len); xlog_verify_dest_ptr(log, ptr); - /* copy region */ + /* + * Copy region. + * + * Unmount records just log an opheader, so can have + * empty payloads with no data region to copy. Hence we + * only copy the payload if the vector says it has data + * to copy. + */ ASSERT(copy_len >= 0); - memcpy(ptr, reg->i_addr + copy_off, copy_len); - xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len); - + if (copy_len > 0) { + memcpy(ptr, reg->i_addr + copy_off, copy_len); + xlog_write_adv_cnt(&ptr, &len, &log_offset, + copy_len); + } copy_len += start_rec_copy + sizeof(xlog_op_header_t); record_cnt++; data_cnt += contwr ? copy_len : 0; @@ -2913,7 +2922,7 @@ restart: iclog = log->l_iclog; if (iclog->ic_state != XLOG_STATE_ACTIVE) { - XFS_STATS_INC(xs_log_noiclogs); + XFS_STATS_INC(log->l_mp, xs_log_noiclogs); /* Wait for log writes to have flushed */ xlog_wait(&log->l_flush_wait, &log->l_icloglock); @@ -3165,11 +3174,19 @@ xlog_state_switch_iclogs( } if (log->l_curr_block >= log->l_logBBsize) { + /* + * Rewind the current block before the cycle is bumped to make + * sure that the combined LSN never transiently moves forward + * when the log wraps to the next cycle. This is to support the + * unlocked sample of these fields from xlog_valid_lsn(). Most + * other cases should acquire l_icloglock. + */ + log->l_curr_block -= log->l_logBBsize; + ASSERT(log->l_curr_block >= 0); + smp_wmb(); log->l_curr_cycle++; if (log->l_curr_cycle == XLOG_HEADER_MAGIC_NUM) log->l_curr_cycle++; - log->l_curr_block -= log->l_logBBsize; - ASSERT(log->l_curr_block >= 0); } ASSERT(iclog == log->l_iclog); log->l_iclog = iclog->ic_next; @@ -3212,7 +3229,7 @@ _xfs_log_force( struct xlog_in_core *iclog; xfs_lsn_t lsn; - XFS_STATS_INC(xs_log_force); + XFS_STATS_INC(mp, xs_log_force); xlog_cil_force(log); @@ -3297,7 +3314,7 @@ maybe_sleep: spin_unlock(&log->l_icloglock); return -EIO; } - XFS_STATS_INC(xs_log_force_sleep); + XFS_STATS_INC(mp, xs_log_force_sleep); xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); /* * No need to grab the log lock here since we're @@ -3362,7 +3379,7 @@ _xfs_log_force_lsn( ASSERT(lsn != 0); - XFS_STATS_INC(xs_log_force); + XFS_STATS_INC(mp, xs_log_force); lsn = xlog_cil_force_lsn(log, lsn); if (lsn == NULLCOMMITLSN) @@ -3411,7 +3428,7 @@ try_again: (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) { ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); - XFS_STATS_INC(xs_log_force_sleep); + XFS_STATS_INC(mp, xs_log_force_sleep); xlog_wait(&iclog->ic_prev->ic_write_wait, &log->l_icloglock); @@ -3441,7 +3458,7 @@ try_again: spin_unlock(&log->l_icloglock); return -EIO; } - XFS_STATS_INC(xs_log_force_sleep); + XFS_STATS_INC(mp, xs_log_force_sleep); xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); /* * No need to grab the log lock here since we're @@ -4023,3 +4040,45 @@ xlog_iclogs_empty( return 1; } +/* + * Verify that an LSN stamped into a piece of metadata is valid. This is + * intended for use in read verifiers on v5 superblocks. + */ +bool +xfs_log_check_lsn( + struct xfs_mount *mp, + xfs_lsn_t lsn) +{ + struct xlog *log = mp->m_log; + bool valid; + + /* + * norecovery mode skips mount-time log processing and unconditionally + * resets the in-core LSN. We can't validate in this mode, but + * modifications are not allowed anyways so just return true. + */ + if (mp->m_flags & XFS_MOUNT_NORECOVERY) + return true; + + /* + * Some metadata LSNs are initialized to NULL (e.g., the agfl). This is + * handled by recovery and thus safe to ignore here. + */ + if (lsn == NULLCOMMITLSN) + return true; + + valid = xlog_valid_lsn(mp->m_log, lsn); + + /* warn the user about what's gone wrong before verifier failure */ + if (!valid) { + spin_lock(&log->l_icloglock); + xfs_warn(mp, +"Corruption warning: Metadata has LSN (%d:%d) ahead of current LSN (%d:%d). " +"Please unmount and run xfs_repair (>= v4.3) to resolve.", + CYCLE_LSN(lsn), BLOCK_LSN(lsn), + log->l_curr_cycle, log->l_curr_block); + spin_unlock(&log->l_icloglock); + } + + return valid; +} diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 09d91d3166cd..aa533a7d50f2 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -181,5 +181,6 @@ bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); void xfs_log_work_queue(struct xfs_mount *mp); void xfs_log_worker(struct work_struct *work); void xfs_log_quiesce(struct xfs_mount *mp); +bool xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t); #endif /* __XFS_LOG_H__ */ diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 950f3f94720c..8daba7491b13 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -560,4 +560,55 @@ static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock) remove_wait_queue(wq, &wait); } +/* + * The LSN is valid so long as it is behind the current LSN. If it isn't, this + * means that the next log record that includes this metadata could have a + * smaller LSN. In turn, this means that the modification in the log would not + * replay. + */ +static inline bool +xlog_valid_lsn( + struct xlog *log, + xfs_lsn_t lsn) +{ + int cur_cycle; + int cur_block; + bool valid = true; + + /* + * First, sample the current lsn without locking to avoid added + * contention from metadata I/O. The current cycle and block are updated + * (in xlog_state_switch_iclogs()) and read here in a particular order + * to avoid false negatives (e.g., thinking the metadata LSN is valid + * when it is not). + * + * The current block is always rewound before the cycle is bumped in + * xlog_state_switch_iclogs() to ensure the current LSN is never seen in + * a transiently forward state. Instead, we can see the LSN in a + * transiently behind state if we happen to race with a cycle wrap. + */ + cur_cycle = ACCESS_ONCE(log->l_curr_cycle); + smp_rmb(); + cur_block = ACCESS_ONCE(log->l_curr_block); + + if ((CYCLE_LSN(lsn) > cur_cycle) || + (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block)) { + /* + * If the metadata LSN appears invalid, it's possible the check + * above raced with a wrap to the next log cycle. Grab the lock + * to check for sure. + */ + spin_lock(&log->l_icloglock); + cur_cycle = log->l_curr_cycle; + cur_block = log->l_curr_block; + spin_unlock(&log->l_icloglock); + + if ((CYCLE_LSN(lsn) > cur_cycle) || + (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block)) + valid = false; + } + + return valid; +} + #endif /* __XFS_LOG_PRIV_H__ */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 512a0945d52a..c5ecaacdd218 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3431,7 +3431,7 @@ xlog_recover_add_to_cont_trans( * previous record. Copy the rest of the header. */ if (list_empty(&trans->r_itemq)) { - ASSERT(len < sizeof(struct xfs_trans_header)); + ASSERT(len <= sizeof(struct xfs_trans_header)); if (len > sizeof(struct xfs_trans_header)) { xfs_warn(log->l_mp, "%s: bad header length", __func__); return -EIO; @@ -4609,9 +4609,19 @@ xlog_recover( int error; /* find the tail of the log */ - if ((error = xlog_find_tail(log, &head_blk, &tail_blk))) + error = xlog_find_tail(log, &head_blk, &tail_blk); + if (error) return error; + /* + * The superblock was read before the log was available and thus the LSN + * could not be verified. Check the superblock LSN against the current + * LSN now that it's known. + */ + if (xfs_sb_version_hascrc(&log->l_mp->m_sb) && + !xfs_log_check_lsn(log->l_mp, log->l_mp->m_sb.sb_lsn)) + return -EINVAL; + if (tail_blk != head_blk) { /* There used to be a comment here: * diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index d8b67547ab34..11792d888e4e 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c @@ -17,6 +17,7 @@ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_error.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" @@ -43,6 +44,7 @@ void func(const struct xfs_mount *mp, const char *fmt, ...) \ { \ struct va_format vaf; \ va_list args; \ + int level; \ \ va_start(args, fmt); \ \ @@ -51,6 +53,11 @@ void func(const struct xfs_mount *mp, const char *fmt, ...) \ \ __xfs_printk(kern_level, mp, &vaf); \ va_end(args); \ + \ + if (!kstrtoint(kern_level, 0, &level) && \ + level <= LOGLEVEL_ERR && \ + xfs_error_level >= XFS_ERRLEVEL_HIGH) \ + xfs_stack_trace(); \ } \ define_xfs_printk_level(xfs_emerg, KERN_EMERG); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index bf92e0c037c7..bb753b359bee 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -47,6 +47,16 @@ static DEFINE_MUTEX(xfs_uuid_table_mutex); static int xfs_uuid_table_size; static uuid_t *xfs_uuid_table; +void +xfs_uuid_table_free(void) +{ + if (xfs_uuid_table_size == 0) + return; + kmem_free(xfs_uuid_table); + xfs_uuid_table = NULL; + xfs_uuid_table_size = 0; +} + /* * See if the UUID is unique among mounted XFS filesystems. * Mount fails if UUID is nil or a FS with the same UUID is already mounted. @@ -693,10 +703,15 @@ xfs_mountfs( if (error) goto out; - error = xfs_uuid_mount(mp); + error = xfs_sysfs_init(&mp->m_stats.xs_kobj, &xfs_stats_ktype, + &mp->m_kobj, "stats"); if (error) goto out_remove_sysfs; + error = xfs_uuid_mount(mp); + if (error) + goto out_del_stats; + /* * Set the minimum read and write sizes */ @@ -971,6 +986,8 @@ xfs_mountfs( xfs_da_unmount(mp); out_remove_uuid: xfs_uuid_unmount(mp); + out_del_stats: + xfs_sysfs_del(&mp->m_stats.xs_kobj); out_remove_sysfs: xfs_sysfs_del(&mp->m_kobj); out: @@ -1047,6 +1064,7 @@ xfs_unmountfs( xfs_warn(mp, "Unable to update superblock counters. " "Freespace may not be correct on next mount."); + xfs_log_unmount(mp); xfs_da_unmount(mp); xfs_uuid_unmount(mp); @@ -1056,6 +1074,7 @@ xfs_unmountfs( #endif xfs_free_perag(mp); + xfs_sysfs_del(&mp->m_stats.xs_kobj); xfs_sysfs_del(&mp->m_kobj); } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 7999e91cd49a..b57098481c10 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -127,6 +127,7 @@ typedef struct xfs_mount { int64_t m_low_space[XFS_LOWSP_MAX]; /* low free space thresholds */ struct xfs_kobj m_kobj; + struct xstats m_stats; /* per-fs stats */ struct workqueue_struct *m_buf_workqueue; struct workqueue_struct *m_data_workqueue; @@ -312,6 +313,7 @@ typedef struct xfs_perag { int pagb_count; /* pagb slots in use */ } xfs_perag_t; +extern void xfs_uuid_table_free(void); extern int xfs_log_sbcount(xfs_mount_t *); extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); extern int xfs_mountfs(xfs_mount_t *mp); @@ -336,4 +338,7 @@ extern int xfs_dev_is_read_only(struct xfs_mount *, char *); extern void xfs_set_low_space_thresholds(struct xfs_mount *); +int xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb, + xfs_off_t count_fsb); + #endif /* __XFS_MOUNT_H__ */ diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index ab4a6066f7ca..dc6221942b85 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -181,6 +181,11 @@ xfs_fs_map_blocks( ASSERT(imap.br_startblock != DELAYSTARTBLOCK); if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) { + /* + * xfs_iomap_write_direct() expects to take ownership of + * the shared ilock. + */ + xfs_ilock(ip, XFS_ILOCK_SHARED); error = xfs_iomap_write_direct(ip, offset, length, &imap, nimaps); if (error) diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index eac9549efd52..532ab79d38fe 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -184,7 +184,7 @@ xfs_qm_dqpurge( */ ASSERT(!list_empty(&dqp->q_lru)); list_lru_del(&qi->qi_lru, &dqp->q_lru); - XFS_STATS_DEC(xs_qm_dquot_unused); + XFS_STATS_DEC(mp, xs_qm_dquot_unused); xfs_qm_dqdestroy(dqp); return 0; @@ -448,11 +448,11 @@ xfs_qm_dquot_isolate( */ if (dqp->q_nrefs) { xfs_dqunlock(dqp); - XFS_STATS_INC(xs_qm_dqwants); + XFS_STATS_INC(dqp->q_mount, xs_qm_dqwants); trace_xfs_dqreclaim_want(dqp); list_lru_isolate(lru, &dqp->q_lru); - XFS_STATS_DEC(xs_qm_dquot_unused); + XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused); return LRU_REMOVED; } @@ -496,19 +496,19 @@ xfs_qm_dquot_isolate( ASSERT(dqp->q_nrefs == 0); list_lru_isolate_move(lru, &dqp->q_lru, &isol->dispose); - XFS_STATS_DEC(xs_qm_dquot_unused); + XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused); trace_xfs_dqreclaim_done(dqp); - XFS_STATS_INC(xs_qm_dqreclaims); + XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaims); return LRU_REMOVED; out_miss_busy: trace_xfs_dqreclaim_busy(dqp); - XFS_STATS_INC(xs_qm_dqreclaim_misses); + XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses); return LRU_SKIP; out_unlock_dirty: trace_xfs_dqreclaim_busy(dqp); - XFS_STATS_INC(xs_qm_dqreclaim_misses); + XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses); xfs_dqunlock(dqp); spin_lock(lru_lock); return LRU_RETRY; @@ -525,7 +525,7 @@ xfs_qm_shrink_scan( unsigned long freed; int error; - if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) + if ((sc->gfp_mask & (__GFP_FS|__GFP_DIRECT_RECLAIM)) != (__GFP_FS|__GFP_DIRECT_RECLAIM)) return 0; INIT_LIST_HEAD(&isol.buffers); diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index f2240383d4bb..8686df6c7609 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c @@ -18,20 +18,21 @@ #include "xfs.h" #include <linux/proc_fs.h> -DEFINE_PER_CPU(struct xfsstats, xfsstats); +struct xstats xfsstats; -static int counter_val(int idx) +static int counter_val(struct xfsstats __percpu *stats, int idx) { int val = 0, cpu; for_each_possible_cpu(cpu) - val += *(((__u32 *)&per_cpu(xfsstats, cpu) + idx)); + val += *(((__u32 *)per_cpu_ptr(stats, cpu) + idx)); return val; } -static int xfs_stat_proc_show(struct seq_file *m, void *v) +int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) { int i, j; + int len = 0; __uint64_t xs_xstrat_bytes = 0; __uint64_t xs_write_bytes = 0; __uint64_t xs_read_bytes = 0; @@ -65,54 +66,59 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v) }; /* Loop over all stats groups */ + for (i = j = 0; i < ARRAY_SIZE(xstats); i++) { - seq_printf(m, "%s", xstats[i].desc); + len += snprintf(buf + len, PATH_MAX - len, "%s", + xstats[i].desc); /* inner loop does each group */ for (; j < xstats[i].endpoint; j++) - seq_printf(m, " %u", counter_val(j)); - seq_putc(m, '\n'); + len += snprintf(buf + len, PATH_MAX - len, " %u", + counter_val(stats, j)); + len += snprintf(buf + len, PATH_MAX - len, "\n"); } /* extra precision counters */ for_each_possible_cpu(i) { - xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes; - xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes; - xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; + xs_xstrat_bytes += per_cpu_ptr(stats, i)->xs_xstrat_bytes; + xs_write_bytes += per_cpu_ptr(stats, i)->xs_write_bytes; + xs_read_bytes += per_cpu_ptr(stats, i)->xs_read_bytes; } - seq_printf(m, "xpc %Lu %Lu %Lu\n", + len += snprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n", xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); - seq_printf(m, "debug %u\n", + len += snprintf(buf + len, PATH_MAX-len, "debug %u\n", #if defined(DEBUG) 1); #else 0); #endif - return 0; + + return len; } -static int xfs_stat_proc_open(struct inode *inode, struct file *file) +void xfs_stats_clearall(struct xfsstats __percpu *stats) { - return single_open(file, xfs_stat_proc_show, NULL); + int c; + __uint32_t vn_active; + + xfs_notice(NULL, "Clearing xfsstats"); + for_each_possible_cpu(c) { + preempt_disable(); + /* save vn_active, it's a universal truth! */ + vn_active = per_cpu_ptr(stats, c)->vn_active; + memset(per_cpu_ptr(stats, c), 0, sizeof(*stats)); + per_cpu_ptr(stats, c)->vn_active = vn_active; + preempt_enable(); + } } -static const struct file_operations xfs_stat_proc_fops = { - .owner = THIS_MODULE, - .open = xfs_stat_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /* legacy quota interfaces */ #ifdef CONFIG_XFS_QUOTA static int xqm_proc_show(struct seq_file *m, void *v) { /* maximum; incore; ratio free to inuse; freelist */ seq_printf(m, "%d\t%d\t%d\t%u\n", - 0, - counter_val(XFSSTAT_END_XQMSTAT), - 0, - counter_val(XFSSTAT_END_XQMSTAT + 1)); + 0, counter_val(xfsstats.xs_stats, XFSSTAT_END_XQMSTAT), + 0, counter_val(xfsstats.xs_stats, XFSSTAT_END_XQMSTAT + 1)); return 0; } @@ -136,7 +142,7 @@ static int xqmstat_proc_show(struct seq_file *m, void *v) seq_printf(m, "qm"); for (j = XFSSTAT_END_IBT_V2; j < XFSSTAT_END_XQMSTAT; j++) - seq_printf(m, " %u", counter_val(j)); + seq_printf(m, " %u", counter_val(xfsstats.xs_stats, j)); seq_putc(m, '\n'); return 0; } @@ -155,44 +161,35 @@ static const struct file_operations xqmstat_proc_fops = { }; #endif /* CONFIG_XFS_QUOTA */ +#ifdef CONFIG_PROC_FS int xfs_init_procfs(void) { if (!proc_mkdir("fs/xfs", NULL)) + return -ENOMEM; + + if (!proc_symlink("fs/xfs/stat", NULL, + "/sys/fs/xfs/stats/stats")) goto out; - if (!proc_create("fs/xfs/stat", 0, NULL, - &xfs_stat_proc_fops)) - goto out_remove_xfs_dir; #ifdef CONFIG_XFS_QUOTA if (!proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops)) - goto out_remove_stat_file; + goto out; if (!proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops)) - goto out_remove_xqmstat_file; + goto out; #endif return 0; -#ifdef CONFIG_XFS_QUOTA - out_remove_xqmstat_file: - remove_proc_entry("fs/xfs/xqmstat", NULL); - out_remove_stat_file: - remove_proc_entry("fs/xfs/stat", NULL); -#endif - out_remove_xfs_dir: - remove_proc_entry("fs/xfs", NULL); - out: +out: + remove_proc_subtree("fs/xfs", NULL); return -ENOMEM; } void xfs_cleanup_procfs(void) { -#ifdef CONFIG_XFS_QUOTA - remove_proc_entry("fs/xfs/xqm", NULL); - remove_proc_entry("fs/xfs/xqmstat", NULL); -#endif - remove_proc_entry("fs/xfs/stat", NULL); - remove_proc_entry("fs/xfs", NULL); + remove_proc_subtree("fs/xfs", NULL); } +#endif /* CONFIG_PROC_FS */ diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h index c8f238b8299a..483b0eff1988 100644 --- a/fs/xfs/xfs_stats.h +++ b/fs/xfs/xfs_stats.h @@ -19,8 +19,6 @@ #define __XFS_STATS_H__ -#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF) - #include <linux/percpu.h> /* @@ -215,15 +213,29 @@ struct xfsstats { __uint64_t xs_read_bytes; }; -DECLARE_PER_CPU(struct xfsstats, xfsstats); +int xfs_stats_format(struct xfsstats __percpu *stats, char *buf); +void xfs_stats_clearall(struct xfsstats __percpu *stats); +extern struct xstats xfsstats; -/* - * We don't disable preempt, not too worried about poking the - * wrong CPU's stat for now (also aggregated before reporting). - */ -#define XFS_STATS_INC(v) (per_cpu(xfsstats, current_cpu()).v++) -#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--) -#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc)) +#define XFS_STATS_INC(mp, v) \ +do { \ + per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v++; \ + per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v++; \ +} while (0) + +#define XFS_STATS_DEC(mp, v) \ +do { \ + per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v--; \ + per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v--; \ +} while (0) + +#define XFS_STATS_ADD(mp, v, inc) \ +do { \ + per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v += (inc); \ + per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v += (inc); \ +} while (0) + +#if defined(CONFIG_PROC_FS) extern int xfs_init_procfs(void); extern void xfs_cleanup_procfs(void); @@ -231,10 +243,6 @@ extern void xfs_cleanup_procfs(void); #else /* !CONFIG_PROC_FS */ -# define XFS_STATS_INC(count) -# define XFS_STATS_DEC(count) -# define XFS_STATS_ADD(count, inc) - static inline int xfs_init_procfs(void) { return 0; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 904f637cfa5f..36bd8825bfb0 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -838,17 +838,18 @@ xfs_init_mount_workqueues( goto out_destroy_unwritten; mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", - WQ_FREEZABLE, 0, mp->m_fsname); + WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); if (!mp->m_reclaim_workqueue) goto out_destroy_cil; mp->m_log_workqueue = alloc_workqueue("xfs-log/%s", - WQ_FREEZABLE|WQ_HIGHPRI, 0, mp->m_fsname); + WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0, + mp->m_fsname); if (!mp->m_log_workqueue) goto out_destroy_reclaim; mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s", - WQ_FREEZABLE, 0, mp->m_fsname); + WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); if (!mp->m_eofblocks_workqueue) goto out_destroy_log; @@ -922,7 +923,7 @@ xfs_fs_destroy_inode( trace_xfs_destroy_inode(ip); - XFS_STATS_INC(vn_reclaim); + XFS_STATS_INC(ip->i_mount, vn_reclaim); ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); @@ -983,8 +984,8 @@ xfs_fs_evict_inode( truncate_inode_pages_final(&inode->i_data); clear_inode(inode); - XFS_STATS_INC(vn_rele); - XFS_STATS_INC(vn_remove); + XFS_STATS_INC(ip->i_mount, vn_rele); + XFS_STATS_INC(ip->i_mount, vn_remove); xfs_inactive(ip); } @@ -1474,9 +1475,16 @@ xfs_fs_fill_super( if (error) goto out_destroy_workqueues; + /* Allocate stats memory before we do operations that might use it */ + mp->m_stats.xs_stats = alloc_percpu(struct xfsstats); + if (!mp->m_stats.xs_stats) { + error = -ENOMEM; + goto out_destroy_counters; + } + error = xfs_readsb(mp, flags); if (error) - goto out_destroy_counters; + goto out_free_stats; error = xfs_finish_flags(mp); if (error) @@ -1545,9 +1553,11 @@ xfs_fs_fill_super( xfs_filestream_unmount(mp); out_free_sb: xfs_freesb(mp); + out_free_stats: + free_percpu(mp->m_stats.xs_stats); out_destroy_counters: xfs_destroy_percpu_counters(mp); -out_destroy_workqueues: + out_destroy_workqueues: xfs_destroy_mount_workqueues(mp); out_close_devices: xfs_close_devices(mp); @@ -1574,6 +1584,7 @@ xfs_fs_put_super( xfs_unmountfs(mp); xfs_freesb(mp); + free_percpu(mp->m_stats.xs_stats); xfs_destroy_percpu_counters(mp); xfs_destroy_mount_workqueues(mp); xfs_close_devices(mp); @@ -1838,19 +1849,32 @@ init_xfs_fs(void) xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj); if (!xfs_kset) { error = -ENOMEM; - goto out_sysctl_unregister;; + goto out_sysctl_unregister; } + xfsstats.xs_kobj.kobject.kset = xfs_kset; + + xfsstats.xs_stats = alloc_percpu(struct xfsstats); + if (!xfsstats.xs_stats) { + error = -ENOMEM; + goto out_kset_unregister; + } + + error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL, + "stats"); + if (error) + goto out_free_stats; + #ifdef DEBUG xfs_dbg_kobj.kobject.kset = xfs_kset; error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug"); if (error) - goto out_kset_unregister; + goto out_remove_stats_kobj; #endif error = xfs_qm_init(); if (error) - goto out_remove_kobj; + goto out_remove_dbg_kobj; error = register_filesystem(&xfs_fs_type); if (error) @@ -1859,11 +1883,15 @@ init_xfs_fs(void) out_qm_exit: xfs_qm_exit(); - out_remove_kobj: + out_remove_dbg_kobj: #ifdef DEBUG xfs_sysfs_del(&xfs_dbg_kobj); - out_kset_unregister: + out_remove_stats_kobj: #endif + xfs_sysfs_del(&xfsstats.xs_kobj); + out_free_stats: + free_percpu(xfsstats.xs_stats); + out_kset_unregister: kset_unregister(xfs_kset); out_sysctl_unregister: xfs_sysctl_unregister(); @@ -1889,6 +1917,8 @@ exit_xfs_fs(void) #ifdef DEBUG xfs_sysfs_del(&xfs_dbg_kobj); #endif + xfs_sysfs_del(&xfsstats.xs_kobj); + free_percpu(xfsstats.xs_stats); kset_unregister(xfs_kset); xfs_sysctl_unregister(); xfs_cleanup_procfs(); @@ -1896,6 +1926,7 @@ exit_xfs_fs(void) xfs_mru_cache_uninit(); xfs_destroy_workqueues(); xfs_destroy_zones(); + xfs_uuid_table_free(); } module_init(init_xfs_fs); diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index a0c8067cea6f..aed74d3f8da9 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c @@ -19,6 +19,7 @@ #include <linux/sysctl.h> #include <linux/proc_fs.h> #include "xfs_error.h" +#include "xfs_stats.h" static struct ctl_table_header *xfs_table_header; @@ -31,22 +32,12 @@ xfs_stats_clear_proc_handler( size_t *lenp, loff_t *ppos) { - int c, ret, *valp = ctl->data; - __uint32_t vn_active; + int ret, *valp = ctl->data; ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); if (!ret && write && *valp) { - xfs_notice(NULL, "Clearing xfsstats"); - for_each_possible_cpu(c) { - preempt_disable(); - /* save vn_active, it's a universal truth! */ - vn_active = per_cpu(xfsstats, c).vn_active; - memset(&per_cpu(xfsstats, c), 0, - sizeof(struct xfsstats)); - per_cpu(xfsstats, c).vn_active = vn_active; - preempt_enable(); - } + xfs_stats_clearall(xfsstats.xs_stats); xfs_stats_clear = 0; } diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index aa03670851d8..ee70f5dec9dc 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -21,11 +21,13 @@ #include "xfs_log_format.h" #include "xfs_log.h" #include "xfs_log_priv.h" +#include "xfs_stats.h" struct xfs_sysfs_attr { struct attribute attr; - ssize_t (*show)(char *buf, void *data); - ssize_t (*store)(const char *buf, size_t count, void *data); + ssize_t (*show)(struct kobject *kobject, char *buf); + ssize_t (*store)(struct kobject *kobject, const char *buf, + size_t count); }; static inline struct xfs_sysfs_attr * @@ -38,6 +40,8 @@ to_attr(struct attribute *attr) static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RW(name) #define XFS_SYSFS_ATTR_RO(name) \ static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RO(name) +#define XFS_SYSFS_ATTR_WO(name) \ + static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_WO(name) #define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr @@ -51,14 +55,42 @@ struct kobj_type xfs_mp_ktype = { .release = xfs_sysfs_release, }; +STATIC ssize_t +xfs_sysfs_object_show( + struct kobject *kobject, + struct attribute *attr, + char *buf) +{ + struct xfs_sysfs_attr *xfs_attr = to_attr(attr); + + return xfs_attr->show ? xfs_attr->show(kobject, buf) : 0; +} + +STATIC ssize_t +xfs_sysfs_object_store( + struct kobject *kobject, + struct attribute *attr, + const char *buf, + size_t count) +{ + struct xfs_sysfs_attr *xfs_attr = to_attr(attr); + + return xfs_attr->store ? xfs_attr->store(kobject, buf, count) : 0; +} + +static const struct sysfs_ops xfs_sysfs_ops = { + .show = xfs_sysfs_object_show, + .store = xfs_sysfs_object_store, +}; + #ifdef DEBUG /* debug */ STATIC ssize_t log_recovery_delay_store( + struct kobject *kobject, const char *buf, - size_t count, - void *data) + size_t count) { int ret; int val; @@ -77,8 +109,8 @@ log_recovery_delay_store( STATIC ssize_t log_recovery_delay_show( - char *buf, - void *data) + struct kobject *kobject, + char *buf) { return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.log_recovery_delay); } @@ -89,52 +121,87 @@ static struct attribute *xfs_dbg_attrs[] = { NULL, }; +struct kobj_type xfs_dbg_ktype = { + .release = xfs_sysfs_release, + .sysfs_ops = &xfs_sysfs_ops, + .default_attrs = xfs_dbg_attrs, +}; + +#endif /* DEBUG */ + +/* stats */ + +static inline struct xstats * +to_xstats(struct kobject *kobject) +{ + struct xfs_kobj *kobj = to_kobj(kobject); + + return container_of(kobj, struct xstats, xs_kobj); +} + STATIC ssize_t -xfs_dbg_show( - struct kobject *kobject, - struct attribute *attr, - char *buf) +stats_show( + struct kobject *kobject, + char *buf) { - struct xfs_sysfs_attr *xfs_attr = to_attr(attr); + struct xstats *stats = to_xstats(kobject); - return xfs_attr->show ? xfs_attr->show(buf, NULL) : 0; + return xfs_stats_format(stats->xs_stats, buf); } +XFS_SYSFS_ATTR_RO(stats); STATIC ssize_t -xfs_dbg_store( - struct kobject *kobject, - struct attribute *attr, - const char *buf, - size_t count) +stats_clear_store( + struct kobject *kobject, + const char *buf, + size_t count) { - struct xfs_sysfs_attr *xfs_attr = to_attr(attr); + int ret; + int val; + struct xstats *stats = to_xstats(kobject); + + ret = kstrtoint(buf, 0, &val); + if (ret) + return ret; - return xfs_attr->store ? xfs_attr->store(buf, count, NULL) : 0; + if (val != 1) + return -EINVAL; + + xfs_stats_clearall(stats->xs_stats); + return count; } +XFS_SYSFS_ATTR_WO(stats_clear); -static struct sysfs_ops xfs_dbg_ops = { - .show = xfs_dbg_show, - .store = xfs_dbg_store, +static struct attribute *xfs_stats_attrs[] = { + ATTR_LIST(stats), + ATTR_LIST(stats_clear), + NULL, }; -struct kobj_type xfs_dbg_ktype = { +struct kobj_type xfs_stats_ktype = { .release = xfs_sysfs_release, - .sysfs_ops = &xfs_dbg_ops, - .default_attrs = xfs_dbg_attrs, + .sysfs_ops = &xfs_sysfs_ops, + .default_attrs = xfs_stats_attrs, }; -#endif /* DEBUG */ - /* xlog */ +static inline struct xlog * +to_xlog(struct kobject *kobject) +{ + struct xfs_kobj *kobj = to_kobj(kobject); + + return container_of(kobj, struct xlog, l_kobj); +} + STATIC ssize_t log_head_lsn_show( - char *buf, - void *data) + struct kobject *kobject, + char *buf) { - struct xlog *log = data; int cycle; int block; + struct xlog *log = to_xlog(kobject); spin_lock(&log->l_icloglock); cycle = log->l_curr_cycle; @@ -147,12 +214,12 @@ XFS_SYSFS_ATTR_RO(log_head_lsn); STATIC ssize_t log_tail_lsn_show( - char *buf, - void *data) + struct kobject *kobject, + char *buf) { - struct xlog *log = data; int cycle; int block; + struct xlog *log = to_xlog(kobject); xlog_crack_atomic_lsn(&log->l_tail_lsn, &cycle, &block); return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block); @@ -161,12 +228,13 @@ XFS_SYSFS_ATTR_RO(log_tail_lsn); STATIC ssize_t reserve_grant_head_show( - char *buf, - void *data) + struct kobject *kobject, + char *buf) + { - struct xlog *log = data; int cycle; int bytes; + struct xlog *log = to_xlog(kobject); xlog_crack_grant_head(&log->l_reserve_head.grant, &cycle, &bytes); return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes); @@ -175,12 +243,12 @@ XFS_SYSFS_ATTR_RO(reserve_grant_head); STATIC ssize_t write_grant_head_show( - char *buf, - void *data) + struct kobject *kobject, + char *buf) { - struct xlog *log = data; int cycle; int bytes; + struct xlog *log = to_xlog(kobject); xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &bytes); return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes); @@ -195,45 +263,8 @@ static struct attribute *xfs_log_attrs[] = { NULL, }; -static inline struct xlog * -to_xlog(struct kobject *kobject) -{ - struct xfs_kobj *kobj = to_kobj(kobject); - return container_of(kobj, struct xlog, l_kobj); -} - -STATIC ssize_t -xfs_log_show( - struct kobject *kobject, - struct attribute *attr, - char *buf) -{ - struct xlog *log = to_xlog(kobject); - struct xfs_sysfs_attr *xfs_attr = to_attr(attr); - - return xfs_attr->show ? xfs_attr->show(buf, log) : 0; -} - -STATIC ssize_t -xfs_log_store( - struct kobject *kobject, - struct attribute *attr, - const char *buf, - size_t count) -{ - struct xlog *log = to_xlog(kobject); - struct xfs_sysfs_attr *xfs_attr = to_attr(attr); - - return xfs_attr->store ? xfs_attr->store(buf, count, log) : 0; -} - -static struct sysfs_ops xfs_log_ops = { - .show = xfs_log_show, - .store = xfs_log_store, -}; - struct kobj_type xfs_log_ktype = { .release = xfs_sysfs_release, - .sysfs_ops = &xfs_log_ops, + .sysfs_ops = &xfs_sysfs_ops, .default_attrs = xfs_log_attrs, }; diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h index 240eee35f342..be692e59938d 100644 --- a/fs/xfs/xfs_sysfs.h +++ b/fs/xfs/xfs_sysfs.h @@ -22,6 +22,7 @@ extern struct kobj_type xfs_mp_ktype; /* xfs_mount */ extern struct kobj_type xfs_dbg_ktype; /* debug */ extern struct kobj_type xfs_log_ktype; /* xlog */ +extern struct kobj_type xfs_stats_ktype; /* stats */ static inline struct xfs_kobj * to_kobj(struct kobject *kobject) diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 5ed36b1e04c1..877079eb0f8f 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -689,6 +689,7 @@ DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid); DEFINE_INODE_EVENT(xfs_filemap_fault); DEFINE_INODE_EVENT(xfs_filemap_pmd_fault); DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite); +DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite); DECLARE_EVENT_CLASS(xfs_iref_class, TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), @@ -1312,6 +1313,7 @@ DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize); +DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof); DECLARE_EVENT_CLASS(xfs_itrunc_class, TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index a0ab1dae9c31..748b16aff45a 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -930,9 +930,9 @@ __xfs_trans_commit( */ if (sync) { error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL); - XFS_STATS_INC(xs_trans_sync); + XFS_STATS_INC(mp, xs_trans_sync); } else { - XFS_STATS_INC(xs_trans_async); + XFS_STATS_INC(mp, xs_trans_async); } return error; @@ -955,7 +955,7 @@ out_unreserve: xfs_trans_free_items(tp, NULLCOMMITLSN, !!error); xfs_trans_free(tp); - XFS_STATS_INC(xs_trans_empty); + XFS_STATS_INC(mp, xs_trans_empty); return error; } diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 1098cf490189..aa67339b9537 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -349,7 +349,7 @@ xfsaild_push( xfs_ail_min_lsn(ailp))) { ailp->xa_log_flush = 0; - XFS_STATS_INC(xs_push_ail_flush); + XFS_STATS_INC(mp, xs_push_ail_flush); xfs_log_force(mp, XFS_LOG_SYNC); } @@ -371,7 +371,7 @@ xfsaild_push( goto out_done; } - XFS_STATS_INC(xs_push_ail); + XFS_STATS_INC(mp, xs_push_ail); lsn = lip->li_lsn; while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) { @@ -385,7 +385,7 @@ xfsaild_push( lock_result = lip->li_ops->iop_push(lip, &ailp->xa_buf_list); switch (lock_result) { case XFS_ITEM_SUCCESS: - XFS_STATS_INC(xs_push_ail_success); + XFS_STATS_INC(mp, xs_push_ail_success); trace_xfs_ail_push(lip); ailp->xa_last_pushed_lsn = lsn; @@ -403,7 +403,7 @@ xfsaild_push( * re-try the flushing relatively soon if most of the * AIL is beeing flushed. */ - XFS_STATS_INC(xs_push_ail_flushing); + XFS_STATS_INC(mp, xs_push_ail_flushing); trace_xfs_ail_flushing(lip); flushing++; @@ -411,14 +411,14 @@ xfsaild_push( break; case XFS_ITEM_PINNED: - XFS_STATS_INC(xs_push_ail_pinned); + XFS_STATS_INC(mp, xs_push_ail_pinned); trace_xfs_ail_pinned(lip); stuck++; ailp->xa_log_flush++; break; case XFS_ITEM_LOCKED: - XFS_STATS_INC(xs_push_ail_locked); + XFS_STATS_INC(mp, xs_push_ail_locked); trace_xfs_ail_locked(lip); stuck++; @@ -497,6 +497,7 @@ xfsaild( long tout = 0; /* milliseconds */ current->flags |= PF_MEMALLOC; + set_freezable(); while (!kthread_should_stop()) { if (tout && tout <= 20) diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 17280cd71934..b97f1df910ab 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -108,6 +108,15 @@ xfs_trans_log_inode( ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); /* + * Record the specific change for fdatasync optimisation. This + * allows fdatasync to skip log forces for inodes that are only + * timestamp dirty. We do this before the change count so that + * the core being logged in this case does not impact on fdatasync + * behaviour. + */ + ip->i_itemp->ili_fsync_fields |= flags; + + /* * First time we log the inode in a transaction, bump the inode change * counter if it is configured for this to occur. We don't use * inode_inc_version() because there is no need for extra locking around diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index c036815183cb..839b35ca21c6 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -32,9 +32,10 @@ static int -xfs_xattr_get(struct dentry *dentry, const char *name, - void *value, size_t size, int xflags) +xfs_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, + const char *name, void *value, size_t size) { + int xflags = handler->flags; struct xfs_inode *ip = XFS_I(d_inode(dentry)); int error, asize = size; @@ -53,11 +54,35 @@ xfs_xattr_get(struct dentry *dentry, const char *name, return asize; } +void +xfs_forget_acl( + struct inode *inode, + const char *name, + int xflags) +{ + /* + * Invalidate any cached ACLs if the user has bypassed the ACL + * interface. We don't validate the content whatsoever so it is caller + * responsibility to provide data in valid format and ensure i_mode is + * consistent. + */ + if (xflags & ATTR_ROOT) { +#ifdef CONFIG_XFS_POSIX_ACL + if (!strcmp(name, SGI_ACL_FILE)) + forget_cached_acl(inode, ACL_TYPE_ACCESS); + else if (!strcmp(name, SGI_ACL_DEFAULT)) + forget_cached_acl(inode, ACL_TYPE_DEFAULT); +#endif + } +} + static int -xfs_xattr_set(struct dentry *dentry, const char *name, const void *value, - size_t size, int flags, int xflags) +xfs_xattr_set(const struct xattr_handler *handler, struct dentry *dentry, + const char *name, const void *value, size_t size, int flags) { - struct xfs_inode *ip = XFS_I(d_inode(dentry)); + int xflags = handler->flags; + struct xfs_inode *ip = XFS_I(d_inode(dentry)); + int error; if (strcmp(name, "") == 0) return -EINVAL; @@ -70,8 +95,12 @@ xfs_xattr_set(struct dentry *dentry, const char *name, const void *value, if (!value) return xfs_attr_remove(ip, (unsigned char *)name, xflags); - return xfs_attr_set(ip, (unsigned char *)name, + error = xfs_attr_set(ip, (unsigned char *)name, (void *)value, size, xflags); + if (!error) + xfs_forget_acl(d_inode(dentry), name, xflags); + + return error; } static const struct xattr_handler xfs_xattr_user_handler = { |