diff options
Diffstat (limited to 'fs')
312 files changed, 6333 insertions, 5396 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig index 09fd4a185fd2..d7bc93447c85 100644 --- a/fs/9p/Kconfig +++ b/fs/9p/Kconfig @@ -2,6 +2,7 @@ config 9P_FS tristate "Plan 9 Resource Sharing Support (9P2000)" depends on INET && NET_9P + select NETFS_SUPPORT help If you say Y here, you will get experimental support for Plan 9 resource sharing via the 9P2000 protocol. diff --git a/fs/9p/acl.c b/fs/9p/acl.c index c381499f5416..4dac4a0dc5f4 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -1,15 +1,7 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright IBM Corporation, 2010 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * */ #include <linux/module.h> @@ -123,6 +115,7 @@ static int v9fs_set_acl(struct p9_fid *fid, int type, struct posix_acl *acl) char *name; size_t size; void *buffer; + if (!acl) return 0; diff --git a/fs/9p/acl.h b/fs/9p/acl.h index d43c8949e807..ce5175d463dd 100644 --- a/fs/9p/acl.h +++ b/fs/9p/acl.h @@ -1,28 +1,21 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ /* * Copyright IBM Corporation, 2010 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * */ #ifndef FS_9P_ACL_H #define FS_9P_ACL_H #ifdef CONFIG_9P_FS_POSIX_ACL -extern int v9fs_get_acl(struct inode *, struct p9_fid *); -extern struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type, bool rcu); -extern int v9fs_acl_chmod(struct inode *, struct p9_fid *); -extern int v9fs_set_create_acl(struct inode *, struct p9_fid *, - struct posix_acl *, struct posix_acl *); -extern int v9fs_acl_mode(struct inode *dir, umode_t *modep, - struct posix_acl **dpacl, struct posix_acl **pacl); -extern void v9fs_put_acl(struct posix_acl *dacl, struct posix_acl *acl); +int v9fs_get_acl(struct inode *inode, struct p9_fid *fid); +struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type, + bool rcu); +int v9fs_acl_chmod(struct inode *inode, struct p9_fid *fid); +int v9fs_set_create_acl(struct inode *inode, struct p9_fid *fid, + struct posix_acl *dacl, struct posix_acl *acl); +int v9fs_acl_mode(struct inode *dir, umode_t *modep, + struct posix_acl **dpacl, struct posix_acl **pacl); +void v9fs_put_acl(struct posix_acl *dacl, struct posix_acl *acl); #else #define v9fs_iop_get_acl NULL static inline int v9fs_get_acl(struct inode *inode, struct p9_fid *fid) diff --git a/fs/9p/cache.c b/fs/9p/cache.c index 1769a44f4819..f2ba131cede1 100644 --- a/fs/9p/cache.c +++ b/fs/9p/cache.c @@ -19,8 +19,8 @@ #define CACHETAG_LEN 11 struct fscache_netfs v9fs_cache_netfs = { - .name = "9p", - .version = 0, + .name = "9p", + .version = 0, }; /* @@ -199,140 +199,3 @@ void v9fs_cache_inode_reset_cookie(struct inode *inode) mutex_unlock(&v9inode->fscache_lock); } - -int __v9fs_fscache_release_page(struct page *page, gfp_t gfp) -{ - struct inode *inode = page->mapping->host; - struct v9fs_inode *v9inode = V9FS_I(inode); - - BUG_ON(!v9inode->fscache); - - return fscache_maybe_release_page(v9inode->fscache, page, gfp); -} - -void __v9fs_fscache_invalidate_page(struct page *page) -{ - struct inode *inode = page->mapping->host; - struct v9fs_inode *v9inode = V9FS_I(inode); - - BUG_ON(!v9inode->fscache); - - if (PageFsCache(page)) { - fscache_wait_on_page_write(v9inode->fscache, page); - BUG_ON(!PageLocked(page)); - fscache_uncache_page(v9inode->fscache, page); - } -} - -static void v9fs_vfs_readpage_complete(struct page *page, void *data, - int error) -{ - if (!error) - SetPageUptodate(page); - - unlock_page(page); -} - -/* - * __v9fs_readpage_from_fscache - read a page from cache - * - * Returns 0 if the pages are in cache and a BIO is submitted, - * 1 if the pages are not in cache and -error otherwise. - */ - -int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page) -{ - int ret; - const struct v9fs_inode *v9inode = V9FS_I(inode); - - p9_debug(P9_DEBUG_FSC, "inode %p page %p\n", inode, page); - if (!v9inode->fscache) - return -ENOBUFS; - - ret = fscache_read_or_alloc_page(v9inode->fscache, - page, - v9fs_vfs_readpage_complete, - NULL, - GFP_KERNEL); - switch (ret) { - case -ENOBUFS: - case -ENODATA: - p9_debug(P9_DEBUG_FSC, "page/inode not in cache %d\n", ret); - return 1; - case 0: - p9_debug(P9_DEBUG_FSC, "BIO submitted\n"); - return ret; - default: - p9_debug(P9_DEBUG_FSC, "ret %d\n", ret); - return ret; - } -} - -/* - * __v9fs_readpages_from_fscache - read multiple pages from cache - * - * Returns 0 if the pages are in cache and a BIO is submitted, - * 1 if the pages are not in cache and -error otherwise. - */ - -int __v9fs_readpages_from_fscache(struct inode *inode, - struct address_space *mapping, - struct list_head *pages, - unsigned *nr_pages) -{ - int ret; - const struct v9fs_inode *v9inode = V9FS_I(inode); - - p9_debug(P9_DEBUG_FSC, "inode %p pages %u\n", inode, *nr_pages); - if (!v9inode->fscache) - return -ENOBUFS; - - ret = fscache_read_or_alloc_pages(v9inode->fscache, - mapping, pages, nr_pages, - v9fs_vfs_readpage_complete, - NULL, - mapping_gfp_mask(mapping)); - switch (ret) { - case -ENOBUFS: - case -ENODATA: - p9_debug(P9_DEBUG_FSC, "pages/inodes not in cache %d\n", ret); - return 1; - case 0: - BUG_ON(!list_empty(pages)); - BUG_ON(*nr_pages != 0); - p9_debug(P9_DEBUG_FSC, "BIO submitted\n"); - return ret; - default: - p9_debug(P9_DEBUG_FSC, "ret %d\n", ret); - return ret; - } -} - -/* - * __v9fs_readpage_to_fscache - write a page to the cache - * - */ - -void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page) -{ - int ret; - const struct v9fs_inode *v9inode = V9FS_I(inode); - - p9_debug(P9_DEBUG_FSC, "inode %p page %p\n", inode, page); - ret = fscache_write_page(v9inode->fscache, page, - i_size_read(&v9inode->vfs_inode), GFP_KERNEL); - p9_debug(P9_DEBUG_FSC, "ret = %d\n", ret); - if (ret != 0) - v9fs_uncache_page(inode, page); -} - -/* - * wait for a page to complete writing to the cache - */ -void __v9fs_fscache_wait_on_page_write(struct inode *inode, struct page *page) -{ - const struct v9fs_inode *v9inode = V9FS_I(inode); - p9_debug(P9_DEBUG_FSC, "inode %p page %p\n", inode, page); - if (PageFsCache(page)) - fscache_wait_on_page_write(v9inode->fscache, page); -} diff --git a/fs/9p/cache.h b/fs/9p/cache.h index 00f107af443e..7480b4b49fea 100644 --- a/fs/9p/cache.h +++ b/fs/9p/cache.h @@ -7,9 +7,10 @@ #ifndef _9P_CACHE_H #define _9P_CACHE_H -#ifdef CONFIG_9P_FSCACHE +#define FSCACHE_USE_NEW_IO_API #include <linux/fscache.h> -#include <linux/spinlock.h> + +#ifdef CONFIG_9P_FSCACHE extern struct fscache_netfs v9fs_cache_netfs; extern const struct fscache_cookie_def v9fs_cache_session_index_def; @@ -27,64 +28,6 @@ extern void v9fs_cache_inode_reset_cookie(struct inode *inode); extern int __v9fs_cache_register(void); extern void __v9fs_cache_unregister(void); -extern int __v9fs_fscache_release_page(struct page *page, gfp_t gfp); -extern void __v9fs_fscache_invalidate_page(struct page *page); -extern int __v9fs_readpage_from_fscache(struct inode *inode, - struct page *page); -extern int __v9fs_readpages_from_fscache(struct inode *inode, - struct address_space *mapping, - struct list_head *pages, - unsigned *nr_pages); -extern void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page); -extern void __v9fs_fscache_wait_on_page_write(struct inode *inode, - struct page *page); - -static inline int v9fs_fscache_release_page(struct page *page, - gfp_t gfp) -{ - return __v9fs_fscache_release_page(page, gfp); -} - -static inline void v9fs_fscache_invalidate_page(struct page *page) -{ - __v9fs_fscache_invalidate_page(page); -} - -static inline int v9fs_readpage_from_fscache(struct inode *inode, - struct page *page) -{ - return __v9fs_readpage_from_fscache(inode, page); -} - -static inline int v9fs_readpages_from_fscache(struct inode *inode, - struct address_space *mapping, - struct list_head *pages, - unsigned *nr_pages) -{ - return __v9fs_readpages_from_fscache(inode, mapping, pages, - nr_pages); -} - -static inline void v9fs_readpage_to_fscache(struct inode *inode, - struct page *page) -{ - if (PageFsCache(page)) - __v9fs_readpage_to_fscache(inode, page); -} - -static inline void v9fs_uncache_page(struct inode *inode, struct page *page) -{ - struct v9fs_inode *v9inode = V9FS_I(inode); - fscache_uncache_page(v9inode->fscache, page); - BUG_ON(PageFsCache(page)); -} - -static inline void v9fs_fscache_wait_on_page_write(struct inode *inode, - struct page *page) -{ - return __v9fs_fscache_wait_on_page_write(inode, page); -} - #else /* CONFIG_9P_FSCACHE */ static inline void v9fs_cache_inode_get_cookie(struct inode *inode) @@ -99,39 +42,5 @@ static inline void v9fs_cache_inode_set_cookie(struct inode *inode, struct file { } -static inline int v9fs_fscache_release_page(struct page *page, - gfp_t gfp) { - return 1; -} - -static inline void v9fs_fscache_invalidate_page(struct page *page) {} - -static inline int v9fs_readpage_from_fscache(struct inode *inode, - struct page *page) -{ - return -ENOBUFS; -} - -static inline int v9fs_readpages_from_fscache(struct inode *inode, - struct address_space *mapping, - struct list_head *pages, - unsigned *nr_pages) -{ - return -ENOBUFS; -} - -static inline void v9fs_readpage_to_fscache(struct inode *inode, - struct page *page) -{} - -static inline void v9fs_uncache_page(struct inode *inode, struct page *page) -{} - -static inline void v9fs_fscache_wait_on_page_write(struct inode *inode, - struct page *page) -{ - return; -} - #endif /* CONFIG_9P_FSCACHE */ #endif /* _9P_CACHE_H */ diff --git a/fs/9p/fid.c b/fs/9p/fid.c index b8863dd0de5c..6aab046c98e2 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -103,6 +103,7 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any) /* we'll recheck under lock if there's anything to look in */ if (!ret && dentry->d_fsdata) { struct hlist_head *h = (struct hlist_head *)&dentry->d_fsdata; + spin_lock(&dentry->d_lock); hlist_for_each_entry(fid, h, dlist) { if (any || uid_eq(fid->uid, uid)) { @@ -185,7 +186,7 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, return ERR_PTR(-EPERM); if (v9fs_proto_dotu(v9ses) || v9fs_proto_dotl(v9ses)) - uname = NULL; + uname = NULL; else uname = v9ses->uname; diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 2e0fa7c932db..e32dd5f7721b 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * linux/fs/9p/v9fs.c - * * This file contains functions assisting in mapping VFS to 9P2000 * * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> @@ -166,7 +164,7 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) substring_t args[MAX_OPT_ARGS]; char *p; int option = 0; - char *s, *e; + char *s; int ret = 0; /* setup defaults */ @@ -190,8 +188,10 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) while ((p = strsep(&options, ",")) != NULL) { int token, r; + if (!*p) continue; + token = match_token(p, tokens, args); switch (token) { case Opt_debug: @@ -321,12 +321,13 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) v9ses->flags |= V9FS_ACCESS_CLIENT; } else { uid_t uid; + v9ses->flags |= V9FS_ACCESS_SINGLE; - uid = simple_strtoul(s, &e, 10); - if (*e != '\0') { - ret = -EINVAL; - pr_info("Unknown access argument %s\n", - s); + r = kstrtouint(s, 10, &uid); + if (r) { + ret = r; + pr_info("Unknown access argument %s: %d\n", + s, r); kfree(s); continue; } @@ -520,7 +521,8 @@ void v9fs_session_close(struct v9fs_session_info *v9ses) * mark transport as disconnected and cancel all pending requests. */ -void v9fs_session_cancel(struct v9fs_session_info *v9ses) { +void v9fs_session_cancel(struct v9fs_session_info *v9ses) +{ p9_debug(P9_DEBUG_ERROR, "cancel session %p\n", v9ses); p9_client_disconnect(v9ses->clnt); } @@ -659,6 +661,7 @@ static void v9fs_destroy_inode_cache(void) static int v9fs_cache_register(void) { int ret; + ret = v9fs_init_inode_cache(); if (ret < 0) return ret; @@ -686,6 +689,7 @@ static void v9fs_cache_unregister(void) static int __init init_v9fs(void) { int err; + pr_info("Installing v9fs 9p2000 file system support\n"); /* TODO: Setup list of registered trasnport modules */ diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 4ca56c5dd637..1647a8e63671 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -124,15 +124,24 @@ static inline struct v9fs_inode *V9FS_I(const struct inode *inode) return container_of(inode, struct v9fs_inode, vfs_inode); } +static inline struct fscache_cookie *v9fs_inode_cookie(struct v9fs_inode *v9inode) +{ +#ifdef CONFIG_9P_FSCACHE + return v9inode->fscache; +#else + return NULL; +#endif +} + extern int v9fs_show_options(struct seq_file *m, struct dentry *root); -struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, - char *); +struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, + const char *dev_name, char *data); extern void v9fs_session_close(struct v9fs_session_info *v9ses); extern void v9fs_session_cancel(struct v9fs_session_info *v9ses); extern void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses); extern struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, - unsigned int flags); + unsigned int flags); extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d); extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d); extern int v9fs_vfs_rename(struct user_namespace *mnt_userns, @@ -158,7 +167,7 @@ extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode) { - return (inode->i_sb->s_fs_info); + return inode->i_sb->s_fs_info; } static inline struct v9fs_session_info *v9fs_dentry2v9ses(struct dentry *dentry) diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index d44ade76966a..bc417da7e9c1 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -44,9 +44,10 @@ extern struct kmem_cache *v9fs_inode_cache; struct inode *v9fs_alloc_inode(struct super_block *sb); void v9fs_free_inode(struct inode *inode); -struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, dev_t); +struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, + dev_t rdev); int v9fs_init_inode(struct v9fs_session_info *v9ses, - struct inode *inode, umode_t mode, dev_t); + struct inode *inode, umode_t mode, dev_t rdev); void v9fs_evict_inode(struct inode *inode); ino_t v9fs_qid2ino(struct p9_qid *qid); void v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, @@ -59,8 +60,8 @@ void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat); int v9fs_uflags2omode(int uflags, int extended); void v9fs_blank_wstat(struct p9_wstat *wstat); -int v9fs_vfs_setattr_dotl(struct user_namespace *, struct dentry *, - struct iattr *); +int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns, + struct dentry *dentry, struct iattr *iattr); int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end, int datasync); int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode); @@ -68,9 +69,9 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode); static inline void v9fs_invalidate_inode_attr(struct inode *inode) { struct v9fs_inode *v9inode; + v9inode = V9FS_I(inode); v9inode->cache_validity |= V9FS_INO_INVALID_ATTR; - return; } int v9fs_open_to_dotl_flags(int flags); diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 1c4f1b39cc95..adafdf86f42f 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * linux/fs/9p/vfs_addr.c - * * This file contians vfs address (mmap) ops for 9P2000. * * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com> @@ -19,7 +17,7 @@ #include <linux/idr.h> #include <linux/sched.h> #include <linux/uio.h> -#include <linux/bvec.h> +#include <linux/netfs.h> #include <net/9p/9p.h> #include <net/9p/client.h> @@ -29,88 +27,97 @@ #include "fid.h" /** - * v9fs_fid_readpage - read an entire page in from 9P - * @data: Opaque pointer to the fid being read - * @page: structure to page - * + * v9fs_req_issue_op - Issue a read from 9P + * @subreq: The read to make */ -static int v9fs_fid_readpage(void *data, struct page *page) +static void v9fs_req_issue_op(struct netfs_read_subrequest *subreq) { - struct p9_fid *fid = data; - struct inode *inode = page->mapping->host; - struct bio_vec bvec = {.bv_page = page, .bv_len = PAGE_SIZE}; + struct netfs_read_request *rreq = subreq->rreq; + struct p9_fid *fid = rreq->netfs_priv; struct iov_iter to; - int retval, err; + loff_t pos = subreq->start + subreq->transferred; + size_t len = subreq->len - subreq->transferred; + int total, err; - p9_debug(P9_DEBUG_VFS, "\n"); + iov_iter_xarray(&to, READ, &rreq->mapping->i_pages, pos, len); - BUG_ON(!PageLocked(page)); + total = p9_client_read(fid, pos, &to, &err); + netfs_subreq_terminated(subreq, err ?: total, false); +} - retval = v9fs_readpage_from_fscache(inode, page); - if (retval == 0) - return retval; +/** + * v9fs_init_rreq - Initialise a read request + * @rreq: The read request + * @file: The file being read from + */ +static void v9fs_init_rreq(struct netfs_read_request *rreq, struct file *file) +{ + struct p9_fid *fid = file->private_data; - iov_iter_bvec(&to, READ, &bvec, 1, PAGE_SIZE); + refcount_inc(&fid->count); + rreq->netfs_priv = fid; +} - retval = p9_client_read(fid, page_offset(page), &to, &err); - if (err) { - v9fs_uncache_page(inode, page); - retval = err; - goto done; - } +/** + * v9fs_req_cleanup - Cleanup request initialized by v9fs_init_rreq + * @mapping: unused mapping of request to cleanup + * @priv: private data to cleanup, a fid, guaranted non-null. + */ +static void v9fs_req_cleanup(struct address_space *mapping, void *priv) +{ + struct p9_fid *fid = priv; - zero_user(page, retval, PAGE_SIZE - retval); - flush_dcache_page(page); - SetPageUptodate(page); + p9_client_clunk(fid); +} - v9fs_readpage_to_fscache(inode, page); - retval = 0; +/** + * v9fs_is_cache_enabled - Determine if caching is enabled for an inode + * @inode: The inode to check + */ +static bool v9fs_is_cache_enabled(struct inode *inode) +{ + struct fscache_cookie *cookie = v9fs_inode_cookie(V9FS_I(inode)); -done: - unlock_page(page); - return retval; + return fscache_cookie_enabled(cookie) && !hlist_empty(&cookie->backing_objects); } /** + * v9fs_begin_cache_operation - Begin a cache operation for a read + * @rreq: The read request + */ +static int v9fs_begin_cache_operation(struct netfs_read_request *rreq) +{ + struct fscache_cookie *cookie = v9fs_inode_cookie(V9FS_I(rreq->inode)); + + return fscache_begin_read_operation(rreq, cookie); +} + +static const struct netfs_read_request_ops v9fs_req_ops = { + .init_rreq = v9fs_init_rreq, + .is_cache_enabled = v9fs_is_cache_enabled, + .begin_cache_operation = v9fs_begin_cache_operation, + .issue_op = v9fs_req_issue_op, + .cleanup = v9fs_req_cleanup, +}; + +/** * v9fs_vfs_readpage - read an entire page in from 9P - * - * @filp: file being read + * @file: file being read * @page: structure to page * */ - -static int v9fs_vfs_readpage(struct file *filp, struct page *page) +static int v9fs_vfs_readpage(struct file *file, struct page *page) { - return v9fs_fid_readpage(filp->private_data, page); + return netfs_readpage(file, page, &v9fs_req_ops, NULL); } /** - * v9fs_vfs_readpages - read a set of pages from 9P - * - * @filp: file being read - * @mapping: the address space - * @pages: list of pages to read - * @nr_pages: count of pages to read - * + * v9fs_vfs_readahead - read a set of pages from 9P + * @ractl: The readahead parameters */ - -static int v9fs_vfs_readpages(struct file *filp, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +static void v9fs_vfs_readahead(struct readahead_control *ractl) { - int ret = 0; - struct inode *inode; - - inode = mapping->host; - p9_debug(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, filp); - - ret = v9fs_readpages_from_fscache(inode, mapping, pages, &nr_pages); - if (ret == 0) - return ret; - - ret = read_cache_pages(mapping, pages, v9fs_fid_readpage, - filp->private_data); - p9_debug(P9_DEBUG_VFS, " = %d\n", ret); - return ret; + netfs_readahead(ractl, &v9fs_req_ops, NULL); } /** @@ -125,7 +132,14 @@ static int v9fs_release_page(struct page *page, gfp_t gfp) { if (PagePrivate(page)) return 0; - return v9fs_fscache_release_page(page, gfp); +#ifdef CONFIG_9P_FSCACHE + if (PageFsCache(page)) { + if (!(gfp & __GFP_DIRECT_RECLAIM) || !(gfp & __GFP_FS)) + return 0; + wait_on_page_fscache(page); + } +#endif + return 1; } /** @@ -138,21 +152,16 @@ static int v9fs_release_page(struct page *page, gfp_t gfp) static void v9fs_invalidate_page(struct page *page, unsigned int offset, unsigned int length) { - /* - * If called with zero offset, we should release - * the private state assocated with the page - */ - if (offset == 0 && length == PAGE_SIZE) - v9fs_fscache_invalidate_page(page); + wait_on_page_fscache(page); } static int v9fs_vfs_writepage_locked(struct page *page) { struct inode *inode = page->mapping->host; struct v9fs_inode *v9inode = V9FS_I(inode); + loff_t start = page_offset(page); loff_t size = i_size_read(inode); struct iov_iter from; - struct bio_vec bvec; int err, len; if (page->index == size >> PAGE_SHIFT) @@ -160,17 +169,14 @@ static int v9fs_vfs_writepage_locked(struct page *page) else len = PAGE_SIZE; - bvec.bv_page = page; - bvec.bv_offset = 0; - bvec.bv_len = len; - iov_iter_bvec(&from, WRITE, &bvec, 1, len); + iov_iter_xarray(&from, WRITE, &page->mapping->i_pages, start, len); /* We should have writeback_fid always set */ BUG_ON(!v9inode->writeback_fid); set_page_writeback(page); - p9_client_write(v9inode->writeback_fid, page_offset(page), &from, &err); + p9_client_write(v9inode->writeback_fid, start, &from, &err); end_page_writeback(page); return err; @@ -208,14 +214,13 @@ static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc) static int v9fs_launder_page(struct page *page) { int retval; - struct inode *inode = page->mapping->host; - v9fs_fscache_wait_on_page_write(inode, page); if (clear_page_dirty_for_io(page)) { retval = v9fs_vfs_writepage_locked(page); if (retval) return retval; } + wait_on_page_fscache(page); return 0; } @@ -242,11 +247,13 @@ v9fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) loff_t pos = iocb->ki_pos; ssize_t n; int err = 0; + if (iov_iter_rw(iter) == WRITE) { n = p9_client_write(file->private_data, pos, iter, &err); if (n) { struct inode *inode = file_inode(file); loff_t i_size = i_size_read(inode); + if (pos + n > i_size) inode_add_bytes(inode, pos + n - i_size); } @@ -257,43 +264,32 @@ v9fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } static int v9fs_write_begin(struct file *filp, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, + loff_t pos, unsigned int len, unsigned int flags, struct page **pagep, void **fsdata) { - int retval = 0; + int retval; struct page *page; - struct v9fs_inode *v9inode; - pgoff_t index = pos >> PAGE_SHIFT; - struct inode *inode = mapping->host; - + struct v9fs_inode *v9inode = V9FS_I(mapping->host); p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping); - v9inode = V9FS_I(inode); -start: - page = grab_cache_page_write_begin(mapping, index, flags); - if (!page) { - retval = -ENOMEM; - goto out; - } BUG_ON(!v9inode->writeback_fid); - if (PageUptodate(page)) - goto out; - if (len == PAGE_SIZE) - goto out; + /* Prefetch area to be written into the cache if we're caching this + * file. We need to do this before we get a lock on the page in case + * there's more than one writer competing for the same cache block. + */ + retval = netfs_write_begin(filp, mapping, pos, len, flags, &page, fsdata, + &v9fs_req_ops, NULL); + if (retval < 0) + return retval; - retval = v9fs_fid_readpage(v9inode->writeback_fid, page); - put_page(page); - if (!retval) - goto start; -out: - *pagep = page; + *pagep = find_subpage(page, pos / PAGE_SIZE); return retval; } static int v9fs_write_end(struct file *filp, struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, + loff_t pos, unsigned int len, unsigned int copied, struct page *page, void *fsdata) { loff_t last_pos = pos + copied; @@ -305,10 +301,11 @@ static int v9fs_write_end(struct file *filp, struct address_space *mapping, if (unlikely(copied < len)) { copied = 0; goto out; - } else if (len == PAGE_SIZE) { - SetPageUptodate(page); } + + SetPageUptodate(page); } + /* * No need to use i_size_read() here, the i_size * cannot change under us because we hold the i_mutex. @@ -328,7 +325,7 @@ out: const struct address_space_operations v9fs_addr_operations = { .readpage = v9fs_vfs_readpage, - .readpages = v9fs_vfs_readpages, + .readahead = v9fs_vfs_readahead, .set_page_dirty = __set_page_dirty_nobuffers, .writepage = v9fs_vfs_writepage, .write_begin = v9fs_write_begin, diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index 4b4292123b3d..1c609e99d280 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * linux/fs/9p/vfs_dentry.c - * * This file contians vfs dentry ops for the 9P2000 protocol. * * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> @@ -52,6 +50,7 @@ static int v9fs_cached_dentry_delete(const struct dentry *dentry) static void v9fs_dentry_release(struct dentry *dentry) { struct hlist_node *p, *n; + p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p)\n", dentry, dentry); hlist_for_each_safe(p, n, (struct hlist_head *)&dentry->d_fsdata) @@ -76,6 +75,7 @@ static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) if (v9inode->cache_validity & V9FS_INO_INVALID_ATTR) { int retval; struct v9fs_session_info *v9ses; + fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) return PTR_ERR(fid); diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index b6a5a0be444d..8c854d8cb0cd 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * linux/fs/9p/vfs_dir.c - * * This file contains vfs directory ops for the 9P2000 protocol. * * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> @@ -71,6 +69,7 @@ static inline int dt_type(struct p9_wstat *mistat) static struct p9_rdir *v9fs_alloc_rdir_buf(struct file *filp, int buflen) { struct p9_fid *fid = filp->private_data; + if (!fid->rdir) fid->rdir = kzalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL); return fid->rdir; @@ -108,6 +107,7 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) if (rdir->tail == rdir->head) { struct iov_iter to; int n; + iov_iter_kvec(&to, READ, &kvec, 1, buflen); n = p9_client_read(file->private_data, ctx->pos, &to, &err); @@ -233,5 +233,5 @@ const struct file_operations v9fs_dir_operations_dotl = { .iterate_shared = v9fs_dir_readdir_dotl, .open = v9fs_file_open, .release = v9fs_dir_release, - .fsync = v9fs_file_fsync_dotl, + .fsync = v9fs_file_fsync_dotl, }; diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 246235ebdb70..4244d48398ef 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * linux/fs/9p/vfs_file.c - * * This file contians vfs file ops for 9P2000. * * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> @@ -408,6 +406,7 @@ v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file_inode(file); loff_t i_size; unsigned long pg_start, pg_end; + pg_start = origin >> PAGE_SHIFT; pg_end = (origin + retval - 1) >> PAGE_SHIFT; if (inode->i_mapping && inode->i_mapping->nrpages) @@ -537,14 +536,23 @@ v9fs_vm_page_mkwrite(struct vm_fault *vmf) p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n", page, (unsigned long)filp->private_data); + v9inode = V9FS_I(inode); + + /* Wait for the page to be written to the cache before we allow it to + * be modified. We then assume the entire page will need writing back. + */ +#ifdef CONFIG_9P_FSCACHE + if (PageFsCache(page) && + wait_on_page_fscache_killable(page) < 0) + return VM_FAULT_RETRY; +#endif + /* Update file times before taking page lock */ file_update_time(filp); - v9inode = V9FS_I(inode); - /* make sure the cache has finished storing the page */ - v9fs_fscache_wait_on_page_write(inode, page); BUG_ON(!v9inode->writeback_fid); - lock_page(page); + if (lock_page_killable(page) < 0) + return VM_FAULT_RETRY; if (page->mapping != inode->i_mapping) goto out_unlock; wait_for_stable_page(page); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 08f48b70a741..328c338ff304 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * linux/fs/9p/vfs_inode.c - * * This file contains vfs inode ops for the 9P2000 protocol. * * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> @@ -49,6 +47,7 @@ static const struct inode_operations v9fs_symlink_inode_operations; static u32 unixmode2p9mode(struct v9fs_session_info *v9ses, umode_t mode) { int res; + res = mode & 0777; if (S_ISDIR(mode)) res |= P9_DMDIR; @@ -110,7 +109,7 @@ static int p9mode2perm(struct v9fs_session_info *v9ses, static umode_t p9mode2unixmode(struct v9fs_session_info *v9ses, struct p9_wstat *stat, dev_t *rdev) { - int res; + int res, r; u32 mode = stat->mode; *rdev = 0; @@ -128,11 +127,16 @@ static umode_t p9mode2unixmode(struct v9fs_session_info *v9ses, res |= S_IFIFO; else if ((mode & P9_DMDEVICE) && (v9fs_proto_dotu(v9ses)) && (v9ses->nodev == 0)) { - char type = 0, ext[32]; + char type = 0; int major = -1, minor = -1; - strlcpy(ext, stat->extension, sizeof(ext)); - sscanf(ext, "%c %i %i", &type, &major, &minor); + r = sscanf(stat->extension, "%c %i %i", &type, &major, &minor); + if (r != 3) { + p9_debug(P9_DEBUG_ERROR, + "invalid device string, umode will be bogus: %s\n", + stat->extension); + return res; + } switch (type) { case 'c': res |= S_IFCHR; @@ -223,6 +227,7 @@ v9fs_blank_wstat(struct p9_wstat *wstat) struct inode *v9fs_alloc_inode(struct super_block *sb) { struct v9fs_inode *v9inode; + v9inode = kmem_cache_alloc(v9fs_inode_cache, GFP_KERNEL); if (!v9inode) return NULL; @@ -251,7 +256,7 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses, { int err = 0; - inode_init_owner(&init_user_ns,inode, NULL, mode); + inode_init_owner(&init_user_ns, inode, NULL, mode); inode->i_blocks = 0; inode->i_rdev = rdev; inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); @@ -440,7 +445,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb, unsigned long i_ino; struct inode *inode; struct v9fs_session_info *v9ses = sb->s_fs_info; - int (*test)(struct inode *, void *); + int (*test)(struct inode *inode, void *data); if (new) test = v9fs_test_new_inode; @@ -499,8 +504,10 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, static int v9fs_at_to_dotl_flags(int flags) { int rflags = 0; + if (flags & AT_REMOVEDIR) rflags |= P9_DOTL_AT_REMOVEDIR; + return rflags; } @@ -797,7 +804,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, static int v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, - struct file *file, unsigned flags, umode_t mode) + struct file *file, unsigned int flags, umode_t mode) { int err; u32 perm; @@ -1084,7 +1091,7 @@ static int v9fs_vfs_setattr(struct user_namespace *mnt_userns, fid = v9fs_fid_lookup(dentry); use_dentry = 1; } - if(IS_ERR(fid)) + if (IS_ERR(fid)) return PTR_ERR(fid); v9fs_blank_wstat(&wstat); @@ -1364,7 +1371,7 @@ v9fs_vfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, char name[2 + U32_MAX_DIGITS + 1 + U32_MAX_DIGITS + 1]; u32 perm; - p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %hx MAJOR: %u MINOR: %u\n", + p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, dentry, mode, MAJOR(rdev), MINOR(rdev)); diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 01b9e1281a29..7dee89ba32e7 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * linux/fs/9p/vfs_inode_dotl.c - * * This file contains vfs inode ops for the 9P2000.L protocol. * * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> @@ -107,7 +105,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, unsigned long i_ino; struct inode *inode; struct v9fs_session_info *v9ses = sb->s_fs_info; - int (*test)(struct inode *, void *); + int (*test)(struct inode *inode, void *data); if (new) test = v9fs_test_new_inode_dotl; @@ -230,7 +228,7 @@ v9fs_vfs_create_dotl(struct user_namespace *mnt_userns, struct inode *dir, static int v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, - struct file *file, unsigned flags, umode_t omode) + struct file *file, unsigned int flags, umode_t omode) { int err = 0; kgid_t gid; @@ -261,7 +259,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, v9ses = v9fs_inode2v9ses(dir); name = dentry->d_name.name; - p9_debug(P9_DEBUG_VFS, "name:%s flags:0x%x mode:0x%hx\n", + p9_debug(P9_DEBUG_VFS, "name:%s flags:0x%x mode:0x%x\n", name, flags, omode); dfid = v9fs_parent_fid(dentry); @@ -807,6 +805,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { /* Get the latest stat info from server. */ struct p9_fid *fid; + fid = v9fs_fid_lookup(old_dentry); if (IS_ERR(fid)) return PTR_ERR(fid); @@ -843,7 +842,7 @@ v9fs_vfs_mknod_dotl(struct user_namespace *mnt_userns, struct inode *dir, struct p9_qid qid; struct posix_acl *dacl = NULL, *pacl = NULL; - p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %hx MAJOR: %u MINOR: %u\n", + p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, dentry, omode, MAJOR(rdev), MINOR(rdev)); diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 5fce6e30bc5a..b739e02f5ef7 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -1,9 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * linux/fs/9p/vfs_super.c - * - * This file contians superblock ops for 9P2000. It is intended that - * you mount this file system on directories. * * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> @@ -83,6 +79,9 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, if (!v9ses->cache) { sb->s_bdi->ra_pages = 0; sb->s_bdi->io_pages = 0; + } else { + sb->s_bdi->ra_pages = v9ses->maxdata >> PAGE_SHIFT; + sb->s_bdi->io_pages = v9ses->maxdata >> PAGE_SHIFT; } sb->s_flags |= SB_ACTIVE | SB_DIRSYNC; @@ -113,7 +112,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, struct inode *inode = NULL; struct dentry *root = NULL; struct v9fs_session_info *v9ses = NULL; - umode_t mode = S_IRWXUGO | S_ISVTX; + umode_t mode = 0777 | S_ISVTX; struct p9_fid *fid; int retval = 0; @@ -157,6 +156,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, sb->s_root = root; if (v9fs_proto_dotl(v9ses)) { struct p9_stat_dotl *st = NULL; + st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); if (IS_ERR(st)) { retval = PTR_ERR(st); @@ -167,6 +167,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, kfree(st); } else { struct p9_wstat *st = NULL; + st = p9_client_stat(fid); if (IS_ERR(st)) { retval = PTR_ERR(st); @@ -275,12 +276,13 @@ done: static int v9fs_drop_inode(struct inode *inode) { struct v9fs_session_info *v9ses; + v9ses = v9fs_inode2v9ses(inode); if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) return generic_drop_inode(inode); /* * in case of non cached mode always drop the - * the inode because we want the inode attribute + * inode because we want the inode attribute * to always match that on the server. */ return 1; diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index ee331845e2c7..a824441b95a2 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c @@ -1,15 +1,7 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright IBM Corporation, 2010 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * */ #include <linux/module.h> diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h index c63c3bea5de5..3e11fc3331eb 100644 --- a/fs/9p/xattr.h +++ b/fs/9p/xattr.h @@ -1,15 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ /* * Copyright IBM Corporation, 2010 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * */ #ifndef FS_9P_XATTR_H #define FS_9P_XATTR_H @@ -22,13 +14,14 @@ extern const struct xattr_handler *v9fs_xattr_handlers[]; extern const struct xattr_handler v9fs_xattr_acl_access_handler; extern const struct xattr_handler v9fs_xattr_acl_default_handler; -extern ssize_t v9fs_fid_xattr_get(struct p9_fid *, const char *, - void *, size_t); -extern ssize_t v9fs_xattr_get(struct dentry *, const char *, - void *, size_t); -extern int v9fs_fid_xattr_set(struct p9_fid *, const char *, - const void *, size_t, int); -extern int v9fs_xattr_set(struct dentry *, const char *, - const void *, size_t, int); -extern ssize_t v9fs_listxattr(struct dentry *, char *, size_t); +ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name, + void *buffer, size_t buffer_size); +ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name, + void *buffer, size_t buffer_size); +int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name, + const void *value, size_t value_len, int flags); +int v9fs_xattr_set(struct dentry *dentry, const char *name, + const void *value, size_t value_len, int flags); +ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, + size_t buffer_size); #endif /* FS_9P_XATTR_H */ diff --git a/fs/afs/file.c b/fs/afs/file.c index e6c447ae91f3..eb11d047c0ae 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -19,6 +19,7 @@ static int afs_file_mmap(struct file *file, struct vm_area_struct *vma); static int afs_readpage(struct file *file, struct page *page); +static int afs_symlink_readpage(struct file *file, struct page *page); static void afs_invalidatepage(struct page *page, unsigned int offset, unsigned int length); static int afs_releasepage(struct page *page, gfp_t gfp_flags); @@ -49,7 +50,7 @@ const struct inode_operations afs_file_inode_operations = { .permission = afs_permission, }; -const struct address_space_operations afs_fs_aops = { +const struct address_space_operations afs_file_aops = { .readpage = afs_readpage, .readahead = afs_readahead, .set_page_dirty = afs_set_page_dirty, @@ -62,6 +63,12 @@ const struct address_space_operations afs_fs_aops = { .writepages = afs_writepages, }; +const struct address_space_operations afs_symlink_aops = { + .readpage = afs_symlink_readpage, + .releasepage = afs_releasepage, + .invalidatepage = afs_invalidatepage, +}; + static const struct vm_operations_struct afs_vm_ops = { .open = afs_vm_open, .close = afs_vm_close, @@ -313,7 +320,7 @@ static void afs_req_issue_op(struct netfs_read_subrequest *subreq) afs_put_read(fsreq); } -static int afs_symlink_readpage(struct page *page) +static int afs_symlink_readpage(struct file *file, struct page *page) { struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); struct afs_read *fsreq; @@ -378,9 +385,6 @@ const struct netfs_read_request_ops afs_req_ops = { static int afs_readpage(struct file *file, struct page *page) { - if (!file) - return afs_symlink_readpage(page); - return netfs_readpage(file, page, &afs_req_ops, NULL); } diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 8fcffea2daf5..16906eb592d9 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -95,7 +95,7 @@ static int afs_inode_init_from_status(struct afs_operation *op, inode->i_mode = S_IFREG | (status->mode & S_IALLUGO); inode->i_op = &afs_file_inode_operations; inode->i_fop = &afs_file_operations; - inode->i_mapping->a_ops = &afs_fs_aops; + inode->i_mapping->a_ops = &afs_file_aops; break; case AFS_FTYPE_DIR: inode->i_mode = S_IFDIR | (status->mode & S_IALLUGO); @@ -113,11 +113,11 @@ static int afs_inode_init_from_status(struct afs_operation *op, inode->i_mode = S_IFDIR | 0555; inode->i_op = &afs_mntpt_inode_operations; inode->i_fop = &afs_mntpt_file_operations; - inode->i_mapping->a_ops = &afs_fs_aops; + inode->i_mapping->a_ops = &afs_symlink_aops; } else { inode->i_mode = S_IFLNK | status->mode; inode->i_op = &afs_symlink_inode_operations; - inode->i_mapping->a_ops = &afs_fs_aops; + inode->i_mapping->a_ops = &afs_symlink_aops; } inode_nohighmem(inode); break; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 0ad97a8fc0d4..9357c53faa69 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -1055,7 +1055,8 @@ extern void afs_dynroot_depopulate(struct super_block *); /* * file.c */ -extern const struct address_space_operations afs_fs_aops; +extern const struct address_space_operations afs_file_aops; +extern const struct address_space_operations afs_symlink_aops; extern const struct inode_operations afs_file_inode_operations; extern const struct file_operations afs_file_operations; extern const struct netfs_read_request_ops afs_req_ops; diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index 2b35cba8ad62..fdc7d675b4b0 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -83,25 +83,18 @@ static s64 linux_to_yfs_time(const struct timespec64 *t) return (u64)t->tv_sec * 10000000 + t->tv_nsec/100; } -static __be32 *xdr_encode_YFSStoreStatus_mode(__be32 *bp, mode_t mode) -{ - struct yfs_xdr_YFSStoreStatus *x = (void *)bp; - - x->mask = htonl(AFS_SET_MODE); - x->mode = htonl(mode & S_IALLUGO); - x->mtime_client = u64_to_xdr(0); - x->owner = u64_to_xdr(0); - x->group = u64_to_xdr(0); - return bp + xdr_size(x); -} - -static __be32 *xdr_encode_YFSStoreStatus_mtime(__be32 *bp, const struct timespec64 *t) +static __be32 *xdr_encode_YFSStoreStatus(__be32 *bp, mode_t *mode, + const struct timespec64 *t) { struct yfs_xdr_YFSStoreStatus *x = (void *)bp; + mode_t masked_mode = mode ? *mode & S_IALLUGO : 0; s64 mtime = linux_to_yfs_time(t); + u32 mask = AFS_SET_MTIME; - x->mask = htonl(AFS_SET_MTIME); - x->mode = htonl(0); + mask |= mode ? AFS_SET_MODE : 0; + + x->mask = htonl(mask); + x->mode = htonl(masked_mode); x->mtime_client = u64_to_xdr(mtime); x->owner = u64_to_xdr(0); x->group = u64_to_xdr(0); @@ -576,7 +569,7 @@ void yfs_fs_create_file(struct afs_operation *op) bp = xdr_encode_u32(bp, 0); /* RPC flags */ bp = xdr_encode_YFSFid(bp, &dvp->fid); bp = xdr_encode_name(bp, name); - bp = xdr_encode_YFSStoreStatus_mode(bp, op->create.mode); + bp = xdr_encode_YFSStoreStatus(bp, &op->create.mode, &op->mtime); bp = xdr_encode_u32(bp, yfs_LockNone); /* ViceLockType */ yfs_check_req(call, bp); @@ -625,7 +618,7 @@ void yfs_fs_make_dir(struct afs_operation *op) bp = xdr_encode_u32(bp, 0); /* RPC flags */ bp = xdr_encode_YFSFid(bp, &dvp->fid); bp = xdr_encode_name(bp, name); - bp = xdr_encode_YFSStoreStatus_mode(bp, op->create.mode); + bp = xdr_encode_YFSStoreStatus(bp, &op->create.mode, &op->mtime); yfs_check_req(call, bp); trace_afs_make_fs_call1(call, &dvp->fid, name); @@ -946,6 +939,7 @@ void yfs_fs_symlink(struct afs_operation *op) struct afs_vnode_param *dvp = &op->file[0]; struct afs_call *call; size_t contents_sz; + mode_t mode = 0777; __be32 *bp; _enter(""); @@ -972,7 +966,7 @@ void yfs_fs_symlink(struct afs_operation *op) bp = xdr_encode_YFSFid(bp, &dvp->fid); bp = xdr_encode_name(bp, name); bp = xdr_encode_string(bp, op->create.symlink, contents_sz); - bp = xdr_encode_YFSStoreStatus_mode(bp, S_IRWXUGO); + bp = xdr_encode_YFSStoreStatus(bp, &mode, &op->mtime); yfs_check_req(call, bp); trace_afs_make_fs_call1(call, &dvp->fid, name); @@ -1103,7 +1097,7 @@ void yfs_fs_store_data(struct afs_operation *op) bp = xdr_encode_u32(bp, YFSSTOREDATA64); bp = xdr_encode_u32(bp, 0); /* RPC flags */ bp = xdr_encode_YFSFid(bp, &vp->fid); - bp = xdr_encode_YFSStoreStatus_mtime(bp, &op->mtime); + bp = xdr_encode_YFSStoreStatus(bp, NULL, &op->mtime); bp = xdr_encode_u64(bp, op->store.pos); bp = xdr_encode_u64(bp, op->store.size); bp = xdr_encode_u64(bp, op->store.i_size); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index a813b70f594e..f8c7f26f1fbb 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -156,7 +156,7 @@ static int padzero(unsigned long elf_bss) #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items)) #define STACK_ROUND(sp, items) \ (((unsigned long) (sp - items)) &~ 15UL) -#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; }) +#define STACK_ALLOC(sp, len) (sp -= len) #endif #ifndef ELF_BASE_PLATFORM @@ -1074,20 +1074,26 @@ out_free_interp: vaddr = elf_ppnt->p_vaddr; /* - * If we are loading ET_EXEC or we have already performed - * the ET_DYN load_addr calculations, proceed normally. + * The first time through the loop, load_addr_set is false: + * layout will be calculated. Once set, use MAP_FIXED since + * we know we've already safely mapped the entire region with + * MAP_FIXED_NOREPLACE in the once-per-binary logic following. */ - if (elf_ex->e_type == ET_EXEC || load_addr_set) { + if (load_addr_set) { elf_flags |= MAP_FIXED; + } else if (elf_ex->e_type == ET_EXEC) { + /* + * This logic is run once for the first LOAD Program + * Header for ET_EXEC binaries. No special handling + * is needed. + */ + elf_flags |= MAP_FIXED_NOREPLACE; } else if (elf_ex->e_type == ET_DYN) { /* * This logic is run once for the first LOAD Program * Header for ET_DYN binaries to calculate the * randomization (load_bias) for all the LOAD - * Program Headers, and to calculate the entire - * size of the ELF mapping (total_size). (Note that - * load_addr_set is set to true later once the - * initial mapping is performed.) + * Program Headers. * * There are effectively two types of ET_DYN * binaries: programs (i.e. PIE: ET_DYN with INTERP) @@ -1108,7 +1114,7 @@ out_free_interp: * Therefore, programs are loaded offset from * ELF_ET_DYN_BASE and loaders are loaded into the * independently randomized mmap region (0 load_bias - * without MAP_FIXED). + * without MAP_FIXED nor MAP_FIXED_NOREPLACE). */ if (interpreter) { load_bias = ELF_ET_DYN_BASE; @@ -1117,7 +1123,7 @@ out_free_interp: alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum); if (alignment) load_bias &= ~(alignment - 1); - elf_flags |= MAP_FIXED; + elf_flags |= MAP_FIXED_NOREPLACE; } else load_bias = 0; @@ -1129,7 +1135,14 @@ out_free_interp: * is then page aligned. */ load_bias = ELF_PAGESTART(load_bias - vaddr); + } + /* + * Calculate the entire size of the ELF mapping (total_size). + * (Note that load_addr_set is set to true later once the + * initial mapping is performed.) + */ + if (!load_addr_set) { total_size = total_mapping_size(elf_phdata, elf_ex->e_phnum); if (!total_size) { @@ -1834,7 +1847,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, /* * Allocate a structure for each thread. */ - for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) { + for (ct = &dump_task->signal->core_state->dumper; ct; ct = ct->next) { t = kzalloc(offsetof(struct elf_thread_core_info, notes[info->thread_notes]), GFP_KERNEL); @@ -2024,7 +2037,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, if (!elf_note_info_init(info)) return 0; - for (ct = current->mm->core_state->dumper.next; + for (ct = current->signal->core_state->dumper.next; ct; ct = ct->next) { ets = kzalloc(sizeof(*ets), GFP_KERNEL); if (!ets) diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 6d8fd6030cbb..c6f588dc4a9d 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1494,7 +1494,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size)) goto end_coredump; - for (ct = current->mm->core_state->dumper.next; + for (ct = current->signal->core_state->dumper.next; ct; ct = ct->next) { tmp = elf_dump_thread_status(cprm->siginfo->si_signo, ct->task, &thread_status_size); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 9fa930dfd78d..dca42aa87d30 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -38,7 +38,6 @@ #include <linux/key-type.h> #include "cifs_spnego.h" #include "fscache.h" -#include "smb2pdu.h" #ifdef CONFIG_CIFS_DFS_UPCALL #include "dfs_cache.h" #endif diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index e916470468ea..abff31dcd005 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -20,6 +20,7 @@ #include <crypto/internal/hash.h> #include <linux/scatterlist.h> #include <uapi/linux/cifs/cifs_mount.h> +#include "../smbfs_common/smb2pdu.h" #include "smb2pdu.h" #define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ @@ -776,7 +777,7 @@ revert_current_mid(struct TCP_Server_Info *server, const unsigned int val) static inline void revert_current_mid_from_hdr(struct TCP_Server_Info *server, - const struct smb2_sync_hdr *shdr) + const struct smb2_hdr *shdr) { unsigned int num = le16_to_cpu(shdr->CreditCharge); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index c3b94c1e4591..0abbff4e4135 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -677,7 +677,7 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed) static unsigned int smb2_get_credits_from_hdr(char *buffer, struct TCP_Server_Info *server) { - struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buffer; + struct smb2_hdr *shdr = (struct smb2_hdr *)buffer; /* * SMB1 does not use credits. @@ -794,7 +794,6 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) */ } - kfree(server->hostname); kfree(server); length = atomic_dec_return(&tcpSesAllocCount); @@ -878,7 +877,7 @@ cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid) static void smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server) { - struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buffer; + struct smb2_hdr *shdr = (struct smb2_hdr *)buffer; int scredits, in_flight; /* @@ -1235,6 +1234,9 @@ static int match_server(struct TCP_Server_Info *server, struct smb3_fs_context * if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns)) return 0; + if (strcasecmp(server->hostname, ctx->server_hostname)) + return 0; + if (!match_address(server, addr, (struct sockaddr *)&ctx->srcaddr)) return 0; @@ -1336,6 +1338,7 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) kfree(server->session_key.response); server->session_key.response = NULL; server->session_key.len = 0; + kfree(server->hostname); task = xchg(&server->tsk, NULL); if (task) @@ -1361,14 +1364,15 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx) goto out_err; } + tcp_ses->hostname = kstrdup(ctx->server_hostname, GFP_KERNEL); + if (!tcp_ses->hostname) { + rc = -ENOMEM; + goto out_err; + } + tcp_ses->ops = ctx->ops; tcp_ses->vals = ctx->vals; cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns)); - tcp_ses->hostname = extract_hostname(ctx->UNC); - if (IS_ERR(tcp_ses->hostname)) { - rc = PTR_ERR(tcp_ses->hostname); - goto out_err_crypto_release; - } tcp_ses->conn_id = atomic_inc_return(&tcpSesNextId); tcp_ses->noblockcnt = ctx->rootfs; @@ -1497,8 +1501,7 @@ out_err_crypto_release: out_err: if (tcp_ses) { - if (!IS_ERR(tcp_ses->hostname)) - kfree(tcp_ses->hostname); + kfree(tcp_ses->hostname); if (tcp_ses->ssocket) sock_release(tcp_ses->ssocket); kfree(tcp_ses); @@ -2646,11 +2649,12 @@ generic_ip_connect(struct TCP_Server_Info *server) rc = 0; if (rc < 0) { cifs_dbg(FYI, "Error %d connecting to server\n", rc); + trace_smb3_connect_err(server->hostname, server->conn_id, &server->dstaddr, rc); sock_release(socket); server->ssocket = NULL; return rc; } - + trace_smb3_connect_done(server->hostname, server->conn_id, &server->dstaddr); if (sport == htons(RFC1001_PORT)) rc = ip_rfc1001_connect(server); diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 3109def8e199..38d96a480745 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -116,6 +116,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = { fsparam_flag("nosharesock", Opt_nosharesock), fsparam_flag_no("persistenthandles", Opt_persistent), fsparam_flag_no("resilienthandles", Opt_resilient), + fsparam_flag_no("tcpnodelay", Opt_tcp_nodelay), fsparam_flag("domainauto", Opt_domainauto), fsparam_flag("rdma", Opt_rdma), fsparam_flag("modesid", Opt_modesid), @@ -318,6 +319,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx DUP_CTX_STR(mount_options); DUP_CTX_STR(username); DUP_CTX_STR(password); + DUP_CTX_STR(server_hostname); DUP_CTX_STR(UNC); DUP_CTX_STR(source); DUP_CTX_STR(domainname); @@ -456,6 +458,11 @@ smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx) if (!pos) return -EINVAL; + /* record the server hostname */ + ctx->server_hostname = kstrndup(devname + 2, pos - devname - 2, GFP_KERNEL); + if (!ctx->server_hostname) + return -ENOMEM; + /* skip past delimiter */ ++pos; @@ -1383,6 +1390,13 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, } } break; + case Opt_tcp_nodelay: + /* tcp nodelay should not usually be needed since we CORK/UNCORK the socket */ + if (result.negated) + ctx->sockopt_tcp_nodelay = false; + else + ctx->sockopt_tcp_nodelay = true; + break; case Opt_domainauto: ctx->domainauto = true; break; @@ -1496,6 +1510,8 @@ smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx) ctx->username = NULL; kfree_sensitive(ctx->password); ctx->password = NULL; + kfree(ctx->server_hostname); + ctx->server_hostname = NULL; kfree(ctx->UNC); ctx->UNC = NULL; kfree(ctx->source); diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h index a42ba71d7a81..b2d22cf9cb18 100644 --- a/fs/cifs/fs_context.h +++ b/fs/cifs/fs_context.h @@ -98,6 +98,7 @@ enum cifs_param { Opt_nosharesock, Opt_persistent, Opt_resilient, + Opt_tcp_nodelay, Opt_domainauto, Opt_rdma, Opt_modesid, @@ -166,6 +167,7 @@ struct smb3_fs_context { char *password; char *domainname; char *source; + char *server_hostname; char *UNC; char *nodename; char *iocharset; /* local code page for mapping to and from Unicode */ diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index bb1185fff8cc..ba2c3e897b29 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -152,7 +152,7 @@ cifs_buf_get(void) * SMB2 header is bigger than CIFS one - no problems to clean some * more bytes for CIFS. */ - size_t buf_size = sizeof(struct smb2_sync_hdr); + size_t buf_size = sizeof(struct smb2_hdr); /* * We could use negotiated size instead of max_msgsize - diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index 181514b8770d..194799ddd382 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -2439,14 +2439,16 @@ smb2_print_status(__le32 status) int map_smb2_to_linux_error(char *buf, bool log_err) { - struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf; + struct smb2_hdr *shdr = (struct smb2_hdr *)buf; unsigned int i; int rc = -EIO; __le32 smb2err = shdr->Status; if (smb2err == 0) { - trace_smb3_cmd_done(shdr->TreeId, shdr->SessionId, - le16_to_cpu(shdr->Command), le64_to_cpu(shdr->MessageId)); + trace_smb3_cmd_done(le32_to_cpu(shdr->Id.SyncId.TreeId), + le64_to_cpu(shdr->SessionId), + le16_to_cpu(shdr->Command), + le64_to_cpu(shdr->MessageId)); return 0; } @@ -2470,8 +2472,10 @@ map_smb2_to_linux_error(char *buf, bool log_err) cifs_dbg(FYI, "Mapping SMB2 status code 0x%08x to POSIX err %d\n", __le32_to_cpu(smb2err), rc); - trace_smb3_cmd_err(shdr->TreeId, shdr->SessionId, - le16_to_cpu(shdr->Command), - le64_to_cpu(shdr->MessageId), le32_to_cpu(smb2err), rc); + trace_smb3_cmd_err(le32_to_cpu(shdr->Id.SyncId.TreeId), + le64_to_cpu(shdr->SessionId), + le16_to_cpu(shdr->Command), + le64_to_cpu(shdr->MessageId), + le32_to_cpu(smb2err), rc); return rc; } diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 29b5554f6263..cdcdef32759e 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -8,7 +8,6 @@ * */ #include <linux/ctype.h> -#include "smb2pdu.h" #include "cifsglob.h" #include "cifsproto.h" #include "smb2proto.h" @@ -19,7 +18,7 @@ #include "nterr.h" static int -check_smb2_hdr(struct smb2_sync_hdr *shdr, __u64 mid) +check_smb2_hdr(struct smb2_hdr *shdr, __u64 mid) { __u64 wire_mid = le64_to_cpu(shdr->MessageId); @@ -81,9 +80,9 @@ static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = { /* SMB2_OPLOCK_BREAK */ cpu_to_le16(24) }; -#define SMB311_NEGPROT_BASE_SIZE (sizeof(struct smb2_sync_hdr) + sizeof(struct smb2_negotiate_rsp)) +#define SMB311_NEGPROT_BASE_SIZE (sizeof(struct smb2_hdr) + sizeof(struct smb2_negotiate_rsp)) -static __u32 get_neg_ctxt_len(struct smb2_sync_hdr *hdr, __u32 len, +static __u32 get_neg_ctxt_len(struct smb2_hdr *hdr, __u32 len, __u32 non_ctxlen) { __u16 neg_count; @@ -135,13 +134,13 @@ static __u32 get_neg_ctxt_len(struct smb2_sync_hdr *hdr, __u32 len, int smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *srvr) { - struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf; - struct smb2_sync_pdu *pdu = (struct smb2_sync_pdu *)shdr; + struct smb2_hdr *shdr = (struct smb2_hdr *)buf; + struct smb2_pdu *pdu = (struct smb2_pdu *)shdr; __u64 mid; __u32 clc_len; /* calculated length */ int command; - int pdu_size = sizeof(struct smb2_sync_pdu); - int hdr_size = sizeof(struct smb2_sync_hdr); + int pdu_size = sizeof(struct smb2_pdu); + int hdr_size = sizeof(struct smb2_hdr); /* * Add function to do table lookup of StructureSize by command @@ -155,7 +154,7 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *srvr) /* decrypt frame now that it is completely read in */ spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &srvr->smb_ses_list, smb_ses_list) { - if (ses->Suid == thdr->SessionId) + if (ses->Suid == le64_to_cpu(thdr->SessionId)) break; } spin_unlock(&cifs_tcp_ses_lock); @@ -296,7 +295,7 @@ static const bool has_smb2_data_area[NUMBER_OF_SMB2_COMMANDS] = { * area and the offset to it (from the beginning of the smb are also returned. */ char * -smb2_get_data_area_len(int *off, int *len, struct smb2_sync_hdr *shdr) +smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *shdr) { *off = 0; *len = 0; @@ -401,8 +400,8 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_sync_hdr *shdr) unsigned int smb2_calc_size(void *buf, struct TCP_Server_Info *srvr) { - struct smb2_sync_pdu *pdu = (struct smb2_sync_pdu *)buf; - struct smb2_sync_hdr *shdr = &pdu->sync_hdr; + struct smb2_pdu *pdu = (struct smb2_pdu *)buf; + struct smb2_hdr *shdr = &pdu->hdr; int offset; /* the offset from the beginning of SMB to data area */ int data_length; /* the length of the variable length data area */ /* Structure Size has already been checked to make sure it is 64 */ @@ -669,7 +668,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) cifs_dbg(FYI, "Checking for oplock break\n"); - if (rsp->sync_hdr.Command != SMB2_OPLOCK_BREAK) + if (rsp->hdr.Command != SMB2_OPLOCK_BREAK) return false; if (rsp->StructureSize != @@ -816,25 +815,25 @@ smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid, int smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct TCP_Server_Info *server) { - struct smb2_sync_hdr *sync_hdr = mid->resp_buf; + struct smb2_hdr *hdr = mid->resp_buf; struct smb2_create_rsp *rsp = mid->resp_buf; struct cifs_tcon *tcon; int rc; - if ((mid->optype & CIFS_CP_CREATE_CLOSE_OP) || sync_hdr->Command != SMB2_CREATE || - sync_hdr->Status != STATUS_SUCCESS) + if ((mid->optype & CIFS_CP_CREATE_CLOSE_OP) || hdr->Command != SMB2_CREATE || + hdr->Status != STATUS_SUCCESS) return 0; - tcon = smb2_find_smb_tcon(server, sync_hdr->SessionId, - sync_hdr->TreeId); + tcon = smb2_find_smb_tcon(server, le64_to_cpu(hdr->SessionId), + le32_to_cpu(hdr->Id.SyncId.TreeId)); if (!tcon) return -ENOENT; rc = __smb2_handle_cancelled_cmd(tcon, - le16_to_cpu(sync_hdr->Command), - le64_to_cpu(sync_hdr->MessageId), - rsp->PersistentFileId, - rsp->VolatileFileId); + le16_to_cpu(hdr->Command), + le64_to_cpu(hdr->MessageId), + le64_to_cpu(rsp->PersistentFileId), + le64_to_cpu(rsp->VolatileFileId)); if (rc) cifs_put_tcon(tcon); @@ -856,10 +855,10 @@ smb311_update_preauth_hash(struct cifs_ses *ses, struct kvec *iov, int nvec) { int i, rc; struct sdesc *d; - struct smb2_sync_hdr *hdr; + struct smb2_hdr *hdr; struct TCP_Server_Info *server = cifs_ses_server(ses); - hdr = (struct smb2_sync_hdr *)iov[0].iov_base; + hdr = (struct smb2_hdr *)iov[0].iov_base; /* neg prot are always taken */ if (hdr->Command == SMB2_NEGOTIATE) goto ok; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index bda606dc72b1..7acf71defea7 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -325,7 +325,7 @@ static struct mid_q_entry * __smb2_find_mid(struct TCP_Server_Info *server, char *buf, bool dequeue) { struct mid_q_entry *mid; - struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf; + struct smb2_hdr *shdr = (struct smb2_hdr *)buf; __u64 wire_mid = le64_to_cpu(shdr->MessageId); if (shdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) { @@ -367,11 +367,11 @@ static void smb2_dump_detail(void *buf, struct TCP_Server_Info *server) { #ifdef CONFIG_CIFS_DEBUG2 - struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf; + struct smb2_hdr *shdr = (struct smb2_hdr *)buf; cifs_server_dbg(VFS, "Cmd: %d Err: 0x%x Flags: 0x%x Mid: %llu Pid: %d\n", shdr->Command, shdr->Status, shdr->Flags, shdr->MessageId, - shdr->ProcessId); + shdr->Id.SyncId.ProcessId); cifs_server_dbg(VFS, "smb buf %p len %u\n", buf, server->ops->calc_smb_size(buf, server)); #endif @@ -885,10 +885,10 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, atomic_inc(&tcon->num_remote_opens); o_rsp = (struct smb2_create_rsp *)rsp_iov[0].iov_base; - oparms.fid->persistent_fid = o_rsp->PersistentFileId; - oparms.fid->volatile_fid = o_rsp->VolatileFileId; + oparms.fid->persistent_fid = le64_to_cpu(o_rsp->PersistentFileId); + oparms.fid->volatile_fid = le64_to_cpu(o_rsp->VolatileFileId); #ifdef CONFIG_CIFS_DEBUG2 - oparms.fid->mid = le64_to_cpu(o_rsp->sync_hdr.MessageId); + oparms.fid->mid = le64_to_cpu(o_rsp->hdr.MessageId); #endif /* CIFS_DEBUG2 */ tcon->crfid.tcon = tcon; @@ -2391,12 +2391,12 @@ again: /* If the open failed there is nothing to do */ op_rsp = (struct smb2_create_rsp *)rsp_iov[0].iov_base; - if (op_rsp == NULL || op_rsp->sync_hdr.Status != STATUS_SUCCESS) { + if (op_rsp == NULL || op_rsp->hdr.Status != STATUS_SUCCESS) { cifs_dbg(FYI, "query_dir_first: open failed rc=%d\n", rc); goto qdf_free; } - fid->persistent_fid = op_rsp->PersistentFileId; - fid->volatile_fid = op_rsp->VolatileFileId; + fid->persistent_fid = le64_to_cpu(op_rsp->PersistentFileId); + fid->volatile_fid = le64_to_cpu(op_rsp->VolatileFileId); /* Anything else than ENODATA means a genuine error */ if (rc && rc != -ENODATA) { @@ -2410,7 +2410,7 @@ again: atomic_inc(&tcon->num_remote_opens); qd_rsp = (struct smb2_query_directory_rsp *)rsp_iov[1].iov_base; - if (qd_rsp->sync_hdr.Status == STATUS_NO_MORE_FILES) { + if (qd_rsp->hdr.Status == STATUS_NO_MORE_FILES) { trace_smb3_query_dir_done(xid, fid->persistent_fid, tcon->tid, tcon->ses->Suid, 0, 0); srch_inf->endOfSearch = true; @@ -2462,7 +2462,7 @@ smb2_close_dir(const unsigned int xid, struct cifs_tcon *tcon, static bool smb2_is_status_pending(char *buf, struct TCP_Server_Info *server) { - struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf; + struct smb2_hdr *shdr = (struct smb2_hdr *)buf; int scredits, in_flight; if (shdr->Status != STATUS_PENDING) @@ -2489,13 +2489,14 @@ smb2_is_status_pending(char *buf, struct TCP_Server_Info *server) static bool smb2_is_session_expired(char *buf) { - struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf; + struct smb2_hdr *shdr = (struct smb2_hdr *)buf; if (shdr->Status != STATUS_NETWORK_SESSION_EXPIRED && shdr->Status != STATUS_USER_SESSION_DELETED) return false; - trace_smb3_ses_expired(shdr->TreeId, shdr->SessionId, + trace_smb3_ses_expired(le32_to_cpu(shdr->Id.SyncId.TreeId), + le64_to_cpu(shdr->SessionId), le16_to_cpu(shdr->Command), le64_to_cpu(shdr->MessageId)); cifs_dbg(FYI, "Session expired or deleted\n"); @@ -2506,7 +2507,7 @@ smb2_is_session_expired(char *buf) static bool smb2_is_status_io_timeout(char *buf) { - struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf; + struct smb2_hdr *shdr = (struct smb2_hdr *)buf; if (shdr->Status == STATUS_IO_TIMEOUT) return true; @@ -2517,7 +2518,7 @@ smb2_is_status_io_timeout(char *buf) static void smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server) { - struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf; + struct smb2_hdr *shdr = (struct smb2_hdr *)buf; struct list_head *tmp, *tmp1; struct cifs_ses *ses; struct cifs_tcon *tcon; @@ -2530,7 +2531,7 @@ smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server) ses = list_entry(tmp, struct cifs_ses, smb_ses_list); list_for_each(tmp1, &ses->tcon_list) { tcon = list_entry(tmp1, struct cifs_tcon, tcon_list); - if (tcon->tid == shdr->TreeId) { + if (tcon->tid == le32_to_cpu(shdr->Id.SyncId.TreeId)) { tcon->need_reconnect = true; spin_unlock(&cifs_tcp_ses_lock); pr_warn_once("Server share %s deleted.\n", @@ -2558,9 +2559,9 @@ smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid, void smb2_set_related(struct smb_rqst *rqst) { - struct smb2_sync_hdr *shdr; + struct smb2_hdr *shdr; - shdr = (struct smb2_sync_hdr *)(rqst->rq_iov[0].iov_base); + shdr = (struct smb2_hdr *)(rqst->rq_iov[0].iov_base); if (shdr == NULL) { cifs_dbg(FYI, "shdr NULL in smb2_set_related\n"); return; @@ -2573,13 +2574,13 @@ char smb2_padding[7] = {0, 0, 0, 0, 0, 0, 0}; void smb2_set_next_command(struct cifs_tcon *tcon, struct smb_rqst *rqst) { - struct smb2_sync_hdr *shdr; + struct smb2_hdr *shdr; struct cifs_ses *ses = tcon->ses; struct TCP_Server_Info *server = ses->server; unsigned long len = smb_rqst_len(server, rqst); int i, num_padding; - shdr = (struct smb2_sync_hdr *)(rqst->rq_iov[0].iov_base); + shdr = (struct smb2_hdr *)(rqst->rq_iov[0].iov_base); if (shdr == NULL) { cifs_dbg(FYI, "shdr NULL in smb2_set_next_command\n"); return; @@ -3124,7 +3125,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, resp_buftype, rsp_iov); create_rsp = rsp_iov[0].iov_base; - if (create_rsp && create_rsp->sync_hdr.Status) + if (create_rsp && create_rsp->hdr.Status) err_iov = rsp_iov[0]; ioctl_rsp = rsp_iov[1].iov_base; @@ -4369,8 +4370,8 @@ static void fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, unsigned int orig_len, struct smb_rqst *old_rq, __le16 cipher_type) { - struct smb2_sync_hdr *shdr = - (struct smb2_sync_hdr *)old_rq->rq_iov[0].iov_base; + struct smb2_hdr *shdr = + (struct smb2_hdr *)old_rq->rq_iov[0].iov_base; memset(tr_hdr, 0, sizeof(struct smb2_transform_hdr)); tr_hdr->ProtocolId = SMB2_TRANSFORM_PROTO_NUM; @@ -4496,7 +4497,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, struct crypto_aead *tfm; unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize); - rc = smb2_get_enc_key(server, tr_hdr->SessionId, enc, key); + rc = smb2_get_enc_key(server, le64_to_cpu(tr_hdr->SessionId), enc, key); if (rc) { cifs_server_dbg(VFS, "%s: Could not get %scryption key\n", __func__, enc ? "en" : "de"); @@ -4788,7 +4789,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, unsigned int cur_page_idx; unsigned int pad_len; struct cifs_readdata *rdata = mid->callback_data; - struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf; + struct smb2_hdr *shdr = (struct smb2_hdr *)buf; struct bio_vec *bvec = NULL; struct iov_iter iter; struct kvec iov; @@ -5117,7 +5118,7 @@ receive_encrypted_standard(struct TCP_Server_Info *server, { int ret, length; char *buf = server->smallbuf; - struct smb2_sync_hdr *shdr; + struct smb2_hdr *shdr; unsigned int pdu_length = server->pdu_size; unsigned int buf_size; struct mid_q_entry *mid_entry; @@ -5147,7 +5148,7 @@ receive_encrypted_standard(struct TCP_Server_Info *server, next_is_large = server->large_buf; one_more: - shdr = (struct smb2_sync_hdr *)buf; + shdr = (struct smb2_hdr *)buf; if (shdr->NextCommand) { if (next_is_large) next_buffer = (char *)cifs_buf_get(); @@ -5213,7 +5214,7 @@ smb3_receive_transform(struct TCP_Server_Info *server, unsigned int orig_len = le32_to_cpu(tr_hdr->OriginalMessageSize); if (pdu_length < sizeof(struct smb2_transform_hdr) + - sizeof(struct smb2_sync_hdr)) { + sizeof(struct smb2_hdr)) { cifs_server_dbg(VFS, "Transform message is too small (%u)\n", pdu_length); cifs_reconnect(server); @@ -5246,7 +5247,7 @@ smb3_handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid) static int smb2_next_header(char *buf) { - struct smb2_sync_hdr *hdr = (struct smb2_sync_hdr *)buf; + struct smb2_hdr *hdr = (struct smb2_hdr *)buf; struct smb2_transform_hdr *t_hdr = (struct smb2_transform_hdr *)buf; if (hdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) @@ -5788,7 +5789,7 @@ struct smb_version_values smb20_values = { .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, - .header_size = sizeof(struct smb2_sync_hdr), + .header_size = sizeof(struct smb2_hdr), .header_preamble_size = 0, .max_header_size = MAX_SMB2_HDR_SIZE, .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, @@ -5809,7 +5810,7 @@ struct smb_version_values smb21_values = { .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, - .header_size = sizeof(struct smb2_sync_hdr), + .header_size = sizeof(struct smb2_hdr), .header_preamble_size = 0, .max_header_size = MAX_SMB2_HDR_SIZE, .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, @@ -5830,7 +5831,7 @@ struct smb_version_values smb3any_values = { .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, - .header_size = sizeof(struct smb2_sync_hdr), + .header_size = sizeof(struct smb2_hdr), .header_preamble_size = 0, .max_header_size = MAX_SMB2_HDR_SIZE, .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, @@ -5851,7 +5852,7 @@ struct smb_version_values smbdefault_values = { .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, - .header_size = sizeof(struct smb2_sync_hdr), + .header_size = sizeof(struct smb2_hdr), .header_preamble_size = 0, .max_header_size = MAX_SMB2_HDR_SIZE, .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, @@ -5872,7 +5873,7 @@ struct smb_version_values smb30_values = { .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, - .header_size = sizeof(struct smb2_sync_hdr), + .header_size = sizeof(struct smb2_hdr), .header_preamble_size = 0, .max_header_size = MAX_SMB2_HDR_SIZE, .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, @@ -5893,7 +5894,7 @@ struct smb_version_values smb302_values = { .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, - .header_size = sizeof(struct smb2_sync_hdr), + .header_size = sizeof(struct smb2_hdr), .header_preamble_size = 0, .max_header_size = MAX_SMB2_HDR_SIZE, .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, @@ -5914,7 +5915,7 @@ struct smb_version_values smb311_values = { .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, - .header_size = sizeof(struct smb2_sync_hdr), + .header_size = sizeof(struct smb2_hdr), .header_preamble_size = 0, .max_header_size = MAX_SMB2_HDR_SIZE, .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 7829c590eeac..d2ecb2ea37c0 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -23,7 +23,6 @@ #include <linux/uuid.h> #include <linux/pagemap.h> #include <linux/xattr.h> -#include "smb2pdu.h" #include "cifsglob.h" #include "cifsacl.h" #include "cifsproto.h" @@ -84,7 +83,7 @@ int smb3_encryption_required(const struct cifs_tcon *tcon) } static void -smb2_hdr_assemble(struct smb2_sync_hdr *shdr, __le16 smb2_cmd, +smb2_hdr_assemble(struct smb2_hdr *shdr, __le16 smb2_cmd, const struct cifs_tcon *tcon, struct TCP_Server_Info *server) { @@ -104,7 +103,7 @@ smb2_hdr_assemble(struct smb2_sync_hdr *shdr, __le16 smb2_cmd, } else { shdr->CreditRequest = cpu_to_le16(2); } - shdr->ProcessId = cpu_to_le32((__u16)current->tgid); + shdr->Id.SyncId.ProcessId = cpu_to_le32((__u16)current->tgid); if (!tcon) goto out; @@ -115,10 +114,10 @@ smb2_hdr_assemble(struct smb2_sync_hdr *shdr, __le16 smb2_cmd, shdr->CreditCharge = cpu_to_le16(1); /* else CreditCharge MBZ */ - shdr->TreeId = tcon->tid; + shdr->Id.SyncId.TreeId = cpu_to_le32(tcon->tid); /* Uid is not converted */ if (tcon->ses) - shdr->SessionId = tcon->ses->Suid; + shdr->SessionId = cpu_to_le64(tcon->ses->Suid); /* * If we would set SMB2_FLAGS_DFS_OPERATIONS on open we also would have @@ -331,7 +330,7 @@ fill_small_buf(__le16 smb2_command, struct cifs_tcon *tcon, void *buf, unsigned int *total_len) { - struct smb2_sync_pdu *spdu = (struct smb2_sync_pdu *)buf; + struct smb2_pdu *spdu = (struct smb2_pdu *)buf; /* lookup word count ie StructureSize from table */ __u16 parmsize = smb2_req_struct_sizes[le16_to_cpu(smb2_command)]; @@ -341,10 +340,10 @@ fill_small_buf(__le16 smb2_command, struct cifs_tcon *tcon, */ memset(buf, 0, 256); - smb2_hdr_assemble(&spdu->sync_hdr, smb2_command, tcon, server); + smb2_hdr_assemble(&spdu->hdr, smb2_command, tcon, server); spdu->StructureSize2 = cpu_to_le16(parmsize); - *total_len = parmsize + sizeof(struct smb2_sync_hdr); + *total_len = parmsize + sizeof(struct smb2_hdr); } /* @@ -367,7 +366,7 @@ static int __smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon, } fill_small_buf(smb2_command, tcon, server, - (struct smb2_sync_hdr *)(*request_buf), + (struct smb2_hdr *)(*request_buf), total_len); if (tcon != NULL) { @@ -414,8 +413,8 @@ build_preauth_ctxt(struct smb2_preauth_neg_context *pneg_ctxt) pneg_ctxt->ContextType = SMB2_PREAUTH_INTEGRITY_CAPABILITIES; pneg_ctxt->DataLength = cpu_to_le16(38); pneg_ctxt->HashAlgorithmCount = cpu_to_le16(1); - pneg_ctxt->SaltLength = cpu_to_le16(SMB311_LINUX_CLIENT_SALT_SIZE); - get_random_bytes(pneg_ctxt->Salt, SMB311_LINUX_CLIENT_SALT_SIZE); + pneg_ctxt->SaltLength = cpu_to_le16(SMB311_SALT_SIZE); + get_random_bytes(pneg_ctxt->Salt, SMB311_SALT_SIZE); pneg_ctxt->HashAlgorithms = SMB2_PREAUTH_INTEGRITY_SHA512; } @@ -857,7 +856,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) if (rc) return rc; - req->sync_hdr.SessionId = 0; + req->hdr.SessionId = 0; memset(server->preauth_sha_hash, 0, SMB2_PREAUTH_HASH_SIZE); memset(ses->preauth_sha_hash, 0, SMB2_PREAUTH_HASH_SIZE); @@ -1018,7 +1017,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) server->cipher_type = SMB2_ENCRYPTION_AES128_CCM; security_blob = smb2_get_data_area_len(&blob_offset, &blob_length, - (struct smb2_sync_hdr *)rsp); + (struct smb2_hdr *)rsp); /* * See MS-SMB2 section 2.2.4: if no blob, client picks default which * for us will be @@ -1250,23 +1249,23 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data) return rc; if (sess_data->ses->binding) { - req->sync_hdr.SessionId = sess_data->ses->Suid; - req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED; + req->hdr.SessionId = cpu_to_le64(sess_data->ses->Suid); + req->hdr.Flags |= SMB2_FLAGS_SIGNED; req->PreviousSessionId = 0; req->Flags = SMB2_SESSION_REQ_FLAG_BINDING; } else { /* First session, not a reauthenticate */ - req->sync_hdr.SessionId = 0; + req->hdr.SessionId = 0; /* * if reconnect, we need to send previous sess id * otherwise it is 0 */ - req->PreviousSessionId = sess_data->previous_session; + req->PreviousSessionId = cpu_to_le64(sess_data->previous_session); req->Flags = 0; /* MBZ */ } /* enough to enable echos and oplocks and one max size write */ - req->sync_hdr.CreditRequest = cpu_to_le16(130); + req->hdr.CreditRequest = cpu_to_le16(130); /* only one of SMB2 signing flags may be set in SMB2 request */ if (server->sign) @@ -1425,7 +1424,7 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base; /* keep session id and flags if binding */ if (!ses->binding) { - ses->Suid = rsp->sync_hdr.SessionId; + ses->Suid = le64_to_cpu(rsp->hdr.SessionId); ses->session_flags = le16_to_cpu(rsp->SessionFlags); } @@ -1501,7 +1500,7 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data) /* If true, rc here is expected and not an error */ if (sess_data->buf0_type != CIFS_NO_BUFFER && - rsp->sync_hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) + rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) rc = 0; if (rc) @@ -1523,7 +1522,7 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data) /* keep existing ses id and flags if binding */ if (!ses->binding) { - ses->Suid = rsp->sync_hdr.SessionId; + ses->Suid = le64_to_cpu(rsp->hdr.SessionId); ses->session_flags = le16_to_cpu(rsp->SessionFlags); } @@ -1558,7 +1557,7 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data) goto out; req = (struct smb2_sess_setup_req *) sess_data->iov[0].iov_base; - req->sync_hdr.SessionId = ses->Suid; + req->hdr.SessionId = cpu_to_le64(ses->Suid); rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses, sess_data->nls_cp); @@ -1584,7 +1583,7 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data) /* keep existing ses id and flags if binding */ if (!ses->binding) { - ses->Suid = rsp->sync_hdr.SessionId; + ses->Suid = le64_to_cpu(rsp->hdr.SessionId); ses->session_flags = le16_to_cpu(rsp->SessionFlags); } @@ -1715,12 +1714,12 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) return rc; /* since no tcon, smb2_init can not do this, so do here */ - req->sync_hdr.SessionId = ses->Suid; + req->hdr.SessionId = cpu_to_le64(ses->Suid); if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA) flags |= CIFS_TRANSFORM_REQ; else if (server->sign) - req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED; + req->hdr.Flags |= SMB2_FLAGS_SIGNED; flags |= CIFS_NO_RSP_BUF; @@ -1828,14 +1827,14 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, !(ses->session_flags & (SMB2_SESSION_FLAG_IS_GUEST|SMB2_SESSION_FLAG_IS_NULL)) && ((ses->user_name != NULL) || (ses->sectype == Kerberos))) - req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED; + req->hdr.Flags |= SMB2_FLAGS_SIGNED; memset(&rqst, 0, sizeof(struct smb_rqst)); rqst.rq_iov = iov; rqst.rq_nvec = 2; /* Need 64 for max size write so ask for more in case not there yet */ - req->sync_hdr.CreditRequest = cpu_to_le16(64); + req->hdr.CreditRequest = cpu_to_le16(64); rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); @@ -1871,7 +1870,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, tcon->maximal_access = le32_to_cpu(rsp->MaximalAccess); tcon->tidStatus = CifsGood; tcon->need_reconnect = false; - tcon->tid = rsp->sync_hdr.TreeId; + tcon->tid = le32_to_cpu(rsp->hdr.Id.SyncId.TreeId); strlcpy(tcon->treeName, tree, sizeof(tcon->treeName)); if ((rsp->Capabilities & SMB2_SHARE_CAP_DFS) && @@ -1892,9 +1891,8 @@ tcon_exit: return rc; tcon_error_exit: - if (rsp && rsp->sync_hdr.Status == STATUS_BAD_NETWORK_NAME) { + if (rsp && rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) cifs_tcon_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree); - } goto tcon_exit; } @@ -2608,7 +2606,7 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, if (tcon->share_flags & SHI1005_FLAGS_DFS) { int name_len; - req->sync_hdr.Flags |= SMB2_FLAGS_DFS_OPERATIONS; + req->hdr.Flags |= SMB2_FLAGS_DFS_OPERATIONS; rc = alloc_path_with_tree_prefix(©_path, ©_size, &name_len, tcon->treeName, utf16_path); @@ -2672,11 +2670,13 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, } rsp = (struct smb2_create_rsp *)rsp_iov.iov_base; - trace_smb3_posix_mkdir_done(xid, rsp->PersistentFileId, tcon->tid, + trace_smb3_posix_mkdir_done(xid, le64_to_cpu(rsp->PersistentFileId), + tcon->tid, ses->Suid, CREATE_NOT_FILE, FILE_WRITE_ATTRIBUTES); - SMB2_close(xid, tcon, rsp->PersistentFileId, rsp->VolatileFileId); + SMB2_close(xid, tcon, le64_to_cpu(rsp->PersistentFileId), + le64_to_cpu(rsp->VolatileFileId)); /* Eventually save off posix specific response info and timestaps */ @@ -2740,7 +2740,7 @@ SMB2_open_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server, if (tcon->share_flags & SHI1005_FLAGS_DFS) { int name_len; - req->sync_hdr.Flags |= SMB2_FLAGS_DFS_OPERATIONS; + req->hdr.Flags |= SMB2_FLAGS_DFS_OPERATIONS; rc = alloc_path_with_tree_prefix(©_path, ©_size, &name_len, tcon->treeName, path); @@ -2943,16 +2943,17 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, } goto creat_exit; } else - trace_smb3_open_done(xid, rsp->PersistentFileId, tcon->tid, + trace_smb3_open_done(xid, le64_to_cpu(rsp->PersistentFileId), + tcon->tid, ses->Suid, oparms->create_options, oparms->desired_access); atomic_inc(&tcon->num_remote_opens); - oparms->fid->persistent_fid = rsp->PersistentFileId; - oparms->fid->volatile_fid = rsp->VolatileFileId; + oparms->fid->persistent_fid = le64_to_cpu(rsp->PersistentFileId); + oparms->fid->volatile_fid = le64_to_cpu(rsp->VolatileFileId); oparms->fid->access = oparms->desired_access; #ifdef CONFIG_CIFS_DEBUG2 - oparms->fid->mid = le64_to_cpu(rsp->sync_hdr.MessageId); + oparms->fid->mid = le64_to_cpu(rsp->hdr.MessageId); #endif /* CIFS_DEBUG2 */ if (buf) { @@ -3052,7 +3053,7 @@ SMB2_ioctl_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server, * response size smaller. */ req->MaxOutputResponse = cpu_to_le32(max_response_size); - req->sync_hdr.CreditCharge = + req->hdr.CreditCharge = cpu_to_le16(DIV_ROUND_UP(max(indatalen, max_response_size), SMB2_MAX_BUFFER_SIZE)); if (is_fsctl) @@ -3062,7 +3063,7 @@ SMB2_ioctl_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server, /* validate negotiate request must be signed - see MS-SMB2 3.2.5.5 */ if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO) - req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED; + req->hdr.Flags |= SMB2_FLAGS_SIGNED; return 0; } @@ -3236,8 +3237,8 @@ SMB2_close_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server, if (rc) return rc; - req->PersistentFileId = persistent_fid; - req->VolatileFileId = volatile_fid; + req->PersistentFileId = cpu_to_le64(persistent_fid); + req->VolatileFileId = cpu_to_le64(volatile_fid); if (query_attrs) req->Flags = SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB; else @@ -3600,8 +3601,8 @@ SMB2_notify_init(const unsigned int xid, struct smb_rqst *rqst, if (rc) return rc; - req->PersistentFileId = persistent_fid; - req->VolatileFileId = volatile_fid; + req->PersistentFileId = cpu_to_le64(persistent_fid); + req->VolatileFileId = cpu_to_le64(volatile_fid); /* See note 354 of MS-SMB2, 64K max */ req->OutputBufferLength = cpu_to_le32(SMB2_MAX_BUFFER_SIZE - MAX_SMB2_HDR_SIZE); @@ -3687,7 +3688,7 @@ smb2_echo_callback(struct mid_q_entry *mid) if (mid->mid_state == MID_RESPONSE_RECEIVED || mid->mid_state == MID_RESPONSE_MALFORMED) { - credits.value = le16_to_cpu(rsp->sync_hdr.CreditRequest); + credits.value = le16_to_cpu(rsp->hdr.CreditRequest); credits.instance = server->reconnect_instance; } @@ -3787,7 +3788,7 @@ SMB2_echo(struct TCP_Server_Info *server) if (rc) return rc; - req->sync_hdr.CreditRequest = cpu_to_le16(1); + req->hdr.CreditRequest = cpu_to_le16(1); iov[0].iov_len = total_len; iov[0].iov_base = (char *)req; @@ -3823,8 +3824,8 @@ SMB2_flush_init(const unsigned int xid, struct smb_rqst *rqst, if (rc) return rc; - req->PersistentFileId = persistent_fid; - req->VolatileFileId = volatile_fid; + req->PersistentFileId = cpu_to_le64(persistent_fid); + req->VolatileFileId = cpu_to_le64(volatile_fid); iov[0].iov_base = (char *)req; iov[0].iov_len = total_len; @@ -3890,8 +3891,8 @@ smb2_new_read_req(void **buf, unsigned int *total_len, unsigned int remaining_bytes, int request_type) { int rc = -EACCES; - struct smb2_read_plain_req *req = NULL; - struct smb2_sync_hdr *shdr; + struct smb2_read_req *req = NULL; + struct smb2_hdr *shdr; struct TCP_Server_Info *server = io_parms->server; rc = smb2_plain_req_init(SMB2_READ, io_parms->tcon, server, @@ -3902,11 +3903,11 @@ smb2_new_read_req(void **buf, unsigned int *total_len, if (server == NULL) return -ECONNABORTED; - shdr = &req->sync_hdr; - shdr->ProcessId = cpu_to_le32(io_parms->pid); + shdr = &req->hdr; + shdr->Id.SyncId.ProcessId = cpu_to_le32(io_parms->pid); - req->PersistentFileId = io_parms->persistent_fid; - req->VolatileFileId = io_parms->volatile_fid; + req->PersistentFileId = cpu_to_le64(io_parms->persistent_fid); + req->VolatileFileId = cpu_to_le64(io_parms->volatile_fid); req->ReadChannelInfoOffset = 0; /* reserved */ req->ReadChannelInfoLength = 0; /* reserved */ req->Channel = 0; /* reserved */ @@ -3940,7 +3941,7 @@ smb2_new_read_req(void **buf, unsigned int *total_len, if (need_invalidate) req->Channel = SMB2_CHANNEL_RDMA_V1; req->ReadChannelInfoOffset = - cpu_to_le16(offsetof(struct smb2_read_plain_req, Buffer)); + cpu_to_le16(offsetof(struct smb2_read_req, Buffer)); req->ReadChannelInfoLength = cpu_to_le16(sizeof(struct smbd_buffer_descriptor_v1)); v1 = (struct smbd_buffer_descriptor_v1 *) &req->Buffer[0]; @@ -3964,10 +3965,10 @@ smb2_new_read_req(void **buf, unsigned int *total_len, * Related requests use info from previous read request * in chain. */ - shdr->SessionId = 0xFFFFFFFFFFFFFFFF; - shdr->TreeId = 0xFFFFFFFF; - req->PersistentFileId = 0xFFFFFFFFFFFFFFFF; - req->VolatileFileId = 0xFFFFFFFFFFFFFFFF; + shdr->SessionId = cpu_to_le64(0xFFFFFFFFFFFFFFFF); + shdr->Id.SyncId.TreeId = cpu_to_le32(0xFFFFFFFF); + req->PersistentFileId = cpu_to_le64(0xFFFFFFFFFFFFFFFF); + req->VolatileFileId = cpu_to_le64(0xFFFFFFFFFFFFFFFF); } } if (remaining_bytes > io_parms->length) @@ -3985,8 +3986,8 @@ smb2_readv_callback(struct mid_q_entry *mid) struct cifs_readdata *rdata = mid->callback_data; struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); struct TCP_Server_Info *server = rdata->server; - struct smb2_sync_hdr *shdr = - (struct smb2_sync_hdr *)rdata->iov[0].iov_base; + struct smb2_hdr *shdr = + (struct smb2_hdr *)rdata->iov[0].iov_base; struct cifs_credits credits = { .value = 0, .instance = 0 }; struct smb_rqst rqst = { .rq_iov = &rdata->iov[1], .rq_nvec = 1, @@ -4072,7 +4073,7 @@ smb2_async_readv(struct cifs_readdata *rdata) { int rc, flags = 0; char *buf; - struct smb2_sync_hdr *shdr; + struct smb2_hdr *shdr; struct cifs_io_parms io_parms; struct smb_rqst rqst = { .rq_iov = rdata->iov, .rq_nvec = 1 }; @@ -4105,7 +4106,7 @@ smb2_async_readv(struct cifs_readdata *rdata) rdata->iov[0].iov_base = buf; rdata->iov[0].iov_len = total_len; - shdr = (struct smb2_sync_hdr *)buf; + shdr = (struct smb2_hdr *)buf; if (rdata->credits.value > 0) { shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes, @@ -4144,7 +4145,7 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, { struct smb_rqst rqst; int resp_buftype, rc; - struct smb2_read_plain_req *req = NULL; + struct smb2_read_req *req = NULL; struct smb2_read_rsp *rsp = NULL; struct kvec iov[1]; struct kvec rsp_iov; @@ -4178,19 +4179,22 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, if (rc != -ENODATA) { cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE); cifs_dbg(VFS, "Send error in read = %d\n", rc); - trace_smb3_read_err(xid, req->PersistentFileId, + trace_smb3_read_err(xid, + le64_to_cpu(req->PersistentFileId), io_parms->tcon->tid, ses->Suid, io_parms->offset, io_parms->length, rc); } else - trace_smb3_read_done(xid, req->PersistentFileId, - io_parms->tcon->tid, ses->Suid, - io_parms->offset, 0); + trace_smb3_read_done(xid, + le64_to_cpu(req->PersistentFileId), + io_parms->tcon->tid, ses->Suid, + io_parms->offset, 0); free_rsp_buf(resp_buftype, rsp_iov.iov_base); cifs_small_buf_release(req); return rc == -ENODATA ? 0 : rc; } else - trace_smb3_read_done(xid, req->PersistentFileId, + trace_smb3_read_done(xid, + le64_to_cpu(req->PersistentFileId), io_parms->tcon->tid, ses->Suid, io_parms->offset, io_parms->length); @@ -4238,7 +4242,7 @@ smb2_writev_callback(struct mid_q_entry *mid) switch (mid->mid_state) { case MID_RESPONSE_RECEIVED: - credits.value = le16_to_cpu(rsp->sync_hdr.CreditRequest); + credits.value = le16_to_cpu(rsp->hdr.CreditRequest); credits.instance = server->reconnect_instance; wdata->result = smb2_check_receive(mid, server, 0); if (wdata->result != 0) @@ -4264,7 +4268,7 @@ smb2_writev_callback(struct mid_q_entry *mid) wdata->result = -EAGAIN; break; case MID_RESPONSE_MALFORMED: - credits.value = le16_to_cpu(rsp->sync_hdr.CreditRequest); + credits.value = le16_to_cpu(rsp->hdr.CreditRequest); credits.instance = server->reconnect_instance; fallthrough; default: @@ -4311,7 +4315,7 @@ smb2_async_writev(struct cifs_writedata *wdata, { int rc = -EACCES, flags = 0; struct smb2_write_req *req = NULL; - struct smb2_sync_hdr *shdr; + struct smb2_hdr *shdr; struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); struct TCP_Server_Info *server = wdata->server; struct kvec iov[1]; @@ -4329,11 +4333,11 @@ smb2_async_writev(struct cifs_writedata *wdata, if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; - shdr = (struct smb2_sync_hdr *)req; - shdr->ProcessId = cpu_to_le32(wdata->cfile->pid); + shdr = (struct smb2_hdr *)req; + shdr->Id.SyncId.ProcessId = cpu_to_le32(wdata->cfile->pid); - req->PersistentFileId = wdata->cfile->fid.persistent_fid; - req->VolatileFileId = wdata->cfile->fid.volatile_fid; + req->PersistentFileId = cpu_to_le64(wdata->cfile->fid.persistent_fid); + req->VolatileFileId = cpu_to_le64(wdata->cfile->fid.volatile_fid); req->WriteChannelInfoOffset = 0; req->WriteChannelInfoLength = 0; req->Channel = 0; @@ -4430,7 +4434,8 @@ smb2_async_writev(struct cifs_writedata *wdata, wdata, flags, &wdata->credits); if (rc) { - trace_smb3_write_err(0 /* no xid */, req->PersistentFileId, + trace_smb3_write_err(0 /* no xid */, + le64_to_cpu(req->PersistentFileId), tcon->tid, tcon->ses->Suid, wdata->offset, wdata->bytes, rc); kref_put(&wdata->refcount, release); @@ -4481,10 +4486,10 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, if (smb3_encryption_required(io_parms->tcon)) flags |= CIFS_TRANSFORM_REQ; - req->sync_hdr.ProcessId = cpu_to_le32(io_parms->pid); + req->hdr.Id.SyncId.ProcessId = cpu_to_le32(io_parms->pid); - req->PersistentFileId = io_parms->persistent_fid; - req->VolatileFileId = io_parms->volatile_fid; + req->PersistentFileId = cpu_to_le64(io_parms->persistent_fid); + req->VolatileFileId = cpu_to_le64(io_parms->volatile_fid); req->WriteChannelInfoOffset = 0; req->WriteChannelInfoLength = 0; req->Channel = 0; @@ -4512,7 +4517,8 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, rsp = (struct smb2_write_rsp *)rsp_iov.iov_base; if (rc) { - trace_smb3_write_err(xid, req->PersistentFileId, + trace_smb3_write_err(xid, + le64_to_cpu(req->PersistentFileId), io_parms->tcon->tid, io_parms->tcon->ses->Suid, io_parms->offset, io_parms->length, rc); @@ -4520,10 +4526,11 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, cifs_dbg(VFS, "Send error in write = %d\n", rc); } else { *nbytes = le32_to_cpu(rsp->DataLength); - trace_smb3_write_done(xid, req->PersistentFileId, - io_parms->tcon->tid, - io_parms->tcon->ses->Suid, - io_parms->offset, *nbytes); + trace_smb3_write_done(xid, + le64_to_cpu(req->PersistentFileId), + io_parms->tcon->tid, + io_parms->tcon->ses->Suid, + io_parms->offset, *nbytes); } cifs_small_buf_release(req); @@ -4866,7 +4873,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, if (rc) { if (rc == -ENODATA && - rsp->sync_hdr.Status == STATUS_NO_MORE_FILES) { + rsp->hdr.Status == STATUS_NO_MORE_FILES) { trace_smb3_query_dir_done(xid, persistent_fid, tcon->tid, tcon->ses->Suid, index, 0); srch_inf->endOfSearch = true; @@ -4914,7 +4921,7 @@ SMB2_set_info_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server, if (rc) return rc; - req->sync_hdr.ProcessId = cpu_to_le32(pid); + req->hdr.Id.SyncId.ProcessId = cpu_to_le32(pid); req->InfoType = info_type; req->FileInfoClass = info_class; req->PersistentFileId = persistent_fid; @@ -5074,7 +5081,7 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, req->VolatileFid = volatile_fid; req->PersistentFid = persistent_fid; req->OplockLevel = oplock_level; - req->sync_hdr.CreditRequest = cpu_to_le16(1); + req->hdr.CreditRequest = cpu_to_le16(1); flags |= CIFS_NO_RSP_BUF; @@ -5376,7 +5383,7 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; - req->sync_hdr.ProcessId = cpu_to_le32(pid); + req->hdr.Id.SyncId.ProcessId = cpu_to_le32(pid); req->LockCount = cpu_to_le16(num_lock); req->PersistentFileId = persist_fid; @@ -5452,7 +5459,7 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; - req->sync_hdr.CreditRequest = cpu_to_le16(1); + req->hdr.CreditRequest = cpu_to_le16(1); req->StructureSize = cpu_to_le16(36); total_len += 12; diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index f32c99c9ba13..33cfd0a1adf1 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -14,156 +14,12 @@ #include <net/sock.h> #include "cifsacl.h" -/* - * Note that, due to trying to use names similar to the protocol specifications, - * there are many mixed case field names in the structures below. Although - * this does not match typical Linux kernel style, it is necessary to be - * able to match against the protocol specfication. - * - * SMB2 commands - * Some commands have minimal (wct=0,bcc=0), or uninteresting, responses - * (ie no useful data other than the SMB error code itself) and are marked such. - * Knowing this helps avoid response buffer allocations and copy in some cases. - */ - -/* List of commands in host endian */ -#define SMB2_NEGOTIATE_HE 0x0000 -#define SMB2_SESSION_SETUP_HE 0x0001 -#define SMB2_LOGOFF_HE 0x0002 /* trivial request/resp */ -#define SMB2_TREE_CONNECT_HE 0x0003 -#define SMB2_TREE_DISCONNECT_HE 0x0004 /* trivial req/resp */ -#define SMB2_CREATE_HE 0x0005 -#define SMB2_CLOSE_HE 0x0006 -#define SMB2_FLUSH_HE 0x0007 /* trivial resp */ -#define SMB2_READ_HE 0x0008 -#define SMB2_WRITE_HE 0x0009 -#define SMB2_LOCK_HE 0x000A -#define SMB2_IOCTL_HE 0x000B -#define SMB2_CANCEL_HE 0x000C -#define SMB2_ECHO_HE 0x000D -#define SMB2_QUERY_DIRECTORY_HE 0x000E -#define SMB2_CHANGE_NOTIFY_HE 0x000F -#define SMB2_QUERY_INFO_HE 0x0010 -#define SMB2_SET_INFO_HE 0x0011 -#define SMB2_OPLOCK_BREAK_HE 0x0012 - -/* The same list in little endian */ -#define SMB2_NEGOTIATE cpu_to_le16(SMB2_NEGOTIATE_HE) -#define SMB2_SESSION_SETUP cpu_to_le16(SMB2_SESSION_SETUP_HE) -#define SMB2_LOGOFF cpu_to_le16(SMB2_LOGOFF_HE) -#define SMB2_TREE_CONNECT cpu_to_le16(SMB2_TREE_CONNECT_HE) -#define SMB2_TREE_DISCONNECT cpu_to_le16(SMB2_TREE_DISCONNECT_HE) -#define SMB2_CREATE cpu_to_le16(SMB2_CREATE_HE) -#define SMB2_CLOSE cpu_to_le16(SMB2_CLOSE_HE) -#define SMB2_FLUSH cpu_to_le16(SMB2_FLUSH_HE) -#define SMB2_READ cpu_to_le16(SMB2_READ_HE) -#define SMB2_WRITE cpu_to_le16(SMB2_WRITE_HE) -#define SMB2_LOCK cpu_to_le16(SMB2_LOCK_HE) -#define SMB2_IOCTL cpu_to_le16(SMB2_IOCTL_HE) -#define SMB2_CANCEL cpu_to_le16(SMB2_CANCEL_HE) -#define SMB2_ECHO cpu_to_le16(SMB2_ECHO_HE) -#define SMB2_QUERY_DIRECTORY cpu_to_le16(SMB2_QUERY_DIRECTORY_HE) -#define SMB2_CHANGE_NOTIFY cpu_to_le16(SMB2_CHANGE_NOTIFY_HE) -#define SMB2_QUERY_INFO cpu_to_le16(SMB2_QUERY_INFO_HE) -#define SMB2_SET_INFO cpu_to_le16(SMB2_SET_INFO_HE) -#define SMB2_OPLOCK_BREAK cpu_to_le16(SMB2_OPLOCK_BREAK_HE) - -#define SMB2_INTERNAL_CMD cpu_to_le16(0xFFFF) - -#define NUMBER_OF_SMB2_COMMANDS 0x0013 - /* 52 transform hdr + 64 hdr + 88 create rsp */ #define SMB2_TRANSFORM_HEADER_SIZE 52 #define MAX_SMB2_HDR_SIZE 204 -#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe) -#define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd) -#define SMB2_COMPRESSION_TRANSFORM_ID cpu_to_le32(0x424d53fc) - -/* - * SMB2 Header Definition - * - * "MBZ" : Must be Zero - * "BB" : BugBug, Something to check/review/analyze later - * "PDU" : "Protocol Data Unit" (ie a network "frame") - * - */ - -#define SMB2_HEADER_STRUCTURE_SIZE cpu_to_le16(64) - -struct smb2_sync_hdr { - __le32 ProtocolId; /* 0xFE 'S' 'M' 'B' */ - __le16 StructureSize; /* 64 */ - __le16 CreditCharge; /* MBZ */ - __le32 Status; /* Error from server */ - __le16 Command; - __le16 CreditRequest; /* CreditResponse */ - __le32 Flags; - __le32 NextCommand; - __le64 MessageId; - __le32 ProcessId; - __u32 TreeId; /* opaque - so do not make little endian */ - __u64 SessionId; /* opaque - so do not make little endian */ - __u8 Signature[16]; -} __packed; - /* The total header size for SMB2 read and write */ -#define SMB2_READWRITE_PDU_HEADER_SIZE (48 + sizeof(struct smb2_sync_hdr)) - -struct smb2_sync_pdu { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize2; /* size of wct area (varies, request specific) */ -} __packed; - -#define SMB3_AES_CCM_NONCE 11 -#define SMB3_AES_GCM_NONCE 12 - -/* Transform flags (for 3.0 dialect this flag indicates CCM */ -#define TRANSFORM_FLAG_ENCRYPTED 0x0001 -struct smb2_transform_hdr { - __le32 ProtocolId; /* 0xFD 'S' 'M' 'B' */ - __u8 Signature[16]; - __u8 Nonce[16]; - __le32 OriginalMessageSize; - __u16 Reserved1; - __le16 Flags; /* EncryptionAlgorithm for 3.0, enc enabled for 3.1.1 */ - __u64 SessionId; -} __packed; - -/* See MS-SMB2 2.2.42 */ -struct smb2_compression_transform_hdr_unchained { - __le32 ProtocolId; /* 0xFC 'S' 'M' 'B' */ - __le32 OriginalCompressedSegmentSize; - __le16 CompressionAlgorithm; - __le16 Flags; - __le16 Length; /* if chained it is length, else offset */ -} __packed; - -/* See MS-SMB2 2.2.42.1 */ -#define SMB2_COMPRESSION_FLAG_NONE 0x0000 -#define SMB2_COMPRESSION_FLAG_CHAINED 0x0001 - -struct compression_payload_header { - __le16 CompressionAlgorithm; - __le16 Flags; - __le32 Length; /* length of compressed playload including field below if present */ - /* __le32 OriginalPayloadSize; */ /* optional, present when LZNT1, LZ77, LZ77+Huffman */ -} __packed; - -/* See MS-SMB2 2.2.42.2 */ -struct smb2_compression_transform_hdr_chained { - __le32 ProtocolId; /* 0xFC 'S' 'M' 'B' */ - __le32 OriginalCompressedSegmentSize; - /* struct compression_payload_header[] */ -} __packed; - -/* See MS-SMB2 2.2.42.2.2 */ -struct compression_pattern_payload_v1 { - __le16 Pattern; - __le16 Reserved1; - __le16 Reserved2; - __le32 Repetitions; -} __packed; +#define SMB2_READWRITE_PDU_HEADER_SIZE (48 + sizeof(struct smb2_hdr)) /* See MS-SMB2 2.2.43 */ struct smb2_rdma_transform { @@ -190,17 +46,6 @@ struct smb2_rdma_crypto_transform { } __packed; /* - * SMB2 flag definitions - */ -#define SMB2_FLAGS_SERVER_TO_REDIR cpu_to_le32(0x00000001) -#define SMB2_FLAGS_ASYNC_COMMAND cpu_to_le32(0x00000002) -#define SMB2_FLAGS_RELATED_OPERATIONS cpu_to_le32(0x00000004) -#define SMB2_FLAGS_SIGNED cpu_to_le32(0x00000008) -#define SMB2_FLAGS_PRIORITY_MASK cpu_to_le32(0x00000070) /* SMB3.1.1 */ -#define SMB2_FLAGS_DFS_OPERATIONS cpu_to_le32(0x10000000) -#define SMB2_FLAGS_REPLAY_OPERATION cpu_to_le32(0x20000000) /* SMB3 & up */ - -/* * Definitions for SMB2 Protocol Data Units (network frames) * * See MS-SMB2.PDF specification for protocol details. @@ -214,7 +59,7 @@ struct smb2_rdma_crypto_transform { #define SMB2_ERROR_STRUCTURE_SIZE2 cpu_to_le16(9) struct smb2_err_rsp { - struct smb2_sync_hdr sync_hdr; + struct smb2_hdr hdr; __le16 StructureSize; __le16 Reserved; /* MBZ */ __le32 ByteCount; /* even if zero, at least one byte follows */ @@ -270,530 +115,6 @@ struct share_redirect_error_context_rsp { /* __u8 ResourceName[] */ /* Name of share as counted Unicode string */ } __packed; -#define SMB2_CLIENT_GUID_SIZE 16 - -struct smb2_negotiate_req { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize; /* Must be 36 */ - __le16 DialectCount; - __le16 SecurityMode; - __le16 Reserved; /* MBZ */ - __le32 Capabilities; - __u8 ClientGUID[SMB2_CLIENT_GUID_SIZE]; - /* In SMB3.02 and earlier next three were MBZ le64 ClientStartTime */ - __le32 NegotiateContextOffset; /* SMB3.1.1 only. MBZ earlier */ - __le16 NegotiateContextCount; /* SMB3.1.1 only. MBZ earlier */ - __le16 Reserved2; - __le16 Dialects[4]; /* BB expand this if autonegotiate > 4 dialects */ -} __packed; - -/* Dialects */ -#define SMB10_PROT_ID 0x0000 /* local only, not sent on wire w/CIFS negprot */ -#define SMB20_PROT_ID 0x0202 -#define SMB21_PROT_ID 0x0210 -#define SMB30_PROT_ID 0x0300 -#define SMB302_PROT_ID 0x0302 -#define SMB311_PROT_ID 0x0311 -#define BAD_PROT_ID 0xFFFF - -/* SecurityMode flags */ -#define SMB2_NEGOTIATE_SIGNING_ENABLED 0x0001 -#define SMB2_NEGOTIATE_SIGNING_REQUIRED 0x0002 -#define SMB2_SEC_MODE_FLAGS_ALL 0x0003 - -/* Capabilities flags */ -#define SMB2_GLOBAL_CAP_DFS 0x00000001 -#define SMB2_GLOBAL_CAP_LEASING 0x00000002 /* Resp only New to SMB2.1 */ -#define SMB2_GLOBAL_CAP_LARGE_MTU 0X00000004 /* Resp only New to SMB2.1 */ -#define SMB2_GLOBAL_CAP_MULTI_CHANNEL 0x00000008 /* New to SMB3 */ -#define SMB2_GLOBAL_CAP_PERSISTENT_HANDLES 0x00000010 /* New to SMB3 */ -#define SMB2_GLOBAL_CAP_DIRECTORY_LEASING 0x00000020 /* New to SMB3 */ -#define SMB2_GLOBAL_CAP_ENCRYPTION 0x00000040 /* New to SMB3 */ -/* Internal types */ -#define SMB2_NT_FIND 0x00100000 -#define SMB2_LARGE_FILES 0x00200000 - - -/* Negotiate Contexts - ContextTypes. See MS-SMB2 section 2.2.3.1 for details */ -#define SMB2_PREAUTH_INTEGRITY_CAPABILITIES cpu_to_le16(1) -#define SMB2_ENCRYPTION_CAPABILITIES cpu_to_le16(2) -#define SMB2_COMPRESSION_CAPABILITIES cpu_to_le16(3) -#define SMB2_NETNAME_NEGOTIATE_CONTEXT_ID cpu_to_le16(5) -#define SMB2_TRANSPORT_CAPABILITIES cpu_to_le16(6) -#define SMB2_RDMA_TRANSFORM_CAPABILITIES cpu_to_le16(7) -#define SMB2_SIGNING_CAPABILITIES cpu_to_le16(8) -#define SMB2_POSIX_EXTENSIONS_AVAILABLE cpu_to_le16(0x100) - -struct smb2_neg_context { - __le16 ContextType; - __le16 DataLength; - __le32 Reserved; - /* Followed by array of data. NOTE: some servers require padding to 8 byte boundary */ -} __packed; - -#define SMB311_LINUX_CLIENT_SALT_SIZE 32 -/* Hash Algorithm Types */ -#define SMB2_PREAUTH_INTEGRITY_SHA512 cpu_to_le16(0x0001) -#define SMB2_PREAUTH_HASH_SIZE 64 - -/* - * SaltLength that the server send can be zero, so the only three required - * fields (all __le16) end up six bytes total, so the minimum context data len - * in the response is six bytes which accounts for - * - * HashAlgorithmCount, SaltLength, and 1 HashAlgorithm. - */ -#define MIN_PREAUTH_CTXT_DATA_LEN 6 - -struct smb2_preauth_neg_context { - __le16 ContextType; /* 1 */ - __le16 DataLength; - __le32 Reserved; - __le16 HashAlgorithmCount; /* 1 */ - __le16 SaltLength; - __le16 HashAlgorithms; /* HashAlgorithms[0] since only one defined */ - __u8 Salt[SMB311_LINUX_CLIENT_SALT_SIZE]; -} __packed; - -/* Encryption Algorithms Ciphers */ -#define SMB2_ENCRYPTION_AES128_CCM cpu_to_le16(0x0001) -#define SMB2_ENCRYPTION_AES128_GCM cpu_to_le16(0x0002) -/* we currently do not request AES256_CCM since presumably GCM faster */ -#define SMB2_ENCRYPTION_AES256_CCM cpu_to_le16(0x0003) -#define SMB2_ENCRYPTION_AES256_GCM cpu_to_le16(0x0004) - -/* Min encrypt context data is one cipher so 2 bytes + 2 byte count field */ -#define MIN_ENCRYPT_CTXT_DATA_LEN 4 -struct smb2_encryption_neg_context { - __le16 ContextType; /* 2 */ - __le16 DataLength; - __le32 Reserved; - /* CipherCount usally 2, but can be 3 when AES256-GCM enabled */ - __le16 CipherCount; /* AES128-GCM and AES128-CCM by default */ - __le16 Ciphers[3]; -} __packed; - -/* See MS-SMB2 2.2.3.1.3 */ -#define SMB3_COMPRESS_NONE cpu_to_le16(0x0000) -#define SMB3_COMPRESS_LZNT1 cpu_to_le16(0x0001) -#define SMB3_COMPRESS_LZ77 cpu_to_le16(0x0002) -#define SMB3_COMPRESS_LZ77_HUFF cpu_to_le16(0x0003) -/* Pattern scanning algorithm See MS-SMB2 3.1.4.4.1 */ -#define SMB3_COMPRESS_PATTERN cpu_to_le16(0x0004) /* Pattern_V1 */ - -/* Compression Flags */ -#define SMB2_COMPRESSION_CAPABILITIES_FLAG_NONE cpu_to_le32(0x00000000) -#define SMB2_COMPRESSION_CAPABILITIES_FLAG_CHAINED cpu_to_le32(0x00000001) - -struct smb2_compression_capabilities_context { - __le16 ContextType; /* 3 */ - __le16 DataLength; - __u32 Reserved; - __le16 CompressionAlgorithmCount; - __u16 Padding; - __u32 Flags; - __le16 CompressionAlgorithms[3]; - __u16 Pad; /* Some servers require pad to DataLen multiple of 8 */ - /* Check if pad needed */ -} __packed; - -/* - * For smb2_netname_negotiate_context_id See MS-SMB2 2.2.3.1.4. - * Its struct simply contains NetName, an array of Unicode characters - */ -struct smb2_netname_neg_context { - __le16 ContextType; /* 5 */ - __le16 DataLength; - __le32 Reserved; - __le16 NetName[]; /* hostname of target converted to UCS-2 */ -} __packed; - -/* - * For smb2_transport_capabilities context see MS-SMB2 2.2.3.1.5 - * and 2.2.4.1.5 - */ - -/* Flags */ -#define SMB2_ACCEPT_TRANSFORM_LEVEL_SECURITY 0x00000001 - -struct smb2_transport_capabilities_context { - __le16 ContextType; /* 6 */ - __le16 DataLength; - __u32 Reserved; - __le32 Flags; - __u32 Pad; -} __packed; - -/* - * For rdma transform capabilities context see MS-SMB2 2.2.3.1.6 - * and 2.2.4.1.6 - */ - -/* RDMA Transform IDs */ -#define SMB2_RDMA_TRANSFORM_NONE 0x0000 -#define SMB2_RDMA_TRANSFORM_ENCRYPTION 0x0001 -#define SMB2_RDMA_TRANSFORM_SIGNING 0x0002 - -struct smb2_rdma_transform_capabilities_context { - __le16 ContextType; /* 7 */ - __le16 DataLength; - __u32 Reserved; - __le16 TransformCount; - __u16 Reserved1; - __u32 Reserved2; - __le16 RDMATransformIds[]; -} __packed; - -/* - * For signing capabilities context see MS-SMB2 2.2.3.1.7 - * and 2.2.4.1.7 - */ - -/* Signing algorithms */ -#define SIGNING_ALG_HMAC_SHA256 0 -#define SIGNING_ALG_AES_CMAC 1 -#define SIGNING_ALG_AES_GMAC 2 - -struct smb2_signing_capabilities { - __le16 ContextType; /* 8 */ - __le16 DataLength; - __u32 Reserved; - __le16 SigningAlgorithmCount; - __le16 SigningAlgorithms[]; - /* Followed by padding to 8 byte boundary (required by some servers) */ -} __packed; - -#define POSIX_CTXT_DATA_LEN 16 -struct smb2_posix_neg_context { - __le16 ContextType; /* 0x100 */ - __le16 DataLength; - __le32 Reserved; - __u8 Name[16]; /* POSIX ctxt GUID 93AD25509CB411E7B42383DE968BCD7C */ -} __packed; - -struct smb2_negotiate_rsp { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize; /* Must be 65 */ - __le16 SecurityMode; - __le16 DialectRevision; - __le16 NegotiateContextCount; /* Prior to SMB3.1.1 was Reserved & MBZ */ - __u8 ServerGUID[16]; - __le32 Capabilities; - __le32 MaxTransactSize; - __le32 MaxReadSize; - __le32 MaxWriteSize; - __le64 SystemTime; /* MBZ */ - __le64 ServerStartTime; - __le16 SecurityBufferOffset; - __le16 SecurityBufferLength; - __le32 NegotiateContextOffset; /* Pre:SMB3.1.1 was reserved/ignored */ - __u8 Buffer[1]; /* variable length GSS security buffer */ -} __packed; - -/* Flags */ -#define SMB2_SESSION_REQ_FLAG_BINDING 0x01 -#define SMB2_SESSION_REQ_FLAG_ENCRYPT_DATA 0x04 - -struct smb2_sess_setup_req { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize; /* Must be 25 */ - __u8 Flags; - __u8 SecurityMode; - __le32 Capabilities; - __le32 Channel; - __le16 SecurityBufferOffset; - __le16 SecurityBufferLength; - __u64 PreviousSessionId; - __u8 Buffer[1]; /* variable length GSS security buffer */ -} __packed; - -/* Currently defined SessionFlags */ -#define SMB2_SESSION_FLAG_IS_GUEST 0x0001 -#define SMB2_SESSION_FLAG_IS_NULL 0x0002 -#define SMB2_SESSION_FLAG_ENCRYPT_DATA 0x0004 -struct smb2_sess_setup_rsp { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize; /* Must be 9 */ - __le16 SessionFlags; - __le16 SecurityBufferOffset; - __le16 SecurityBufferLength; - __u8 Buffer[1]; /* variable length GSS security buffer */ -} __packed; - -struct smb2_logoff_req { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize; /* Must be 4 */ - __le16 Reserved; -} __packed; - -struct smb2_logoff_rsp { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize; /* Must be 4 */ - __le16 Reserved; -} __packed; - -/* Flags/Reserved for SMB3.1.1 */ -#define SMB2_TREE_CONNECT_FLAG_CLUSTER_RECONNECT cpu_to_le16(0x0001) -#define SMB2_TREE_CONNECT_FLAG_REDIRECT_TO_OWNER cpu_to_le16(0x0002) -#define SMB2_TREE_CONNECT_FLAG_EXTENSION_PRESENT cpu_to_le16(0x0004) - -struct smb2_tree_connect_req { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize; /* Must be 9 */ - __le16 Flags; /* Reserved MBZ for dialects prior to SMB3.1.1 */ - __le16 PathOffset; - __le16 PathLength; - __u8 Buffer[1]; /* variable length */ -} __packed; - -/* See MS-SMB2 section 2.2.9.2 */ -/* Context Types */ -#define SMB2_RESERVED_TREE_CONNECT_CONTEXT_ID 0x0000 -#define SMB2_REMOTED_IDENTITY_TREE_CONNECT_CONTEXT_ID cpu_to_le16(0x0001) - -struct tree_connect_contexts { - __le16 ContextType; - __le16 DataLength; - __le32 Reserved; - __u8 Data[]; -} __packed; - -/* Remoted identity tree connect context structures - see MS-SMB2 2.2.9.2.1 */ -struct smb3_blob_data { - __le16 BlobSize; - __u8 BlobData[]; -} __packed; - -/* Valid values for Attr */ -#define SE_GROUP_MANDATORY 0x00000001 -#define SE_GROUP_ENABLED_BY_DEFAULT 0x00000002 -#define SE_GROUP_ENABLED 0x00000004 -#define SE_GROUP_OWNER 0x00000008 -#define SE_GROUP_USE_FOR_DENY_ONLY 0x00000010 -#define SE_GROUP_INTEGRITY 0x00000020 -#define SE_GROUP_INTEGRITY_ENABLED 0x00000040 -#define SE_GROUP_RESOURCE 0x20000000 -#define SE_GROUP_LOGON_ID 0xC0000000 - -/* struct sid_attr_data is SidData array in BlobData format then le32 Attr */ - -struct sid_array_data { - __le16 SidAttrCount; - /* SidAttrList - array of sid_attr_data structs */ -} __packed; - -struct luid_attr_data { - -} __packed; - -/* - * struct privilege_data is the same as BLOB_DATA - see MS-SMB2 2.2.9.2.1.5 - * but with size of LUID_ATTR_DATA struct and BlobData set to LUID_ATTR DATA - */ - -struct privilege_array_data { - __le16 PrivilegeCount; - /* array of privilege_data structs */ -} __packed; - -struct remoted_identity_tcon_context { - __le16 TicketType; /* must be 0x0001 */ - __le16 TicketSize; /* total size of this struct */ - __le16 User; /* offset to SID_ATTR_DATA struct with user info */ - __le16 UserName; /* offset to null terminated Unicode username string */ - __le16 Domain; /* offset to null terminated Unicode domain name */ - __le16 Groups; /* offset to SID_ARRAY_DATA struct with group info */ - __le16 RestrictedGroups; /* similar to above */ - __le16 Privileges; /* offset to PRIVILEGE_ARRAY_DATA struct */ - __le16 PrimaryGroup; /* offset to SID_ARRAY_DATA struct */ - __le16 Owner; /* offset to BLOB_DATA struct */ - __le16 DefaultDacl; /* offset to BLOB_DATA struct */ - __le16 DeviceGroups; /* offset to SID_ARRAY_DATA struct */ - __le16 UserClaims; /* offset to BLOB_DATA struct */ - __le16 DeviceClaims; /* offset to BLOB_DATA struct */ - __u8 TicketInfo[]; /* variable length buf - remoted identity data */ -} __packed; - -struct smb2_tree_connect_req_extension { - __le32 TreeConnectContextOffset; - __le16 TreeConnectContextCount; - __u8 Reserved[10]; - __u8 PathName[]; /* variable sized array */ - /* followed by array of TreeConnectContexts */ -} __packed; - -struct smb2_tree_connect_rsp { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize; /* Must be 16 */ - __u8 ShareType; /* see below */ - __u8 Reserved; - __le32 ShareFlags; /* see below */ - __le32 Capabilities; /* see below */ - __le32 MaximalAccess; -} __packed; - -/* Possible ShareType values */ -#define SMB2_SHARE_TYPE_DISK 0x01 -#define SMB2_SHARE_TYPE_PIPE 0x02 -#define SMB2_SHARE_TYPE_PRINT 0x03 - -/* - * Possible ShareFlags - exactly one and only one of the first 4 caching flags - * must be set (any of the remaining, SHI1005, flags may be set individually - * or in combination. - */ -#define SMB2_SHAREFLAG_MANUAL_CACHING 0x00000000 -#define SMB2_SHAREFLAG_AUTO_CACHING 0x00000010 -#define SMB2_SHAREFLAG_VDO_CACHING 0x00000020 -#define SMB2_SHAREFLAG_NO_CACHING 0x00000030 -#define SHI1005_FLAGS_DFS 0x00000001 -#define SHI1005_FLAGS_DFS_ROOT 0x00000002 -#define SHI1005_FLAGS_RESTRICT_EXCLUSIVE_OPENS 0x00000100 -#define SHI1005_FLAGS_FORCE_SHARED_DELETE 0x00000200 -#define SHI1005_FLAGS_ALLOW_NAMESPACE_CACHING 0x00000400 -#define SHI1005_FLAGS_ACCESS_BASED_DIRECTORY_ENUM 0x00000800 -#define SHI1005_FLAGS_FORCE_LEVELII_OPLOCK 0x00001000 -#define SHI1005_FLAGS_ENABLE_HASH_V1 0x00002000 -#define SHI1005_FLAGS_ENABLE_HASH_V2 0x00004000 -#define SHI1005_FLAGS_ENCRYPT_DATA 0x00008000 -#define SMB2_SHAREFLAG_IDENTITY_REMOTING 0x00040000 /* 3.1.1 */ -#define SMB2_SHAREFLAG_COMPRESS_DATA 0x00100000 /* 3.1.1 */ -#define SHI1005_FLAGS_ALL 0x0014FF33 - -/* Possible share capabilities */ -#define SMB2_SHARE_CAP_DFS cpu_to_le32(0x00000008) /* all dialects */ -#define SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY cpu_to_le32(0x00000010) /* 3.0 */ -#define SMB2_SHARE_CAP_SCALEOUT cpu_to_le32(0x00000020) /* 3.0 */ -#define SMB2_SHARE_CAP_CLUSTER cpu_to_le32(0x00000040) /* 3.0 */ -#define SMB2_SHARE_CAP_ASYMMETRIC cpu_to_le32(0x00000080) /* 3.02 */ -#define SMB2_SHARE_CAP_REDIRECT_TO_OWNER cpu_to_le32(0x00000100) /* 3.1.1 */ - -struct smb2_tree_disconnect_req { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize; /* Must be 4 */ - __le16 Reserved; -} __packed; - -struct smb2_tree_disconnect_rsp { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize; /* Must be 4 */ - __le16 Reserved; -} __packed; - -/* File Attrubutes */ -#define FILE_ATTRIBUTE_READONLY 0x00000001 -#define FILE_ATTRIBUTE_HIDDEN 0x00000002 -#define FILE_ATTRIBUTE_SYSTEM 0x00000004 -#define FILE_ATTRIBUTE_DIRECTORY 0x00000010 -#define FILE_ATTRIBUTE_ARCHIVE 0x00000020 -#define FILE_ATTRIBUTE_NORMAL 0x00000080 -#define FILE_ATTRIBUTE_TEMPORARY 0x00000100 -#define FILE_ATTRIBUTE_SPARSE_FILE 0x00000200 -#define FILE_ATTRIBUTE_REPARSE_POINT 0x00000400 -#define FILE_ATTRIBUTE_COMPRESSED 0x00000800 -#define FILE_ATTRIBUTE_OFFLINE 0x00001000 -#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x00002000 -#define FILE_ATTRIBUTE_ENCRYPTED 0x00004000 -#define FILE_ATTRIBUTE_INTEGRITY_STREAM 0x00008000 -#define FILE_ATTRIBUTE_NO_SCRUB_DATA 0x00020000 - -/* Oplock levels */ -#define SMB2_OPLOCK_LEVEL_NONE 0x00 -#define SMB2_OPLOCK_LEVEL_II 0x01 -#define SMB2_OPLOCK_LEVEL_EXCLUSIVE 0x08 -#define SMB2_OPLOCK_LEVEL_BATCH 0x09 -#define SMB2_OPLOCK_LEVEL_LEASE 0xFF -/* Non-spec internal type */ -#define SMB2_OPLOCK_LEVEL_NOCHANGE 0x99 - -/* Desired Access Flags */ -#define FILE_READ_DATA_LE cpu_to_le32(0x00000001) -#define FILE_WRITE_DATA_LE cpu_to_le32(0x00000002) -#define FILE_APPEND_DATA_LE cpu_to_le32(0x00000004) -#define FILE_READ_EA_LE cpu_to_le32(0x00000008) -#define FILE_WRITE_EA_LE cpu_to_le32(0x00000010) -#define FILE_EXECUTE_LE cpu_to_le32(0x00000020) -#define FILE_READ_ATTRIBUTES_LE cpu_to_le32(0x00000080) -#define FILE_WRITE_ATTRIBUTES_LE cpu_to_le32(0x00000100) -#define FILE_DELETE_LE cpu_to_le32(0x00010000) -#define FILE_READ_CONTROL_LE cpu_to_le32(0x00020000) -#define FILE_WRITE_DAC_LE cpu_to_le32(0x00040000) -#define FILE_WRITE_OWNER_LE cpu_to_le32(0x00080000) -#define FILE_SYNCHRONIZE_LE cpu_to_le32(0x00100000) -#define FILE_ACCESS_SYSTEM_SECURITY_LE cpu_to_le32(0x01000000) -#define FILE_MAXIMAL_ACCESS_LE cpu_to_le32(0x02000000) -#define FILE_GENERIC_ALL_LE cpu_to_le32(0x10000000) -#define FILE_GENERIC_EXECUTE_LE cpu_to_le32(0x20000000) -#define FILE_GENERIC_WRITE_LE cpu_to_le32(0x40000000) -#define FILE_GENERIC_READ_LE cpu_to_le32(0x80000000) - -/* ShareAccess Flags */ -#define FILE_SHARE_READ_LE cpu_to_le32(0x00000001) -#define FILE_SHARE_WRITE_LE cpu_to_le32(0x00000002) -#define FILE_SHARE_DELETE_LE cpu_to_le32(0x00000004) -#define FILE_SHARE_ALL_LE cpu_to_le32(0x00000007) - -/* CreateDisposition Flags */ -#define FILE_SUPERSEDE_LE cpu_to_le32(0x00000000) -#define FILE_OPEN_LE cpu_to_le32(0x00000001) -#define FILE_CREATE_LE cpu_to_le32(0x00000002) -#define FILE_OPEN_IF_LE cpu_to_le32(0x00000003) -#define FILE_OVERWRITE_LE cpu_to_le32(0x00000004) -#define FILE_OVERWRITE_IF_LE cpu_to_le32(0x00000005) - -/* CreateOptions Flags */ -#define FILE_DIRECTORY_FILE_LE cpu_to_le32(0x00000001) -/* same as #define CREATE_NOT_FILE_LE cpu_to_le32(0x00000001) */ -#define FILE_WRITE_THROUGH_LE cpu_to_le32(0x00000002) -#define FILE_SEQUENTIAL_ONLY_LE cpu_to_le32(0x00000004) -#define FILE_NO_INTERMEDIATE_BUFFERRING_LE cpu_to_le32(0x00000008) -#define FILE_SYNCHRONOUS_IO_ALERT_LE cpu_to_le32(0x00000010) -#define FILE_SYNCHRONOUS_IO_NON_ALERT_LE cpu_to_le32(0x00000020) -#define FILE_NON_DIRECTORY_FILE_LE cpu_to_le32(0x00000040) -#define FILE_COMPLETE_IF_OPLOCKED_LE cpu_to_le32(0x00000100) -#define FILE_NO_EA_KNOWLEDGE_LE cpu_to_le32(0x00000200) -#define FILE_RANDOM_ACCESS_LE cpu_to_le32(0x00000800) -#define FILE_DELETE_ON_CLOSE_LE cpu_to_le32(0x00001000) -#define FILE_OPEN_BY_FILE_ID_LE cpu_to_le32(0x00002000) -#define FILE_OPEN_FOR_BACKUP_INTENT_LE cpu_to_le32(0x00004000) -#define FILE_NO_COMPRESSION_LE cpu_to_le32(0x00008000) -#define FILE_RESERVE_OPFILTER_LE cpu_to_le32(0x00100000) -#define FILE_OPEN_REPARSE_POINT_LE cpu_to_le32(0x00200000) -#define FILE_OPEN_NO_RECALL_LE cpu_to_le32(0x00400000) -#define FILE_OPEN_FOR_FREE_SPACE_QUERY_LE cpu_to_le32(0x00800000) - -#define FILE_READ_RIGHTS_LE (FILE_READ_DATA_LE | FILE_READ_EA_LE \ - | FILE_READ_ATTRIBUTES_LE) -#define FILE_WRITE_RIGHTS_LE (FILE_WRITE_DATA_LE | FILE_APPEND_DATA_LE \ - | FILE_WRITE_EA_LE | FILE_WRITE_ATTRIBUTES_LE) -#define FILE_EXEC_RIGHTS_LE (FILE_EXECUTE_LE) - -/* Impersonation Levels. See MS-WPO section 9.7 and MSDN-IMPERS */ -#define IL_ANONYMOUS cpu_to_le32(0x00000000) -#define IL_IDENTIFICATION cpu_to_le32(0x00000001) -#define IL_IMPERSONATION cpu_to_le32(0x00000002) -#define IL_DELEGATE cpu_to_le32(0x00000003) - -/* Create Context Values */ -#define SMB2_CREATE_EA_BUFFER "ExtA" /* extended attributes */ -#define SMB2_CREATE_SD_BUFFER "SecD" /* security descriptor */ -#define SMB2_CREATE_DURABLE_HANDLE_REQUEST "DHnQ" -#define SMB2_CREATE_DURABLE_HANDLE_RECONNECT "DHnC" -#define SMB2_CREATE_ALLOCATION_SIZE "AISi" -#define SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST "MxAc" -#define SMB2_CREATE_TIMEWARP_REQUEST "TWrp" -#define SMB2_CREATE_QUERY_ON_DISK_ID "QFid" -#define SMB2_CREATE_REQUEST_LEASE "RqLs" -#define SMB2_CREATE_DURABLE_HANDLE_REQUEST_V2 "DH2Q" -#define SMB2_CREATE_DURABLE_HANDLE_RECONNECT_V2 "DH2C" -#define SMB2_CREATE_APP_INSTANCE_ID 0x45BCA66AEFA7F74A9008FA462E144D74 -#define SMB2_CREATE_APP_INSTANCE_VERSION 0xB982D0B73B56074FA07B524A8116A010 -#define SVHDX_OPEN_DEVICE_CONTEX 0x9CCBCF9E04C1E643980E158DA1F6EC83 -#define SMB2_CREATE_TAG_POSIX 0x93AD25509CB411E7B42383DE968BCD7C - -/* Flag (SMB3 open response) values */ -#define SMB2_CREATE_FLAG_REPARSEPOINT 0x01 - /* * Maximum number of iovs we need for an open/create request. * [0] : struct smb2_create_req @@ -807,26 +128,6 @@ struct smb2_tree_disconnect_rsp { */ #define SMB2_CREATE_IOV_SIZE 8 -struct smb2_create_req { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize; /* Must be 57 */ - __u8 SecurityFlags; - __u8 RequestedOplockLevel; - __le32 ImpersonationLevel; - __le64 SmbCreateFlags; - __le64 Reserved; - __le32 DesiredAccess; - __le32 FileAttributes; - __le32 ShareAccess; - __le32 CreateDisposition; - __le32 CreateOptions; - __le16 NameOffset; - __le16 NameLength; - __le32 CreateContextsOffset; - __le32 CreateContextsLength; - __u8 Buffer[]; -} __packed; - /* * Maximum size of a SMB2_CREATE response is 64 (smb2 header) + * 88 (fixed part of create response) + 520 (path) + 208 (contexts) + @@ -834,37 +135,6 @@ struct smb2_create_req { */ #define MAX_SMB2_CREATE_RESPONSE_SIZE 880 -struct smb2_create_rsp { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize; /* Must be 89 */ - __u8 OplockLevel; - __u8 Flag; /* 0x01 if reparse point */ - __le32 CreateAction; - __le64 CreationTime; - __le64 LastAccessTime; - __le64 LastWriteTime; - __le64 ChangeTime; - __le64 AllocationSize; - __le64 EndofFile; - __le32 FileAttributes; - __le32 Reserved2; - __u64 PersistentFileId; /* opaque endianness */ - __u64 VolatileFileId; /* opaque endianness */ - __le32 CreateContextsOffset; - __le32 CreateContextsLength; - __u8 Buffer[1]; -} __packed; - -struct create_context { - __le32 Next; - __le16 NameOffset; - __le16 NameLength; - __le16 Reserved; - __le16 DataOffset; - __le32 DataLength; - __u8 Buffer[]; -} __packed; - #define SMB2_LEASE_READ_CACHING_HE 0x01 #define SMB2_LEASE_HANDLE_CACHING_HE 0x02 #define SMB2_LEASE_WRITE_CACHING_HE 0x04 @@ -1210,7 +480,7 @@ struct duplicate_extents_to_file { #define SMB2_IOCTL_IOV_SIZE 2 struct smb2_ioctl_req { - struct smb2_sync_hdr sync_hdr; + struct smb2_hdr hdr; __le16 StructureSize; /* Must be 57 */ __u16 Reserved; __le32 CtlCode; @@ -1228,7 +498,7 @@ struct smb2_ioctl_req { } __packed; struct smb2_ioctl_rsp { - struct smb2_sync_hdr sync_hdr; + struct smb2_hdr hdr; __le16 StructureSize; /* Must be 57 */ __u16 Reserved; __le32 CtlCode; @@ -1243,161 +513,6 @@ struct smb2_ioctl_rsp { /* char * buffer[] */ } __packed; -/* Currently defined values for close flags */ -#define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB cpu_to_le16(0x0001) -struct smb2_close_req { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize; /* Must be 24 */ - __le16 Flags; - __le32 Reserved; - __u64 PersistentFileId; /* opaque endianness */ - __u64 VolatileFileId; /* opaque endianness */ -} __packed; - -/* - * Maximum size of a SMB2_CLOSE response is 64 (smb2 header) + 60 (data) - */ -#define MAX_SMB2_CLOSE_RESPONSE_SIZE 124 - -struct smb2_close_rsp { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize; /* 60 */ - __le16 Flags; - __le32 Reserved; - __le64 CreationTime; - __le64 LastAccessTime; - __le64 LastWriteTime; - __le64 ChangeTime; - __le64 AllocationSize; /* Beginning of FILE_STANDARD_INFO equivalent */ - __le64 EndOfFile; - __le32 Attributes; -} __packed; - -struct smb2_flush_req { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize; /* Must be 24 */ - __le16 Reserved1; - __le32 Reserved2; - __u64 PersistentFileId; /* opaque endianness */ - __u64 VolatileFileId; /* opaque endianness */ -} __packed; - -struct smb2_flush_rsp { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize; - __le16 Reserved; -} __packed; - -/* For read request Flags field below, following flag is defined for SMB3.02 */ -#define SMB2_READFLAG_READ_UNBUFFERED 0x01 -#define SMB2_READFLAG_REQUEST_COMPRESSED 0x02 /* See MS-SMB2 2.2.19 */ - -/* Channel field for read and write: exactly one of following flags can be set*/ -#define SMB2_CHANNEL_NONE cpu_to_le32(0x00000000) -#define SMB2_CHANNEL_RDMA_V1 cpu_to_le32(0x00000001) /* SMB3 or later */ -#define SMB2_CHANNEL_RDMA_V1_INVALIDATE cpu_to_le32(0x00000002) /* >= SMB3.02 */ -#define SMB2_CHANNEL_RDMA_TRANSFORM cpu_to_le32(0x00000003) /* >= SMB3.02, only used on write */ - -/* SMB2 read request without RFC1001 length at the beginning */ -struct smb2_read_plain_req { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize; /* Must be 49 */ - __u8 Padding; /* offset from start of SMB2 header to place read */ - __u8 Flags; /* MBZ unless SMB3.02 or later */ - __le32 Length; - __le64 Offset; - __u64 PersistentFileId; /* opaque endianness */ - __u64 VolatileFileId; /* opaque endianness */ - __le32 MinimumCount; - __le32 Channel; /* MBZ except for SMB3 or later */ - __le32 RemainingBytes; - __le16 ReadChannelInfoOffset; - __le16 ReadChannelInfoLength; - __u8 Buffer[1]; -} __packed; - -/* Read flags */ -#define SMB2_READFLAG_RESPONSE_NONE 0x00000000 -#define SMB2_READFLAG_RESPONSE_RDMA_TRANSFORM 0x00000001 - -struct smb2_read_rsp { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize; /* Must be 17 */ - __u8 DataOffset; - __u8 Reserved; - __le32 DataLength; - __le32 DataRemaining; - __u32 Flags; - __u8 Buffer[1]; -} __packed; - -/* For write request Flags field below the following flags are defined: */ -#define SMB2_WRITEFLAG_WRITE_THROUGH 0x00000001 /* SMB2.1 or later */ -#define SMB2_WRITEFLAG_WRITE_UNBUFFERED 0x00000002 /* SMB3.02 or later */ - -struct smb2_write_req { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize; /* Must be 49 */ - __le16 DataOffset; /* offset from start of SMB2 header to write data */ - __le32 Length; - __le64 Offset; - __u64 PersistentFileId; /* opaque endianness */ - __u64 VolatileFileId; /* opaque endianness */ - __le32 Channel; /* MBZ unless SMB3.02 or later */ - __le32 RemainingBytes; - __le16 WriteChannelInfoOffset; - __le16 WriteChannelInfoLength; - __le32 Flags; - __u8 Buffer[1]; -} __packed; - -struct smb2_write_rsp { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize; /* Must be 17 */ - __u8 DataOffset; - __u8 Reserved; - __le32 DataLength; - __le32 DataRemaining; - __u32 Reserved2; - __u8 Buffer[1]; -} __packed; - -/* notify flags */ -#define SMB2_WATCH_TREE 0x0001 - -/* notify completion filter flags. See MS-FSCC 2.6 and MS-SMB2 2.2.35 */ -#define FILE_NOTIFY_CHANGE_FILE_NAME 0x00000001 -#define FILE_NOTIFY_CHANGE_DIR_NAME 0x00000002 -#define FILE_NOTIFY_CHANGE_ATTRIBUTES 0x00000004 -#define FILE_NOTIFY_CHANGE_SIZE 0x00000008 -#define FILE_NOTIFY_CHANGE_LAST_WRITE 0x00000010 -#define FILE_NOTIFY_CHANGE_LAST_ACCESS 0x00000020 -#define FILE_NOTIFY_CHANGE_CREATION 0x00000040 -#define FILE_NOTIFY_CHANGE_EA 0x00000080 -#define FILE_NOTIFY_CHANGE_SECURITY 0x00000100 -#define FILE_NOTIFY_CHANGE_STREAM_NAME 0x00000200 -#define FILE_NOTIFY_CHANGE_STREAM_SIZE 0x00000400 -#define FILE_NOTIFY_CHANGE_STREAM_WRITE 0x00000800 - -struct smb2_change_notify_req { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize; - __le16 Flags; - __le32 OutputBufferLength; - __u64 PersistentFileId; /* opaque endianness */ - __u64 VolatileFileId; /* opaque endianness */ - __le32 CompletionFilter; - __u32 Reserved; -} __packed; - -struct smb2_change_notify_rsp { - struct smb2_sync_hdr sync_hdr; - __le16 StructureSize; /* Must be 9 */ - __le16 OutputBufferOffset; - __le32 OutputBufferLength; - __u8 Buffer[1]; /* array of file notify structs */ -} __packed; - #define SMB2_LOCKFLAG_SHARED_LOCK 0x0001 #define SMB2_LOCKFLAG_EXCLUSIVE_LOCK 0x0002 #define SMB2_LOCKFLAG_UNLOCK 0x0004 @@ -1411,7 +526,7 @@ struct smb2_lock_element { } __packed; struct smb2_lock_req { - struct smb2_sync_hdr sync_hdr; + struct smb2_hdr hdr; __le16 StructureSize; /* Must be 48 */ __le16 LockCount; /* @@ -1426,19 +541,19 @@ struct smb2_lock_req { } __packed; struct smb2_lock_rsp { - struct smb2_sync_hdr sync_hdr; + struct smb2_hdr hdr; __le16 StructureSize; /* Must be 4 */ __le16 Reserved; } __packed; struct smb2_echo_req { - struct smb2_sync_hdr sync_hdr; + struct smb2_hdr hdr; __le16 StructureSize; /* Must be 4 */ __u16 Reserved; } __packed; struct smb2_echo_rsp { - struct smb2_sync_hdr sync_hdr; + struct smb2_hdr hdr; __le16 StructureSize; /* Must be 4 */ __u16 Reserved; } __packed; @@ -1468,7 +583,7 @@ struct smb2_echo_rsp { */ struct smb2_query_directory_req { - struct smb2_sync_hdr sync_hdr; + struct smb2_hdr hdr; __le16 StructureSize; /* Must be 33 */ __u8 FileInformationClass; __u8 Flags; @@ -1482,7 +597,7 @@ struct smb2_query_directory_req { } __packed; struct smb2_query_directory_rsp { - struct smb2_sync_hdr sync_hdr; + struct smb2_hdr hdr; __le16 StructureSize; /* Must be 9 */ __le16 OutputBufferOffset; __le32 OutputBufferLength; @@ -1515,7 +630,7 @@ struct smb2_query_directory_rsp { #define SL_INDEX_SPECIFIED 0x00000004 struct smb2_query_info_req { - struct smb2_sync_hdr sync_hdr; + struct smb2_hdr hdr; __le16 StructureSize; /* Must be 41 */ __u8 InfoType; __u8 FileInfoClass; @@ -1531,7 +646,7 @@ struct smb2_query_info_req { } __packed; struct smb2_query_info_rsp { - struct smb2_sync_hdr sync_hdr; + struct smb2_hdr hdr; __le16 StructureSize; /* Must be 9 */ __le16 OutputBufferOffset; __le32 OutputBufferLength; @@ -1548,7 +663,7 @@ struct smb2_query_info_rsp { #define SMB2_SET_INFO_IOV_SIZE 3 struct smb2_set_info_req { - struct smb2_sync_hdr sync_hdr; + struct smb2_hdr hdr; __le16 StructureSize; /* Must be 33 */ __u8 InfoType; __u8 FileInfoClass; @@ -1562,12 +677,12 @@ struct smb2_set_info_req { } __packed; struct smb2_set_info_rsp { - struct smb2_sync_hdr sync_hdr; + struct smb2_hdr hdr; __le16 StructureSize; /* Must be 2 */ } __packed; struct smb2_oplock_break { - struct smb2_sync_hdr sync_hdr; + struct smb2_hdr hdr; __le16 StructureSize; /* Must be 24 */ __u8 OplockLevel; __u8 Reserved; @@ -1579,7 +694,7 @@ struct smb2_oplock_break { #define SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED cpu_to_le32(0x01) struct smb2_lease_break { - struct smb2_sync_hdr sync_hdr; + struct smb2_hdr hdr; __le16 StructureSize; /* Must be 44 */ __le16 Epoch; __le32 Flags; @@ -1592,7 +707,7 @@ struct smb2_lease_break { } __packed; struct smb2_lease_ack { - struct smb2_sync_hdr sync_hdr; + struct smb2_hdr hdr; __le16 StructureSize; /* Must be 36 */ __le16 Reserved; __le32 Flags; diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 547945443fa7..096fada16ebd 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -25,7 +25,7 @@ extern int smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *server); extern unsigned int smb2_calc_size(void *buf, struct TCP_Server_Info *server); extern char *smb2_get_data_area_len(int *off, int *len, - struct smb2_sync_hdr *shdr); + struct smb2_hdr *shdr); extern __le16 *cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb); diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index f59b956f9d25..2bf047b390a9 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -19,7 +19,6 @@ #include <linux/mempool.h> #include <linux/highmem.h> #include <crypto/aead.h> -#include "smb2pdu.h" #include "cifsglob.h" #include "cifsproto.h" #include "smb2proto.h" @@ -213,14 +212,14 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, unsigned char smb2_signature[SMB2_HMACSHA256_SIZE]; unsigned char *sigptr = smb2_signature; struct kvec *iov = rqst->rq_iov; - struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[0].iov_base; + struct smb2_hdr *shdr = (struct smb2_hdr *)iov[0].iov_base; struct cifs_ses *ses; struct shash_desc *shash; struct crypto_shash *hash; struct sdesc *sdesc = NULL; struct smb_rqst drqst; - ses = smb2_find_smb_ses(server, shdr->SessionId); + ses = smb2_find_smb_ses(server, le64_to_cpu(shdr->SessionId)); if (!ses) { cifs_server_dbg(VFS, "%s: Could not find session\n", __func__); return 0; @@ -534,14 +533,14 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, unsigned char smb3_signature[SMB2_CMACAES_SIZE]; unsigned char *sigptr = smb3_signature; struct kvec *iov = rqst->rq_iov; - struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[0].iov_base; + struct smb2_hdr *shdr = (struct smb2_hdr *)iov[0].iov_base; struct shash_desc *shash; struct crypto_shash *hash; struct sdesc *sdesc = NULL; struct smb_rqst drqst; u8 key[SMB3_SIGN_KEY_SIZE]; - rc = smb2_get_sign_key(shdr->SessionId, server, key); + rc = smb2_get_sign_key(le64_to_cpu(shdr->SessionId), server, key); if (rc) return 0; @@ -611,12 +610,12 @@ static int smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server) { int rc = 0; - struct smb2_sync_hdr *shdr; + struct smb2_hdr *shdr; struct smb2_sess_setup_req *ssr; bool is_binding; bool is_signed; - shdr = (struct smb2_sync_hdr *)rqst->rq_iov[0].iov_base; + shdr = (struct smb2_hdr *)rqst->rq_iov[0].iov_base; ssr = (struct smb2_sess_setup_req *)shdr; is_binding = shdr->Command == SMB2_SESSION_SETUP && @@ -642,8 +641,8 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) { unsigned int rc; char server_response_sig[SMB2_SIGNATURE_SIZE]; - struct smb2_sync_hdr *shdr = - (struct smb2_sync_hdr *)rqst->rq_iov[0].iov_base; + struct smb2_hdr *shdr = + (struct smb2_hdr *)rqst->rq_iov[0].iov_base; if ((shdr->Command == SMB2_NEGOTIATE) || (shdr->Command == SMB2_SESSION_SETUP) || @@ -689,7 +688,7 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) */ static inline void smb2_seq_num_into_buf(struct TCP_Server_Info *server, - struct smb2_sync_hdr *shdr) + struct smb2_hdr *shdr) { unsigned int i, num = le16_to_cpu(shdr->CreditCharge); @@ -700,7 +699,7 @@ smb2_seq_num_into_buf(struct TCP_Server_Info *server, } static struct mid_q_entry * -smb2_mid_entry_alloc(const struct smb2_sync_hdr *shdr, +smb2_mid_entry_alloc(const struct smb2_hdr *shdr, struct TCP_Server_Info *server) { struct mid_q_entry *temp; @@ -732,14 +731,15 @@ smb2_mid_entry_alloc(const struct smb2_sync_hdr *shdr, atomic_inc(&midCount); temp->mid_state = MID_REQUEST_ALLOCATED; - trace_smb3_cmd_enter(shdr->TreeId, shdr->SessionId, - le16_to_cpu(shdr->Command), temp->mid); + trace_smb3_cmd_enter(le32_to_cpu(shdr->Id.SyncId.TreeId), + le64_to_cpu(shdr->SessionId), + le16_to_cpu(shdr->Command), temp->mid); return temp; } static int smb2_get_mid_entry(struct cifs_ses *ses, struct TCP_Server_Info *server, - struct smb2_sync_hdr *shdr, struct mid_q_entry **mid) + struct smb2_hdr *shdr, struct mid_q_entry **mid) { if (server->tcpStatus == CifsExiting) return -ENOENT; @@ -807,8 +807,8 @@ smb2_setup_request(struct cifs_ses *ses, struct TCP_Server_Info *server, struct smb_rqst *rqst) { int rc; - struct smb2_sync_hdr *shdr = - (struct smb2_sync_hdr *)rqst->rq_iov[0].iov_base; + struct smb2_hdr *shdr = + (struct smb2_hdr *)rqst->rq_iov[0].iov_base; struct mid_q_entry *mid; smb2_seq_num_into_buf(server, shdr); @@ -833,8 +833,8 @@ struct mid_q_entry * smb2_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst) { int rc; - struct smb2_sync_hdr *shdr = - (struct smb2_sync_hdr *)rqst->rq_iov[0].iov_base; + struct smb2_hdr *shdr = + (struct smb2_hdr *)rqst->rq_iov[0].iov_base; struct mid_q_entry *mid; if (server->tcpStatus == CifsNeedNegotiate && diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h index dafcb6ab050d..6cecf302dcfd 100644 --- a/fs/cifs/trace.h +++ b/fs/cifs/trace.h @@ -11,6 +11,8 @@ #define _CIFS_TRACE_H #include <linux/tracepoint.h> +#include <linux/net.h> +#include <linux/inet.h> /* * Please use this 3-part article as a reference for writing new tracepoints: @@ -854,6 +856,75 @@ DEFINE_EVENT(smb3_lease_err_class, smb3_##name, \ DEFINE_SMB3_LEASE_ERR_EVENT(lease_err); +DECLARE_EVENT_CLASS(smb3_connect_class, + TP_PROTO(char *hostname, + __u64 conn_id, + const struct __kernel_sockaddr_storage *dst_addr), + TP_ARGS(hostname, conn_id, dst_addr), + TP_STRUCT__entry( + __string(hostname, hostname) + __field(__u64, conn_id) + __array(__u8, dst_addr, sizeof(struct sockaddr_storage)) + ), + TP_fast_assign( + struct sockaddr_storage *pss = NULL; + + __entry->conn_id = conn_id; + pss = (struct sockaddr_storage *)__entry->dst_addr; + *pss = *dst_addr; + __assign_str(hostname, hostname); + ), + TP_printk("conn_id=0x%llx server=%s addr=%pISpsfc", + __entry->conn_id, + __get_str(hostname), + __entry->dst_addr) +) + +#define DEFINE_SMB3_CONNECT_EVENT(name) \ +DEFINE_EVENT(smb3_connect_class, smb3_##name, \ + TP_PROTO(char *hostname, \ + __u64 conn_id, \ + const struct __kernel_sockaddr_storage *addr), \ + TP_ARGS(hostname, conn_id, addr)) + +DEFINE_SMB3_CONNECT_EVENT(connect_done); + +DECLARE_EVENT_CLASS(smb3_connect_err_class, + TP_PROTO(char *hostname, __u64 conn_id, + const struct __kernel_sockaddr_storage *dst_addr, int rc), + TP_ARGS(hostname, conn_id, dst_addr, rc), + TP_STRUCT__entry( + __string(hostname, hostname) + __field(__u64, conn_id) + __array(__u8, dst_addr, sizeof(struct sockaddr_storage)) + __field(int, rc) + ), + TP_fast_assign( + struct sockaddr_storage *pss = NULL; + + __entry->conn_id = conn_id; + __entry->rc = rc; + pss = (struct sockaddr_storage *)__entry->dst_addr; + *pss = *dst_addr; + __assign_str(hostname, hostname); + ), + TP_printk("rc=%d conn_id=0x%llx server=%s addr=%pISpsfc", + __entry->rc, + __entry->conn_id, + __get_str(hostname), + __entry->dst_addr) +) + +#define DEFINE_SMB3_CONNECT_ERR_EVENT(name) \ +DEFINE_EVENT(smb3_connect_err_class, smb3_##name, \ + TP_PROTO(char *hostname, \ + __u64 conn_id, \ + const struct __kernel_sockaddr_storage *addr, \ + int rc), \ + TP_ARGS(hostname, conn_id, addr, rc)) + +DEFINE_SMB3_CONNECT_ERR_EVENT(connect_err); + DECLARE_EVENT_CLASS(smb3_reconnect_class, TP_PROTO(__u64 currmid, __u64 conn_id, diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c index 06855f6c7902..62a3d2565c26 100644 --- a/fs/coda/cnode.c +++ b/fs/coda/cnode.c @@ -63,9 +63,10 @@ struct inode * coda_iget(struct super_block * sb, struct CodaFid * fid, struct inode *inode; struct coda_inode_info *cii; unsigned long hash = coda_f2i(fid); + umode_t inode_type = coda_inode_type(attr); +retry: inode = iget5_locked(sb, hash, coda_test_inode, coda_set_inode, fid); - if (!inode) return ERR_PTR(-ENOMEM); @@ -75,11 +76,15 @@ struct inode * coda_iget(struct super_block * sb, struct CodaFid * fid, inode->i_ino = hash; /* inode is locked and unique, no need to grab cii->c_lock */ cii->c_mapcount = 0; + coda_fill_inode(inode, attr); unlock_new_inode(inode); + } else if ((inode->i_mode & S_IFMT) != inode_type) { + /* Inode has changed type, mark bad and grab a new one */ + remove_inode_hash(inode); + coda_flag_inode(inode, C_PURGE); + iput(inode); + goto retry; } - - /* always replace the attributes, type might have changed */ - coda_fill_inode(inode, attr); return inode; } diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c index 2e1a5a192074..903ca8fa4b9b 100644 --- a/fs/coda/coda_linux.c +++ b/fs/coda/coda_linux.c @@ -87,28 +87,27 @@ static struct coda_timespec timespec64_to_coda(struct timespec64 ts64) } /* utility functions below */ +umode_t coda_inode_type(struct coda_vattr *attr) +{ + switch (attr->va_type) { + case C_VREG: + return S_IFREG; + case C_VDIR: + return S_IFDIR; + case C_VLNK: + return S_IFLNK; + case C_VNON: + default: + return 0; + } +} + void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr) { - int inode_type; - /* inode's i_flags, i_ino are set by iget - XXX: is this all we need ?? - */ - switch (attr->va_type) { - case C_VNON: - inode_type = 0; - break; - case C_VREG: - inode_type = S_IFREG; - break; - case C_VDIR: - inode_type = S_IFDIR; - break; - case C_VLNK: - inode_type = S_IFLNK; - break; - default: - inode_type = 0; - } + /* inode's i_flags, i_ino are set by iget + * XXX: is this all we need ?? + */ + umode_t inode_type = coda_inode_type(attr); inode->i_mode |= inode_type; if (attr->va_mode != (u_short) -1) diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h index e7b27754ce78..9be281bbcc06 100644 --- a/fs/coda/coda_linux.h +++ b/fs/coda/coda_linux.h @@ -53,10 +53,11 @@ int coda_getattr(struct user_namespace *, const struct path *, struct kstat *, u32, unsigned int); int coda_setattr(struct user_namespace *, struct dentry *, struct iattr *); -/* this file: heloers */ +/* this file: helpers */ char *coda_f2s(struct CodaFid *f); int coda_iscontrol(const char *name, size_t length); +umode_t coda_inode_type(struct coda_vattr *attr); void coda_vattr_to_iattr(struct inode *, struct coda_vattr *); void coda_iattr_to_vattr(struct iattr *, struct coda_vattr *); unsigned short coda_flags_to_cflags(unsigned short); @@ -83,6 +84,9 @@ static __inline__ void coda_flag_inode(struct inode *inode, int flag) { struct coda_inode_info *cii = ITOC(inode); + if (!inode) + return; + spin_lock(&cii->c_lock); cii->c_flags |= flag; spin_unlock(&cii->c_lock); diff --git a/fs/coda/dir.c b/fs/coda/dir.c index d69989c1bac3..328d7a684b63 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -317,13 +317,10 @@ static int coda_rename(struct user_namespace *mnt_userns, struct inode *old_dir, coda_dir_drop_nlink(old_dir); coda_dir_inc_nlink(new_dir); } - coda_dir_update_mtime(old_dir); - coda_dir_update_mtime(new_dir); coda_flag_inode(d_inode(new_dentry), C_VATTR); - } else { - coda_flag_inode(old_dir, C_VATTR); - coda_flag_inode(new_dir, C_VATTR); } + coda_dir_update_mtime(old_dir); + coda_dir_update_mtime(new_dir); } return error; } @@ -499,15 +496,20 @@ out: */ static int coda_dentry_delete(const struct dentry * dentry) { - int flags; + struct inode *inode; + struct coda_inode_info *cii; if (d_really_is_negative(dentry)) return 0; - flags = (ITOC(d_inode(dentry))->c_flags) & C_PURGE; - if (is_bad_inode(d_inode(dentry)) || flags) { + inode = d_inode(dentry); + if (!inode || is_bad_inode(inode)) return 1; - } + + cii = ITOC(inode); + if (cii->c_flags & C_PURGE) + return 1; + return 0; } diff --git a/fs/coda/file.c b/fs/coda/file.c index ef5ca22bfb3e..29dd87be2fb8 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -8,6 +8,7 @@ * to the Coda project. Contact Peter Braam <coda@cs.cmu.edu>. */ +#include <linux/refcount.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/time.h> @@ -28,7 +29,7 @@ #include "coda_int.h" struct coda_vm_ops { - atomic_t refcnt; + refcount_t refcnt; struct file *coda_file; const struct vm_operations_struct *host_vm_ops; struct vm_operations_struct vm_ops; @@ -98,7 +99,7 @@ coda_vm_open(struct vm_area_struct *vma) struct coda_vm_ops *cvm_ops = container_of(vma->vm_ops, struct coda_vm_ops, vm_ops); - atomic_inc(&cvm_ops->refcnt); + refcount_inc(&cvm_ops->refcnt); if (cvm_ops->host_vm_ops && cvm_ops->host_vm_ops->open) cvm_ops->host_vm_ops->open(vma); @@ -113,7 +114,7 @@ coda_vm_close(struct vm_area_struct *vma) if (cvm_ops->host_vm_ops && cvm_ops->host_vm_ops->close) cvm_ops->host_vm_ops->close(vma); - if (atomic_dec_and_test(&cvm_ops->refcnt)) { + if (refcount_dec_and_test(&cvm_ops->refcnt)) { vma->vm_ops = cvm_ops->host_vm_ops; fput(cvm_ops->coda_file); kfree(cvm_ops); @@ -189,7 +190,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma) cvm_ops->vm_ops.open = coda_vm_open; cvm_ops->vm_ops.close = coda_vm_close; cvm_ops->coda_file = coda_file; - atomic_set(&cvm_ops->refcnt, 1); + refcount_set(&cvm_ops->refcnt, 1); vma->vm_ops = &cvm_ops->vm_ops; } @@ -238,11 +239,10 @@ int coda_release(struct inode *coda_inode, struct file *coda_file) struct coda_file_info *cfi; struct coda_inode_info *cii; struct inode *host_inode; - int err; cfi = coda_ftoc(coda_file); - err = venus_close(coda_inode->i_sb, coda_i2f(coda_inode), + venus_close(coda_inode->i_sb, coda_i2f(coda_inode), coda_flags, coda_file->f_cred->fsuid); host_inode = file_inode(cfi->cfi_container); diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index 240669f51eac..b39580ad4ce5 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -122,14 +122,10 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf, hdr.opcode, hdr.unique); nbytes = size; } - dcbuf = kvmalloc(nbytes, GFP_KERNEL); - if (!dcbuf) { - retval = -ENOMEM; - goto out; - } - if (copy_from_user(dcbuf, buf, nbytes)) { - kvfree(dcbuf); - retval = -EFAULT; + + dcbuf = vmemdup_user(buf, nbytes); + if (IS_ERR(dcbuf)) { + retval = PTR_ERR(dcbuf); goto out; } @@ -388,7 +384,7 @@ MODULE_AUTHOR("Jan Harkes, Peter J. Braam"); MODULE_DESCRIPTION("Coda Distributed File System VFS interface"); MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR); MODULE_LICENSE("GPL"); -MODULE_VERSION("7.0"); +MODULE_VERSION("7.2"); static int __init init_coda(void) { diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index eb3b1898da46..59f6cfd06f96 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -744,7 +744,8 @@ static int coda_upcall(struct venus_comm *vcp, list_add_tail(&req->uc_chain, &vcp->vc_pending); wake_up_interruptible(&vcp->vc_waitq); - if (req->uc_flags & CODA_REQ_ASYNC) { + /* We can return early on asynchronous requests */ + if (outSize == NULL) { mutex_unlock(&vcp->vc_mutex); return 0; } diff --git a/fs/coredump.c b/fs/coredump.c index 3224dee44d30..a6b3c196cdef 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -359,7 +359,7 @@ static int zap_process(struct task_struct *start, int exit_code, int flags) for_each_thread(start, t) { task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); - if (t != current && t->mm) { + if (t != current && !(t->flags & PF_POSTCOREDUMP)) { sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); nr++; @@ -369,99 +369,34 @@ static int zap_process(struct task_struct *start, int exit_code, int flags) return nr; } -static int zap_threads(struct task_struct *tsk, struct mm_struct *mm, +static int zap_threads(struct task_struct *tsk, struct core_state *core_state, int exit_code) { - struct task_struct *g, *p; - unsigned long flags; int nr = -EAGAIN; spin_lock_irq(&tsk->sighand->siglock); if (!signal_group_exit(tsk->signal)) { - mm->core_state = core_state; + tsk->signal->core_state = core_state; tsk->signal->group_exit_task = tsk; nr = zap_process(tsk, exit_code, 0); clear_tsk_thread_flag(tsk, TIF_SIGPENDING); + tsk->flags |= PF_DUMPCORE; + atomic_set(&core_state->nr_threads, nr); } spin_unlock_irq(&tsk->sighand->siglock); - if (unlikely(nr < 0)) - return nr; - - tsk->flags |= PF_DUMPCORE; - if (atomic_read(&mm->mm_users) == nr + 1) - goto done; - /* - * We should find and kill all tasks which use this mm, and we should - * count them correctly into ->nr_threads. We don't take tasklist - * lock, but this is safe wrt: - * - * fork: - * None of sub-threads can fork after zap_process(leader). All - * processes which were created before this point should be - * visible to zap_threads() because copy_process() adds the new - * process to the tail of init_task.tasks list, and lock/unlock - * of ->siglock provides a memory barrier. - * - * do_exit: - * The caller holds mm->mmap_lock. This means that the task which - * uses this mm can't pass exit_mm(), so it can't exit or clear - * its ->mm. - * - * de_thread: - * It does list_replace_rcu(&leader->tasks, ¤t->tasks), - * we must see either old or new leader, this does not matter. - * However, it can change p->sighand, so lock_task_sighand(p) - * must be used. Since p->mm != NULL and we hold ->mmap_lock - * it can't fail. - * - * Note also that "g" can be the old leader with ->mm == NULL - * and already unhashed and thus removed from ->thread_group. - * This is OK, __unhash_process()->list_del_rcu() does not - * clear the ->next pointer, we will find the new leader via - * next_thread(). - */ - rcu_read_lock(); - for_each_process(g) { - if (g == tsk->group_leader) - continue; - if (g->flags & PF_KTHREAD) - continue; - - for_each_thread(g, p) { - if (unlikely(!p->mm)) - continue; - if (unlikely(p->mm == mm)) { - lock_task_sighand(p, &flags); - nr += zap_process(p, exit_code, - SIGNAL_GROUP_EXIT); - unlock_task_sighand(p, &flags); - } - break; - } - } - rcu_read_unlock(); -done: - atomic_set(&core_state->nr_threads, nr); return nr; } static int coredump_wait(int exit_code, struct core_state *core_state) { struct task_struct *tsk = current; - struct mm_struct *mm = tsk->mm; int core_waiters = -EBUSY; init_completion(&core_state->startup); core_state->dumper.task = tsk; core_state->dumper.next = NULL; - if (mmap_write_lock_killable(mm)) - return -EINTR; - - if (!mm->core_state) - core_waiters = zap_threads(tsk, mm, core_state, exit_code); - mmap_write_unlock(mm); - + core_waiters = zap_threads(tsk, core_state, exit_code); if (core_waiters > 0) { struct core_thread *ptr; @@ -483,7 +418,7 @@ static int coredump_wait(int exit_code, struct core_state *core_state) return core_waiters; } -static void coredump_finish(struct mm_struct *mm, bool core_dumped) +static void coredump_finish(bool core_dumped) { struct core_thread *curr, *next; struct task_struct *task; @@ -493,22 +428,21 @@ static void coredump_finish(struct mm_struct *mm, bool core_dumped) current->signal->group_exit_code |= 0x80; current->signal->group_exit_task = NULL; current->signal->flags = SIGNAL_GROUP_EXIT; + next = current->signal->core_state->dumper.next; + current->signal->core_state = NULL; spin_unlock_irq(¤t->sighand->siglock); - next = mm->core_state->dumper.next; while ((curr = next) != NULL) { next = curr->next; task = curr->task; /* - * see exit_mm(), curr->task must not see + * see coredump_task_exit(), curr->task must not see * ->task == NULL before we read ->next. */ smp_mb(); curr->task = NULL; wake_up_process(task); } - - mm->core_state = NULL; } static bool dump_interrupted(void) @@ -839,7 +773,7 @@ fail_dropcount: fail_unlock: kfree(argv); kfree(cn.corename); - coredump_finish(mm, core_dumped); + coredump_finish(core_dumped); revert_creds(old_cred); fail_creds: put_cred(cred); diff --git a/fs/d_path.c b/fs/d_path.c index cd60c7535181..e4e0ebad1f15 100644 --- a/fs/d_path.c +++ b/fs/d_path.c @@ -77,9 +77,8 @@ static bool prepend(struct prepend_buffer *p, const char *str, int namelen) /** * prepend_name - prepend a pathname in front of current buffer pointer - * @buffer: buffer pointer - * @buflen: allocated length of the buffer - * @name: name string and length qstr structure + * @p: prepend buffer which contains buffer pointer and allocated length + * @name: name string and length qstr structure * * With RCU path tracing, it may race with d_move(). Use READ_ONCE() to * make sure that either the old or the new name pointer and length are @@ -141,8 +140,7 @@ static int __prepend_path(const struct dentry *dentry, const struct mount *mnt, * prepend_path - Prepend path string to a buffer * @path: the dentry/vfsmount to report * @root: root vfsmnt/dentry - * @buffer: pointer to the end of the buffer - * @buflen: pointer to buffer length + * @p: prepend buffer which contains buffer pointer and allocated length * * The function will first try to write out the pathname without taking any * lock other than the RCU read lock to make sure that dentries won't go away. diff --git a/fs/exec.c b/fs/exec.c index a098c133d8d7..537d92c41105 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -987,16 +987,14 @@ static int exec_mmap(struct mm_struct *mm) if (old_mm) { /* - * Make sure that if there is a core dump in progress - * for the old mm, we get out and die instead of going - * through with the exec. We must hold mmap_lock around - * checking core_state and changing tsk->mm. + * If there is a pending fatal signal perhaps a signal + * whose default action is to create a coredump get + * out and die instead of going through with the exec. */ - mmap_read_lock(old_mm); - if (unlikely(old_mm->core_state)) { - mmap_read_unlock(old_mm); + ret = mmap_read_lock_killable(old_mm); + if (ret) { up_write(&tsk->signal->exec_update_lock); - return -EINTR; + return ret; } } @@ -1852,7 +1850,7 @@ out: * SIGSEGV. */ if (bprm->point_of_no_return && !fatal_signal_pending(current)) - force_sigsegv(SIGSEGV); + force_fatal_sig(SIGSEGV); out_unmark: current->fs->in_exec = 0; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 79b6a0c47f6f..a320c54202d9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -46,6 +46,7 @@ #include <linux/part_stat.h> #include <linux/kthread.h> #include <linux/freezer.h> +#include <linux/fsnotify.h> #include "ext4.h" #include "ext4_extents.h" /* Needed for trace points definition */ @@ -759,6 +760,8 @@ void __ext4_error(struct super_block *sb, const char *function, sb->s_id, function, line, current->comm, &vaf); va_end(args); } + fsnotify_sb_error(sb, NULL, error ? error : EFSCORRUPTED); + ext4_handle_error(sb, force_ro, error, 0, block, function, line); } @@ -789,6 +792,8 @@ void __ext4_error_inode(struct inode *inode, const char *function, current->comm, &vaf); va_end(args); } + fsnotify_sb_error(inode->i_sb, inode, error ? error : EFSCORRUPTED); + ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block, function, line); } @@ -827,6 +832,8 @@ void __ext4_error_file(struct file *file, const char *function, current->comm, path, &vaf); va_end(args); } + fsnotify_sb_error(inode->i_sb, inode, EFSCORRUPTED); + ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block, function, line); } @@ -894,6 +901,7 @@ void __ext4_std_error(struct super_block *sb, const char *function, printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", sb->s_id, function, line, errstr); } + fsnotify_sb_error(sb, NULL, errno ? errno : EFSCORRUPTED); ext4_handle_error(sb, false, -errno, 0, 0, function, line); } diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index 281d79f8b3d3..713818d74de6 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -732,11 +732,8 @@ static ssize_t fuse_dax_direct_write(struct kiocb *iocb, struct iov_iter *from) ssize_t ret; ret = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); - if (ret < 0) - return ret; - fuse_invalidate_attr(inode); - fuse_write_update_size(inode, iocb->ki_pos); + fuse_write_update_attr(inode, iocb->ki_pos, ret); return ret; } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index dde341a6388a..79f7eda49e06 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -756,7 +756,7 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size) { unsigned ncpy = min(*size, cs->len); if (val) { - void *pgaddr = kmap_atomic(cs->pg); + void *pgaddr = kmap_local_page(cs->pg); void *buf = pgaddr + cs->offset; if (cs->write) @@ -764,7 +764,7 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size) else memcpy(*val, buf, ncpy); - kunmap_atomic(pgaddr); + kunmap_local(pgaddr); *val += ncpy; } *size -= ncpy; @@ -847,6 +847,12 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) replace_page_cache_page(oldpage, newpage); + /* + * Release while we have extra ref on stolen page. Otherwise + * anon_pipe_buf_release() might think the page can be reused. + */ + pipe_buf_release(cs->pipe, buf); + get_page(newpage); if (!(buf->flags & PIPE_BUF_FLAG_LRU)) @@ -949,10 +955,10 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep, } } if (page) { - void *mapaddr = kmap_atomic(page); + void *mapaddr = kmap_local_page(page); void *buf = mapaddr + offset; offset += fuse_copy_do(cs, &buf, &count); - kunmap_atomic(mapaddr); + kunmap_local(mapaddr); } else offset += fuse_copy_do(cs, NULL, &count); } @@ -1591,7 +1597,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, end = outarg.offset + outarg.size; if (end > file_size) { file_size = end; - fuse_write_update_size(inode, file_size); + fuse_write_update_attr(inode, file_size, outarg.size); } num = outarg.size; @@ -2031,8 +2037,12 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, pipe_lock(pipe); out_free: - for (idx = 0; idx < nbuf; idx++) - pipe_buf_release(pipe, &bufs[idx]); + for (idx = 0; idx < nbuf; idx++) { + struct pipe_buffer *buf = &bufs[idx]; + + if (buf->ops) + pipe_buf_release(pipe, buf); + } pipe_unlock(pipe); kvfree(bufs); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index d9b977c0f38d..0654bfedcbb0 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -116,7 +116,7 @@ u64 entry_attr_timeout(struct fuse_entry_out *o) return time_to_jiffies(o->attr_valid, o->attr_valid_nsec); } -static void fuse_invalidate_attr_mask(struct inode *inode, u32 mask) +void fuse_invalidate_attr_mask(struct inode *inode, u32 mask) { set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask); } @@ -738,14 +738,51 @@ static int fuse_symlink(struct user_namespace *mnt_userns, struct inode *dir, return create_new_entry(fm, &args, dir, entry, S_IFLNK); } -void fuse_update_ctime(struct inode *inode) +void fuse_flush_time_update(struct inode *inode) +{ + int err = sync_inode_metadata(inode, 1); + + mapping_set_error(inode->i_mapping, err); +} + +static void fuse_update_ctime_in_cache(struct inode *inode) { if (!IS_NOCMTIME(inode)) { inode->i_ctime = current_time(inode); mark_inode_dirty_sync(inode); + fuse_flush_time_update(inode); } } +void fuse_update_ctime(struct inode *inode) +{ + fuse_invalidate_attr_mask(inode, STATX_CTIME); + fuse_update_ctime_in_cache(inode); +} + +static void fuse_entry_unlinked(struct dentry *entry) +{ + struct inode *inode = d_inode(entry); + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + + spin_lock(&fi->lock); + fi->attr_version = atomic64_inc_return(&fc->attr_version); + /* + * If i_nlink == 0 then unlink doesn't make sense, yet this can + * happen if userspace filesystem is careless. It would be + * difficult to enforce correct nlink usage so just ignore this + * condition here + */ + if (S_ISDIR(inode->i_mode)) + clear_nlink(inode); + else if (inode->i_nlink > 0) + drop_nlink(inode); + spin_unlock(&fi->lock); + fuse_invalidate_entry_cache(entry); + fuse_update_ctime(inode); +} + static int fuse_unlink(struct inode *dir, struct dentry *entry) { int err; @@ -762,24 +799,8 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) args.in_args[0].value = entry->d_name.name; err = fuse_simple_request(fm, &args); if (!err) { - struct inode *inode = d_inode(entry); - struct fuse_inode *fi = get_fuse_inode(inode); - - spin_lock(&fi->lock); - fi->attr_version = atomic64_inc_return(&fm->fc->attr_version); - /* - * If i_nlink == 0 then unlink doesn't make sense, yet this can - * happen if userspace filesystem is careless. It would be - * difficult to enforce correct nlink usage so just ignore this - * condition here - */ - if (inode->i_nlink > 0) - drop_nlink(inode); - spin_unlock(&fi->lock); - fuse_invalidate_attr(inode); fuse_dir_changed(dir); - fuse_invalidate_entry_cache(entry); - fuse_update_ctime(inode); + fuse_entry_unlinked(entry); } else if (err == -EINTR) fuse_invalidate_entry(entry); return err; @@ -801,9 +822,8 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) args.in_args[0].value = entry->d_name.name; err = fuse_simple_request(fm, &args); if (!err) { - clear_nlink(d_inode(entry)); fuse_dir_changed(dir); - fuse_invalidate_entry_cache(entry); + fuse_entry_unlinked(entry); } else if (err == -EINTR) fuse_invalidate_entry(entry); return err; @@ -833,24 +853,18 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, err = fuse_simple_request(fm, &args); if (!err) { /* ctime changes */ - fuse_invalidate_attr(d_inode(oldent)); fuse_update_ctime(d_inode(oldent)); - if (flags & RENAME_EXCHANGE) { - fuse_invalidate_attr(d_inode(newent)); + if (flags & RENAME_EXCHANGE) fuse_update_ctime(d_inode(newent)); - } fuse_dir_changed(olddir); if (olddir != newdir) fuse_dir_changed(newdir); /* newent will end up negative */ - if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) { - fuse_invalidate_attr(d_inode(newent)); - fuse_invalidate_entry_cache(newent); - fuse_update_ctime(d_inode(newent)); - } + if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) + fuse_entry_unlinked(newent); } else if (err == -EINTR) { /* If request was interrupted, DEITY only knows if the rename actually took place. If the invalidation @@ -916,25 +930,11 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, args.in_args[1].size = newent->d_name.len + 1; args.in_args[1].value = newent->d_name.name; err = create_new_entry(fm, &args, newdir, newent, inode->i_mode); - /* Contrary to "normal" filesystems it can happen that link - makes two "logical" inodes point to the same "physical" - inode. We invalidate the attributes of the old one, so it - will reflect changes in the backing inode (link count, - etc.) - */ - if (!err) { - struct fuse_inode *fi = get_fuse_inode(inode); - - spin_lock(&fi->lock); - fi->attr_version = atomic64_inc_return(&fm->fc->attr_version); - if (likely(inode->i_nlink < UINT_MAX)) - inc_nlink(inode); - spin_unlock(&fi->lock); - fuse_invalidate_attr(inode); - fuse_update_ctime(inode); - } else if (err == -EINTR) { + if (!err) + fuse_update_ctime_in_cache(inode); + else if (err == -EINTR) fuse_invalidate_attr(inode); - } + return err; } @@ -944,15 +944,6 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, unsigned int blkbits; struct fuse_conn *fc = get_fuse_conn(inode); - /* see the comment in fuse_change_attributes() */ - if (fc->writeback_cache && S_ISREG(inode->i_mode)) { - attr->size = i_size_read(inode); - attr->mtime = inode->i_mtime.tv_sec; - attr->mtimensec = inode->i_mtime.tv_nsec; - attr->ctime = inode->i_ctime.tv_sec; - attr->ctimensec = inode->i_ctime.tv_nsec; - } - stat->dev = inode->i_sb->s_dev; stat->ino = attr->ino; stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); @@ -1030,12 +1021,14 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file, struct fuse_inode *fi = get_fuse_inode(inode); int err = 0; bool sync; + u32 inval_mask = READ_ONCE(fi->inval_mask); + u32 cache_mask = fuse_get_cache_mask(inode); if (flags & AT_STATX_FORCE_SYNC) sync = true; else if (flags & AT_STATX_DONT_SYNC) sync = false; - else if (request_mask & READ_ONCE(fi->inval_mask)) + else if (request_mask & inval_mask & ~cache_mask) sync = true; else sync = time_before64(fi->i_time, get_jiffies_64()); @@ -1052,11 +1045,9 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file, return err; } -int fuse_update_attributes(struct inode *inode, struct file *file) +int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask) { - /* Do *not* need to get atime for internal purposes */ - return fuse_update_get_attr(inode, file, NULL, - STATX_BASIC_STATS & ~STATX_ATIME, 0); + return fuse_update_get_attr(inode, file, NULL, mask, 0); } int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, @@ -1071,7 +1062,7 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, if (!parent) return -ENOENT; - inode_lock(parent); + inode_lock_nested(parent, I_MUTEX_PARENT); if (!S_ISDIR(parent->i_mode)) goto unlock; @@ -1561,10 +1552,10 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, struct fuse_setattr_in inarg; struct fuse_attr_out outarg; bool is_truncate = false; - bool is_wb = fc->writeback_cache; + bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode); loff_t oldsize; int err; - bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode); + bool trust_local_cmtime = is_wb; bool fault_blocked = false; if (!fc->default_permissions) @@ -1608,7 +1599,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, } /* Flush dirty data/metadata before non-truncate SETATTR */ - if (is_wb && S_ISREG(inode->i_mode) && + if (is_wb && attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET | ATTR_TIMES_SET)) { @@ -1676,10 +1667,11 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, } fuse_change_attributes_common(inode, &outarg.attr, - attr_timeout(&outarg)); + attr_timeout(&outarg), + fuse_get_cache_mask(inode)); oldsize = inode->i_size; /* see the comment in fuse_change_attributes() */ - if (!is_wb || is_truncate || !S_ISREG(inode->i_mode)) + if (!is_wb || is_truncate) i_size_write(inode, outarg.attr.size); if (is_truncate) { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 34b6d0650e66..9d6c5f6361f7 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -211,9 +211,8 @@ void fuse_finish_open(struct inode *inode, struct file *file) i_size_write(inode, 0); spin_unlock(&fi->lock); truncate_pagecache(inode, 0); - fuse_invalidate_attr(inode); - if (fc->writeback_cache) - file_update_time(file); + file_update_time(file); + fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) { invalidate_inode_pages2(inode->i_mapping); } @@ -339,12 +338,6 @@ static int fuse_open(struct inode *inode, struct file *file) static int fuse_release(struct inode *inode, struct file *file) { - struct fuse_conn *fc = get_fuse_conn(inode); - - /* see fuse_vma_close() for !writeback_cache case */ - if (fc->writeback_cache) - write_inode_now(inode, 1); - fuse_release_common(file, false); /* return value is ignored by VFS */ @@ -483,6 +476,9 @@ static int fuse_flush(struct file *file, fl_owner_t id) if (fuse_is_bad(inode)) return -EIO; + if (ff->open_flags & FOPEN_NOFLUSH && !fm->fc->writeback_cache) + return 0; + err = write_inode_now(inode, 1); if (err) return err; @@ -521,7 +517,7 @@ inval_attr_out: * enabled, i_blocks from cached attr may not be accurate. */ if (!err && fm->fc->writeback_cache) - fuse_invalidate_attr(inode); + fuse_invalidate_attr_mask(inode, STATX_BLOCKS); return err; } @@ -793,7 +789,7 @@ static void fuse_read_update_size(struct inode *inode, loff_t size, struct fuse_inode *fi = get_fuse_inode(inode); spin_lock(&fi->lock); - if (attr_ver == fi->attr_version && size < inode->i_size && + if (attr_ver >= fi->attr_version && size < inode->i_size && !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { fi->attr_version = atomic64_inc_return(&fc->attr_version); i_size_write(inode, size); @@ -1003,7 +999,7 @@ static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to) if (fc->auto_inval_data || (iocb->ki_pos + iov_iter_count(to) > i_size_read(inode))) { int err; - err = fuse_update_attributes(inode, iocb->ki_filp); + err = fuse_update_attributes(inode, iocb->ki_filp, STATX_SIZE); if (err) return err; } @@ -1072,7 +1068,7 @@ static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos, return err ?: ia->write.out.size; } -bool fuse_write_update_size(struct inode *inode, loff_t pos) +bool fuse_write_update_attr(struct inode *inode, loff_t pos, ssize_t written) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); @@ -1080,12 +1076,14 @@ bool fuse_write_update_size(struct inode *inode, loff_t pos) spin_lock(&fi->lock); fi->attr_version = atomic64_inc_return(&fc->attr_version); - if (pos > inode->i_size) { + if (written > 0 && pos > inode->i_size) { i_size_write(inode, pos); ret = true; } spin_unlock(&fi->lock); + fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); + return ret; } @@ -1268,11 +1266,8 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, kfree(ap->pages); } while (!err && iov_iter_count(ii)); - if (res > 0) - fuse_write_update_size(inode, pos); - + fuse_write_update_attr(inode, pos, res); clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); - fuse_invalidate_attr(inode); return res > 0 ? res : err; } @@ -1290,7 +1285,8 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from) if (fc->writeback_cache) { /* Update size (EOF optimization) and mode (SUID clearing) */ - err = fuse_update_attributes(mapping->host, file); + err = fuse_update_attributes(mapping->host, file, + STATX_SIZE | STATX_MODE); if (err) return err; @@ -1451,7 +1447,6 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, if (!ia) return -ENOMEM; - ia->io = io; if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) { if (!write) inode_lock(inode); @@ -1561,11 +1556,9 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) } else { res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); + fuse_write_update_attr(inode, iocb->ki_pos, res); } } - fuse_invalidate_attr(inode); - if (res > 0) - fuse_write_update_size(inode, iocb->ki_pos); inode_unlock(inode); return res; @@ -1776,7 +1769,7 @@ static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args, * is enabled, we trust local ctime/mtime. */ if (!fc->writeback_cache) - fuse_invalidate_attr(inode); + fuse_invalidate_attr_mask(inode, FUSE_STATX_MODIFY); spin_lock(&fi->lock); rb_erase(&wpa->writepages_entry, &fi->writepages); while (wpa->next) { @@ -1822,14 +1815,13 @@ static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args, static struct fuse_file *__fuse_write_file_get(struct fuse_inode *fi) { - struct fuse_file *ff = NULL; + struct fuse_file *ff; spin_lock(&fi->lock); - if (!list_empty(&fi->write_files)) { - ff = list_entry(fi->write_files.next, struct fuse_file, - write_entry); + ff = list_first_entry_or_null(&fi->write_files, struct fuse_file, + write_entry); + if (ff) fuse_file_get(ff); - } spin_unlock(&fi->lock); return ff; @@ -1848,6 +1840,17 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc) struct fuse_file *ff; int err; + /* + * Inode is always written before the last reference is dropped and + * hence this should not be reached from reclaim. + * + * Writing back the inode from reclaim can deadlock if the request + * processing itself needs an allocation. Allocations triggering + * reclaim while serving a request can't be prevented, because it can + * involve any number of unrelated userspace processes. + */ + WARN_ON(wbc->for_reclaim); + ff = __fuse_write_file_get(fi); err = fuse_flush_times(inode, ff); if (ff) @@ -2306,15 +2309,18 @@ static int fuse_write_end(struct file *file, struct address_space *mapping, if (!copied) goto unlock; + pos += copied; if (!PageUptodate(page)) { /* Zero any unwritten bytes at the end of the page */ - size_t endoff = (pos + copied) & ~PAGE_MASK; + size_t endoff = pos & ~PAGE_MASK; if (endoff) zero_user_segment(page, endoff, PAGE_SIZE); SetPageUptodate(page); } - fuse_write_update_size(inode, pos + copied); + if (pos > inode->i_size) + i_size_write(inode, pos); + set_page_dirty(page); unlock: @@ -2340,12 +2346,15 @@ static int fuse_launder_page(struct page *page) } /* - * Write back dirty pages now, because there may not be any suitable - * open files later + * Write back dirty data/metadata now (there may not be any suitable + * open files later for data) */ static void fuse_vma_close(struct vm_area_struct *vma) { - filemap_write_and_wait(vma->vm_file->f_mapping); + int err; + + err = write_inode_now(vma->vm_file->f_mapping->host, 1); + mapping_set_error(vma->vm_file->f_mapping, err); } /* @@ -2628,7 +2637,7 @@ static loff_t fuse_lseek(struct file *file, loff_t offset, int whence) return vfs_setpos(file, outarg.offset, inode->i_sb->s_maxbytes); fallback: - err = fuse_update_attributes(inode, file); + err = fuse_update_attributes(inode, file, STATX_SIZE); if (!err) return generic_file_llseek(file, offset, whence); else @@ -2648,7 +2657,7 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence) break; case SEEK_END: inode_lock(inode); - retval = fuse_update_attributes(inode, file); + retval = fuse_update_attributes(inode, file, STATX_SIZE); if (!retval) retval = generic_file_llseek(file, offset, whence); inode_unlock(inode); @@ -2869,7 +2878,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (iov_iter_rw(iter) == WRITE) { ret = fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE); - fuse_invalidate_attr(inode); + fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); } else { ret = __fuse_direct_read(io, iter, &pos); } @@ -2891,9 +2900,8 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) kref_put(&io->refcnt, fuse_io_release); if (iov_iter_rw(iter) == WRITE) { - if (ret > 0) - fuse_write_update_size(inode, pos); - else if (ret < 0 && offset + count > i_size) + fuse_write_update_attr(inode, pos, ret); + if (ret < 0 && offset + count > i_size) fuse_do_truncate(file); } @@ -2981,16 +2989,14 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, /* we could have extended the file */ if (!(mode & FALLOC_FL_KEEP_SIZE)) { - bool changed = fuse_write_update_size(inode, offset + length); - - if (changed && fm->fc->writeback_cache) + if (fuse_write_update_attr(inode, offset + length, length)) file_update_time(file); } if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) truncate_pagecache_range(inode, offset, offset + length - 1); - fuse_invalidate_attr(inode); + fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); out: if (!(mode & FALLOC_FL_KEEP_SIZE)) @@ -3002,6 +3008,8 @@ out: if (lock_inode) inode_unlock(inode); + fuse_flush_time_update(inode); + return err; } @@ -3096,12 +3104,8 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, ALIGN_DOWN(pos_out, PAGE_SIZE), ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1); - if (fc->writeback_cache) { - fuse_write_update_size(inode_out, pos_out + outarg.size); - file_update_time(file_out); - } - - fuse_invalidate_attr(inode_out); + file_update_time(file_out); + fuse_write_update_attr(inode_out, pos_out + outarg.size, outarg.size); err = outarg.size; out: @@ -3111,6 +3115,8 @@ out: inode_unlock(inode_out); file_accessed(file_in); + fuse_flush_time_update(inode_out); + return err; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index f55f9f94b1a4..198637b41e19 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1031,7 +1031,9 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, u64 attr_valid, u64 attr_version); void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, - u64 attr_valid); + u64 attr_valid, u32 cache_mask); + +u32 fuse_get_cache_mask(struct inode *inode); /** * Initialize the client device @@ -1065,7 +1067,15 @@ void fuse_wait_aborted(struct fuse_conn *fc); /** * Invalidate inode attributes */ + +/* Attributes possibly changed on data modification */ +#define FUSE_STATX_MODIFY (STATX_MTIME | STATX_CTIME | STATX_BLOCKS) + +/* Attributes possibly changed on data and/or size modification */ +#define FUSE_STATX_MODSIZE (FUSE_STATX_MODIFY | STATX_SIZE) + void fuse_invalidate_attr(struct inode *inode); +void fuse_invalidate_attr_mask(struct inode *inode, u32 mask); void fuse_invalidate_entry_cache(struct dentry *entry); @@ -1148,9 +1158,10 @@ int fuse_allow_current_process(struct fuse_conn *fc); u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id); +void fuse_flush_time_update(struct inode *inode); void fuse_update_ctime(struct inode *inode); -int fuse_update_attributes(struct inode *inode, struct file *file); +int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask); void fuse_flush_writepages(struct inode *inode); @@ -1208,7 +1219,7 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd, __poll_t fuse_file_poll(struct file *file, poll_table *wait); int fuse_dev_release(struct inode *inode, struct file *file); -bool fuse_write_update_size(struct inode *inode, loff_t pos); +bool fuse_write_update_attr(struct inode *inode, loff_t pos, ssize_t written); int fuse_flush_times(struct inode *inode, struct fuse_file *ff); int fuse_write_inode(struct inode *inode, struct writeback_control *wbc); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 12d49a1914e8..8b89e3ba7df3 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -118,6 +118,9 @@ static void fuse_evict_inode(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); + /* Will write inode on close/munmap and in all other dirtiers */ + WARN_ON(inode->i_state & I_DIRTY_INODE); + truncate_inode_pages_final(&inode->i_data); clear_inode(inode); if (inode->i_sb->s_flags & SB_ACTIVE) { @@ -161,7 +164,7 @@ static ino_t fuse_squash_ino(u64 ino64) } void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, - u64 attr_valid) + u64 attr_valid, u32 cache_mask) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); @@ -181,9 +184,11 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, inode->i_atime.tv_sec = attr->atime; inode->i_atime.tv_nsec = attr->atimensec; /* mtime from server may be stale due to local buffered write */ - if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) { + if (!(cache_mask & STATX_MTIME)) { inode->i_mtime.tv_sec = attr->mtime; inode->i_mtime.tv_nsec = attr->mtimensec; + } + if (!(cache_mask & STATX_CTIME)) { inode->i_ctime.tv_sec = attr->ctime; inode->i_ctime.tv_nsec = attr->ctimensec; } @@ -215,16 +220,44 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, inode->i_flags &= ~S_NOSEC; } +u32 fuse_get_cache_mask(struct inode *inode) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + + if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) + return 0; + + return STATX_MTIME | STATX_CTIME | STATX_SIZE; +} + void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, u64 attr_valid, u64 attr_version) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); - bool is_wb = fc->writeback_cache; + u32 cache_mask; loff_t oldsize; struct timespec64 old_mtime; spin_lock(&fi->lock); + /* + * In case of writeback_cache enabled, writes update mtime, ctime and + * may update i_size. In these cases trust the cached value in the + * inode. + */ + cache_mask = fuse_get_cache_mask(inode); + if (cache_mask & STATX_SIZE) + attr->size = i_size_read(inode); + + if (cache_mask & STATX_MTIME) { + attr->mtime = inode->i_mtime.tv_sec; + attr->mtimensec = inode->i_mtime.tv_nsec; + } + if (cache_mask & STATX_CTIME) { + attr->ctime = inode->i_ctime.tv_sec; + attr->ctimensec = inode->i_ctime.tv_nsec; + } + if ((attr_version != 0 && fi->attr_version > attr_version) || test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { spin_unlock(&fi->lock); @@ -232,7 +265,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, } old_mtime = inode->i_mtime; - fuse_change_attributes_common(inode, attr, attr_valid); + fuse_change_attributes_common(inode, attr, attr_valid, cache_mask); oldsize = inode->i_size; /* @@ -240,11 +273,11 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, * extend local i_size without keeping userspace server in sync. So, * attr->size coming from server can be stale. We cannot trust it. */ - if (!is_wb || !S_ISREG(inode->i_mode)) + if (!(cache_mask & STATX_SIZE)) i_size_write(inode, attr->size); spin_unlock(&fi->lock); - if (!is_wb && S_ISREG(inode->i_mode)) { + if (!cache_mask && S_ISREG(inode->i_mode)) { bool inval = false; if (oldsize != attr->size) { diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c index 546ea3d58fb4..fbc09dab1f85 100644 --- a/fs/fuse/ioctl.c +++ b/fs/fuse/ioctl.c @@ -286,11 +286,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV) goto out; - vaddr = kmap_atomic(ap.pages[0]); + vaddr = kmap_local_page(ap.pages[0]); err = fuse_copy_ioctl_iovec(fm->fc, iov_page, vaddr, transferred, in_iovs + out_iovs, (flags & FUSE_IOCTL_COMPAT) != 0); - kunmap_atomic(vaddr); + kunmap_local(vaddr); if (err) goto out; diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index bc267832310c..b4e565711045 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -76,11 +76,11 @@ static void fuse_add_dirent_to_cache(struct file *file, WARN_ON(fi->rdc.pos != pos)) goto unlock; - addr = kmap_atomic(page); + addr = kmap_local_page(page); if (!offset) clear_page(addr); memcpy(addr + offset, dirent, reclen); - kunmap_atomic(addr); + kunmap_local(addr); fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen; fi->rdc.pos = dirent->off; unlock: @@ -454,7 +454,7 @@ static int fuse_readdir_cached(struct file *file, struct dir_context *ctx) * cache; both cases require an up-to-date mtime value. */ if (!ctx->pos && fc->auto_inval_data) { - int err = fuse_update_attributes(inode, file); + int err = fuse_update_attributes(inode, file, STATX_MTIME); if (err) return err; diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 94fc874f5de7..4cfa4bc1f579 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -649,7 +649,7 @@ static void virtio_fs_vq_done(struct virtqueue *vq) static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name, int vq_type) { - strncpy(fsvq->name, name, VQ_NAME_LEN); + strscpy(fsvq->name, name, VQ_NAME_LEN); spin_lock_init(&fsvq->lock); INIT_LIST_HEAD(&fsvq->queued_reqs); INIT_LIST_HEAD(&fsvq->end_reqs); diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c index 61dfaf7b7d20..0d3e7177fce0 100644 --- a/fs/fuse/xattr.c +++ b/fs/fuse/xattr.c @@ -42,10 +42,9 @@ int fuse_setxattr(struct inode *inode, const char *name, const void *value, fm->fc->no_setxattr = 1; err = -EOPNOTSUPP; } - if (!err) { - fuse_invalidate_attr(inode); + if (!err) fuse_update_ctime(inode); - } + return err; } @@ -173,10 +172,9 @@ int fuse_removexattr(struct inode *inode, const char *name) fm->fc->no_removexattr = 1; err = -EOPNOTSUPP; } - if (!err) { - fuse_invalidate_attr(inode); + if (!err) fuse_update_ctime(inode); - } + return err; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 8f4627a19359..adafaaf7d24d 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -213,11 +213,9 @@ void gfs2_set_inode_flags(struct inode *inode) * @inode: The inode * @reqflags: The flags to set * @mask: Indicates which flags are valid - * @fsflags: The FS_* inode flags passed in * */ -static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask, - const u32 fsflags) +static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -236,11 +234,6 @@ static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask, if ((new_flags ^ flags) == 0) goto out; - error = -EPERM; - if (IS_IMMUTABLE(inode) && (new_flags & GFS2_DIF_IMMUTABLE)) - goto out; - if (IS_APPEND(inode) && (new_flags & GFS2_DIF_APPENDONLY)) - goto out; if (!IS_IMMUTABLE(inode)) { error = gfs2_permission(&init_user_ns, inode, MAY_WRITE); if (error) @@ -313,7 +306,7 @@ int gfs2_fileattr_set(struct user_namespace *mnt_userns, mask &= ~(GFS2_DIF_TOPDIR | GFS2_DIF_INHERIT_JDATA); } - return do_gfs2_set_flags(inode, gfsflags, mask, fsflags); + return do_gfs2_set_flags(inode, gfsflags, mask); } static int gfs2_getlabel(struct file *filp, char __user *label) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index f686083d0250..19f38aee1b61 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -301,6 +301,9 @@ void gfs2_glock_queue_put(struct gfs2_glock *gl) void gfs2_glock_put(struct gfs2_glock *gl) { + /* last put could call sleepable dlm api */ + might_sleep(); + if (lockref_put_or_lock(&gl->gl_lockref)) return; @@ -472,6 +475,51 @@ find_first_strong_holder(struct gfs2_glock *gl) return NULL; } +/* + * gfs2_instantiate - Call the glops instantiate function + * @gl: The glock + * + * Returns: 0 if instantiate was successful, 2 if type specific operation is + * underway, or error. + */ +int gfs2_instantiate(struct gfs2_holder *gh) +{ + struct gfs2_glock *gl = gh->gh_gl; + const struct gfs2_glock_operations *glops = gl->gl_ops; + int ret; + +again: + if (!test_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags)) + return 0; + + /* + * Since we unlock the lockref lock, we set a flag to indicate + * instantiate is in progress. + */ + if (test_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags)) { + wait_on_bit(&gl->gl_flags, GLF_INSTANTIATE_IN_PROG, + TASK_UNINTERRUPTIBLE); + /* + * Here we just waited for a different instantiate to finish. + * But that may not have been successful, as when a process + * locks an inode glock _before_ it has an actual inode to + * instantiate into. So we check again. This process might + * have an inode to instantiate, so might be successful. + */ + goto again; + } + + set_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags); + + ret = glops->go_instantiate(gh); + if (!ret) + clear_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags); + clear_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags); + smp_mb__after_atomic(); + wake_up_bit(&gl->gl_flags, GLF_INSTANTIATE_IN_PROG); + return ret; +} + /** * do_promote - promote as many requests as possible on the current queue * @gl: The glock @@ -484,56 +532,59 @@ static int do_promote(struct gfs2_glock *gl) __releases(&gl->gl_lockref.lock) __acquires(&gl->gl_lockref.lock) { - const struct gfs2_glock_operations *glops = gl->gl_ops; struct gfs2_holder *gh, *tmp, *first_gh; bool incompat_holders_demoted = false; + bool lock_released; int ret; restart: first_gh = find_first_strong_holder(gl); list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { - if (!test_bit(HIF_WAIT, &gh->gh_iflags)) + lock_released = false; + if (test_bit(HIF_HOLDER, &gh->gh_iflags)) continue; - if (may_grant(gl, first_gh, gh)) { - if (!incompat_holders_demoted) { - demote_incompat_holders(gl, first_gh); - incompat_holders_demoted = true; - first_gh = gh; - } - if (gh->gh_list.prev == &gl->gl_holders && - glops->go_lock) { - spin_unlock(&gl->gl_lockref.lock); - /* FIXME: eliminate this eventually */ - ret = glops->go_lock(gh); - spin_lock(&gl->gl_lockref.lock); - if (ret) { - if (ret == 1) - return 2; - gh->gh_error = ret; - list_del_init(&gh->gh_list); - trace_gfs2_glock_queue(gh, 0); - gfs2_holder_wake(gh); - goto restart; - } - set_bit(HIF_HOLDER, &gh->gh_iflags); - trace_gfs2_promote(gh, 1); + if (!may_grant(gl, first_gh, gh)) { + /* + * If we get here, it means we may not grant this holder for + * some reason. If this holder is the head of the list, it + * means we have a blocked holder at the head, so return 1. + */ + if (gh->gh_list.prev == &gl->gl_holders) + return 1; + do_error(gl, 0); + break; + } + if (!incompat_holders_demoted) { + demote_incompat_holders(gl, first_gh); + incompat_holders_demoted = true; + first_gh = gh; + } + if (test_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags) && + !(gh->gh_flags & GL_SKIP) && gl->gl_ops->go_instantiate) { + lock_released = true; + spin_unlock(&gl->gl_lockref.lock); + ret = gfs2_instantiate(gh); + spin_lock(&gl->gl_lockref.lock); + if (ret) { + if (ret == 1) + return 2; + gh->gh_error = ret; + list_del_init(&gh->gh_list); + trace_gfs2_glock_queue(gh, 0); gfs2_holder_wake(gh); goto restart; } - set_bit(HIF_HOLDER, &gh->gh_iflags); - trace_gfs2_promote(gh, 0); - gfs2_holder_wake(gh); - continue; } + set_bit(HIF_HOLDER, &gh->gh_iflags); + trace_gfs2_promote(gh); + gfs2_holder_wake(gh); /* - * If we get here, it means we may not grant this holder for - * some reason. If this holder is the head of the list, it - * means we have a blocked holder at the head, so return 1. + * If we released the gl_lockref.lock the holders list may have + * changed. For that reason, we start again at the start of + * the holders queue. */ - if (gh->gh_list.prev == &gl->gl_holders) - return 1; - do_error(gl, 0); - break; + if (lock_released) + goto restart; } return 0; } @@ -909,7 +960,7 @@ static void gfs2_glock_poke(struct gfs2_glock *gl) struct gfs2_holder gh; int error; - gfs2_holder_init(gl, LM_ST_SHARED, flags, &gh); + __gfs2_holder_init(gl, LM_ST_SHARED, flags, &gh, _RET_IP_); error = gfs2_glock_nq(&gh); if (!error) gfs2_glock_dq(&gh); @@ -1144,7 +1195,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, atomic_inc(&sdp->sd_glock_disposal); gl->gl_node.next = NULL; - gl->gl_flags = 0; + gl->gl_flags = glops->go_instantiate ? BIT(GLF_INSTANTIATE_NEEDED) : 0; gl->gl_name = name; lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass); gl->gl_lockref.count = 1; @@ -1206,12 +1257,12 @@ out: * */ -void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, - struct gfs2_holder *gh) +void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, + struct gfs2_holder *gh, unsigned long ip) { INIT_LIST_HEAD(&gh->gh_list); gh->gh_gl = gl; - gh->gh_ip = _RET_IP_; + gh->gh_ip = ip; gh->gh_owner_pid = get_pid(task_pid(current)); gh->gh_state = state; gh->gh_flags = flags; @@ -2053,10 +2104,10 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) do { rhashtable_walk_start(&iter); - while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl)) - if (gl->gl_name.ln_sbd == sdp && - lockref_get_not_dead(&gl->gl_lockref)) + while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl)) { + if (gl->gl_name.ln_sbd == sdp) examiner(gl); + } rhashtable_walk_stop(&iter); } while (cond_resched(), gl == ERR_PTR(-EAGAIN)); @@ -2079,7 +2130,7 @@ bool gfs2_queue_delete_work(struct gfs2_glock *gl, unsigned long delay) void gfs2_cancel_delete_work(struct gfs2_glock *gl) { - if (cancel_delayed_work_sync(&gl->gl_delete)) { + if (cancel_delayed_work(&gl->gl_delete)) { clear_bit(GLF_PENDING_DELETE, &gl->gl_flags); gfs2_glock_put(gl); } @@ -2098,7 +2149,6 @@ static void flush_delete_work(struct gfs2_glock *gl) &gl->gl_delete, 0); } } - gfs2_glock_queue_work(gl, 0); } void gfs2_flush_delete_work(struct gfs2_sbd *sdp) @@ -2115,10 +2165,10 @@ void gfs2_flush_delete_work(struct gfs2_sbd *sdp) static void thaw_glock(struct gfs2_glock *gl) { - if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) { - gfs2_glock_put(gl); + if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) + return; + if (!lockref_get_not_dead(&gl->gl_lockref)) return; - } set_bit(GLF_REPLY_PENDING, &gl->gl_flags); gfs2_glock_queue_work(gl, 0); } @@ -2134,9 +2184,12 @@ static void clear_glock(struct gfs2_glock *gl) gfs2_glock_remove_from_lru(gl); spin_lock(&gl->gl_lockref.lock); - if (gl->gl_state != LM_ST_UNLOCKED) - handle_callback(gl, LM_ST_UNLOCKED, 0, false); - __gfs2_glock_queue_work(gl, 0); + if (!__lockref_is_dead(&gl->gl_lockref)) { + gl->gl_lockref.count++; + if (gl->gl_state != LM_ST_UNLOCKED) + handle_callback(gl, LM_ST_UNLOCKED, 0, false); + __gfs2_glock_queue_work(gl, 0); + } spin_unlock(&gl->gl_lockref.lock); } @@ -2238,6 +2291,8 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags) *p++ = 'W'; if (test_bit(HIF_MAY_DEMOTE, &iflags)) *p++ = 'D'; + if (flags & GL_SKIP) + *p++ = 's'; *p = 0; return buf; } @@ -2306,6 +2361,10 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) *p++ = 'P'; if (test_bit(GLF_FREEING, gflags)) *p++ = 'x'; + if (test_bit(GLF_INSTANTIATE_NEEDED, gflags)) + *p++ = 'n'; + if (test_bit(GLF_INSTANTIATE_IN_PROG, gflags)) + *p++ = 'N'; *p = 0; return buf; } diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 9012487da4c6..4f8642301801 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -190,13 +190,21 @@ extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, extern void gfs2_glock_hold(struct gfs2_glock *gl); extern void gfs2_glock_put(struct gfs2_glock *gl); extern void gfs2_glock_queue_put(struct gfs2_glock *gl); -extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, - u16 flags, struct gfs2_holder *gh); + +extern void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, + u16 flags, struct gfs2_holder *gh, + unsigned long ip); +static inline void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, + u16 flags, struct gfs2_holder *gh) { + __gfs2_holder_init(gl, state, flags, gh, _RET_IP_); +} + extern void gfs2_holder_reinit(unsigned int state, u16 flags, struct gfs2_holder *gh); extern void gfs2_holder_uninit(struct gfs2_holder *gh); extern int gfs2_glock_nq(struct gfs2_holder *gh); extern int gfs2_glock_poll(struct gfs2_holder *gh); +extern int gfs2_instantiate(struct gfs2_holder *gh); extern int gfs2_glock_wait(struct gfs2_holder *gh); extern int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs); extern void gfs2_glock_dq(struct gfs2_holder *gh); @@ -241,7 +249,7 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl, { int error; - gfs2_holder_init(gl, state, flags, gh); + __gfs2_holder_init(gl, state, flags, gh, _RET_IP_); error = gfs2_glock_nq(gh); if (error) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 79c621c7863d..650ad77c4d0b 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -228,7 +228,7 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags) gfs2_rgrp_brelse(rgd); WARN_ON_ONCE(!(flags & DIO_METADATA)); truncate_inode_pages_range(mapping, start, end); - rgd->rd_flags &= ~GFS2_RDF_UPTODATE; + set_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags); } static void gfs2_rgrp_go_dump(struct seq_file *seq, struct gfs2_glock *gl, @@ -356,7 +356,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) struct address_space *mapping = gfs2_glock2aspace(gl); truncate_inode_pages(mapping, 0); if (ip) { - set_bit(GIF_INVALID, &ip->i_flags); + set_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags); forget_all_cached_acls(&ip->i_inode); security_inode_invalidate_secctx(&ip->i_inode); gfs2_dir_hash_inval(ip); @@ -476,33 +476,29 @@ int gfs2_inode_refresh(struct gfs2_inode *ip) error = gfs2_dinode_in(ip, dibh->b_data); brelse(dibh); - clear_bit(GIF_INVALID, &ip->i_flags); - return error; } /** - * inode_go_lock - operation done after an inode lock is locked by a process + * inode_go_instantiate - read in an inode if necessary * @gh: The glock holder * * Returns: errno */ -static int inode_go_lock(struct gfs2_holder *gh) +static int inode_go_instantiate(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_inode *ip = gl->gl_object; int error = 0; - if (!ip || (gh->gh_flags & GL_SKIP)) - return 0; + if (!ip) /* no inode to populate - read it in later */ + goto out; - if (test_bit(GIF_INVALID, &ip->i_flags)) { - error = gfs2_inode_refresh(ip); - if (error) - return error; - } + error = gfs2_inode_refresh(ip); + if (error) + goto out; if (gh->gh_state != LM_ST_DEFERRED) inode_dio_wait(&ip->i_inode); @@ -515,9 +511,10 @@ static int inode_go_lock(struct gfs2_holder *gh) list_add(&ip->i_trunc_list, &sdp->sd_trunc_list); spin_unlock(&sdp->sd_trunc_lock); wake_up(&sdp->sd_quota_wait); - return 1; + error = 1; } +out: return error; } @@ -740,7 +737,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = { .go_sync = inode_go_sync, .go_inval = inode_go_inval, .go_demote_ok = inode_go_demote_ok, - .go_lock = inode_go_lock, + .go_instantiate = inode_go_instantiate, .go_dump = inode_go_dump, .go_type = LM_TYPE_INODE, .go_flags = GLOF_ASPACE | GLOF_LRU | GLOF_LVB, @@ -750,7 +747,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = { const struct gfs2_glock_operations gfs2_rgrp_glops = { .go_sync = rgrp_go_sync, .go_inval = rgrp_go_inval, - .go_lock = gfs2_rgrp_go_lock, + .go_instantiate = gfs2_rgrp_go_instantiate, .go_dump = gfs2_rgrp_go_dump, .go_type = LM_TYPE_RGRP, .go_flags = GLOF_LVB, diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index ca42d310fd4d..8c00fb389ae5 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -119,7 +119,6 @@ struct gfs2_rgrpd { u32 rd_flags; u32 rd_extfail_pt; /* extent failure point */ #define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */ -#define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */ #define GFS2_RDF_ERROR 0x40000000 /* error in rg */ #define GFS2_RDF_PREFERRED 0x80000000 /* This rgrp is preferred */ #define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ @@ -220,7 +219,7 @@ struct gfs2_glock_operations { int (*go_xmote_bh)(struct gfs2_glock *gl); void (*go_inval) (struct gfs2_glock *gl, int flags); int (*go_demote_ok) (const struct gfs2_glock *gl); - int (*go_lock) (struct gfs2_holder *gh); + int (*go_instantiate) (struct gfs2_holder *gh); void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl, const char *fs_id_buf); void (*go_callback)(struct gfs2_glock *gl, bool remote); @@ -316,6 +315,7 @@ struct gfs2_alloc_parms { enum { GLF_LOCK = 1, + GLF_INSTANTIATE_NEEDED = 2, /* needs instantiate */ GLF_DEMOTE = 3, GLF_PENDING_DEMOTE = 4, GLF_DEMOTE_IN_PROGRESS = 5, @@ -325,6 +325,7 @@ enum { GLF_REPLY_PENDING = 9, GLF_INITIAL = 10, GLF_FROZEN = 11, + GLF_INSTANTIATE_IN_PROG = 12, /* instantiate happening now */ GLF_LRU = 13, GLF_OBJECT = 14, /* Used only for tracing */ GLF_BLOCKING = 15, @@ -371,7 +372,6 @@ struct gfs2_glock { }; enum { - GIF_INVALID = 0, GIF_QD_LOCKED = 1, GIF_ALLOC_FAILED = 2, GIF_SW_PAGED = 3, diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 3130f85d2b3f..6424b903e885 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -182,7 +182,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, } glock_set_object(ip->i_gl, ip); - set_bit(GIF_INVALID, &ip->i_flags); + set_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags); error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); if (unlikely(error)) goto fail; @@ -196,7 +196,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, if (type == DT_UNKNOWN) { /* Inode glock must be locked already */ - error = gfs2_inode_refresh(GFS2_I(inode)); + error = gfs2_instantiate(&i_gh); if (error) goto fail; } else { @@ -225,6 +225,10 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, return inode; fail: + if (gfs2_holder_initialized(&ip->i_iopen_gh)) { + glock_clear_object(ip->i_iopen_gh.gh_gl, ip); + gfs2_glock_dq_uninit(&ip->i_iopen_gh); + } if (io_gl) gfs2_glock_put(io_gl); if (gfs2_holder_initialized(&i_gh)) @@ -727,18 +731,17 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_free_inode; flush_delayed_work(&ip->i_gl->gl_work); - glock_set_object(ip->i_gl, ip); error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl); if (error) goto fail_free_inode; gfs2_cancel_delete_work(io_gl); - glock_set_object(io_gl, ip); error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); if (error) goto fail_gunlock2; + glock_set_object(ip->i_gl, ip); error = gfs2_trans_begin(sdp, blocks, 0); if (error) goto fail_gunlock2; @@ -754,6 +757,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_gunlock2; + glock_set_object(io_gl, ip); gfs2_set_iop(inode); insert_inode_hash(inode); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c3b00ba92ed2..0fb3c01bc557 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -932,7 +932,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) goto fail; rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; - rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED); + rgd->rd_flags &= ~GFS2_RDF_PREFERRED; if (rgd->rd_data > sdp->sd_max_rg_data) sdp->sd_max_rg_data = rgd->rd_data; spin_lock(&sdp->sd_rindex_spin); @@ -1185,8 +1185,8 @@ static void rgrp_set_bitmap_flags(struct gfs2_rgrpd *rgd) } /** - * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps - * @rgd: the struct gfs2_rgrpd describing the RG to read in + * gfs2_rgrp_go_instantiate - Read in a RG's header and bitmaps + * @gh: the glock holder representing the rgrpd to read in * * Read in all of a Resource Group's header and bitmap blocks. * Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps. @@ -1194,10 +1194,11 @@ static void rgrp_set_bitmap_flags(struct gfs2_rgrpd *rgd) * Returns: errno */ -static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) +int gfs2_rgrp_go_instantiate(struct gfs2_holder *gh) { + struct gfs2_glock *gl = gh->gh_gl; + struct gfs2_rgrpd *rgd = gl->gl_object; struct gfs2_sbd *sdp = rgd->rd_sbd; - struct gfs2_glock *gl = rgd->rd_gl; unsigned int length = rgd->rd_length; struct gfs2_bitmap *bi; unsigned int x, y; @@ -1225,21 +1226,18 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) } } - if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) { - gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); - rgrp_set_bitmap_flags(rgd); - rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); - rgd->rd_free_clone = rgd->rd_free; - BUG_ON(rgd->rd_reserved); - /* max out the rgrp allocation failure point */ - rgd->rd_extfail_pt = rgd->rd_free; - } + gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); + rgrp_set_bitmap_flags(rgd); + rgd->rd_flags |= GFS2_RDF_CHECK; + rgd->rd_free_clone = rgd->rd_free; + GLOCK_BUG_ON(rgd->rd_gl, rgd->rd_reserved); + /* max out the rgrp allocation failure point */ + rgd->rd_extfail_pt = rgd->rd_free; if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); - } - else if (sdp->sd_args.ar_rgrplvb) { + } else if (sdp->sd_args.ar_rgrplvb) { if (!gfs2_rgrp_lvb_valid(rgd)){ gfs2_consist_rgrpd(rgd); error = -EIO; @@ -1257,19 +1255,18 @@ fail: bi->bi_bh = NULL; gfs2_assert_warn(sdp, !bi->bi_clone); } - return error; } -static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) +static int update_rgrp_lvb(struct gfs2_rgrpd *rgd, struct gfs2_holder *gh) { u32 rl_flags; - if (rgd->rd_flags & GFS2_RDF_UPTODATE) + if (!test_bit(GLF_INSTANTIATE_NEEDED, &gh->gh_gl->gl_flags)) return 0; if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) - return gfs2_rgrp_bh_get(rgd); + return gfs2_instantiate(gh); rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); rl_flags &= ~GFS2_RDF_MASK; @@ -1280,7 +1277,7 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); rgrp_set_bitmap_flags(rgd); rgd->rd_free_clone = rgd->rd_free; - BUG_ON(rgd->rd_reserved); + GLOCK_BUG_ON(rgd->rd_gl, rgd->rd_reserved); /* max out the rgrp allocation failure point */ rgd->rd_extfail_pt = rgd->rd_free; rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); @@ -1288,16 +1285,6 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) return 0; } -int gfs2_rgrp_go_lock(struct gfs2_holder *gh) -{ - struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; - struct gfs2_sbd *sdp = rgd->rd_sbd; - - if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb) - return 0; - return gfs2_rgrp_bh_get(rgd); -} - /** * gfs2_rgrp_brelse - Release RG bitmaps read in with gfs2_rgrp_bh_get() * @rgd: The resource group @@ -1315,6 +1302,7 @@ void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd) bi->bi_bh = NULL; } } + set_bit(GLF_INSTANTIATE_NEEDED, &rgd->rd_gl->gl_flags); } int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, @@ -2113,7 +2101,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) gfs2_rgrp_congested(rs->rs_rgd, loops)) goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) { - error = update_rgrp_lvb(rs->rs_rgd); + error = update_rgrp_lvb(rs->rs_rgd, + &ip->i_rgd_gh); if (unlikely(error)) { rgrp_unlock_local(rs->rs_rgd); gfs2_glock_dq_uninit(&ip->i_rgd_gh); @@ -2128,8 +2117,11 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) (loops == 0 && target > rs->rs_rgd->rd_extfail_pt)) goto skip_rgrp; - if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rgd); + if (sdp->sd_args.ar_rgrplvb) { + error = gfs2_instantiate(&ip->i_rgd_gh); + if (error) + goto skip_rgrp; + } /* Get a reservation if we don't already have one */ if (!gfs2_rs_active(rs)) @@ -2215,7 +2207,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip) struct gfs2_rgrpd *rgd = rs->rs_rgd; spin_lock(&rgd->rd_rsspin); - BUG_ON(rgd->rd_reserved < rs->rs_reserved); + GLOCK_BUG_ON(rgd->rd_gl, rgd->rd_reserved < rs->rs_reserved); rgd->rd_reserved -= rs->rs_reserved; spin_unlock(&rgd->rd_rsspin); rs->rs_reserved = 0; @@ -2476,9 +2468,9 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, spin_unlock(&rbm.rgd->rd_rsspin); goto rgrp_error; } - BUG_ON(rbm.rgd->rd_reserved < *nblocks); - BUG_ON(rbm.rgd->rd_free_clone < *nblocks); - BUG_ON(rbm.rgd->rd_free < *nblocks); + GLOCK_BUG_ON(rbm.rgd->rd_gl, rbm.rgd->rd_reserved < *nblocks); + GLOCK_BUG_ON(rbm.rgd->rd_gl, rbm.rgd->rd_free_clone < *nblocks); + GLOCK_BUG_ON(rbm.rgd->rd_gl, rbm.rgd->rd_free < *nblocks); rbm.rgd->rd_reserved -= *nblocks; rbm.rgd->rd_free_clone -= *nblocks; rbm.rgd->rd_free -= *nblocks; @@ -2765,8 +2757,6 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) void rgrp_lock_local(struct gfs2_rgrpd *rgd) { - BUG_ON(!gfs2_glock_is_held_excl(rgd->rd_gl) && - !test_bit(SDF_NORECOVERY, &rgd->rd_sbd->sd_flags)); mutex_lock(&rgd->rd_mutex); } diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index a6855fd796e0..3e2ca1fb4305 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -31,7 +31,7 @@ extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); extern void gfs2_clear_rgrpd(struct gfs2_sbd *sdp); extern int gfs2_rindex_update(struct gfs2_sbd *sdp); extern void gfs2_free_clones(struct gfs2_rgrpd *rgd); -extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh); +extern int gfs2_rgrp_go_instantiate(struct gfs2_holder *gh); extern void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd); extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 6e00d15ef0a8..5b121371508a 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1244,8 +1244,8 @@ static enum dinode_demise evict_should_delete(struct inode *inode, if (ret) return SHOULD_NOT_DELETE_DINODE; - if (test_bit(GIF_INVALID, &ip->i_flags)) { - ret = gfs2_inode_refresh(ip); + if (test_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags)) { + ret = gfs2_instantiate(gh); if (ret) return SHOULD_NOT_DELETE_DINODE; } diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index bd6c8e9e49db..a5deb9f86831 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -197,15 +197,14 @@ TRACE_EVENT(gfs2_demote_rq, /* Promotion/grant of a glock */ TRACE_EVENT(gfs2_promote, - TP_PROTO(const struct gfs2_holder *gh, int first), + TP_PROTO(const struct gfs2_holder *gh), - TP_ARGS(gh, first), + TP_ARGS(gh), TP_STRUCT__entry( __field( dev_t, dev ) __field( u64, glnum ) __field( u32, gltype ) - __field( int, first ) __field( u8, state ) ), @@ -213,14 +212,12 @@ TRACE_EVENT(gfs2_promote, __entry->dev = gh->gh_gl->gl_name.ln_sbd->sd_vfs->s_dev; __entry->glnum = gh->gh_gl->gl_name.ln_number; __entry->gltype = gh->gh_gl->gl_name.ln_type; - __entry->first = first; __entry->state = glock_trace_state(gh->gh_state); ), - TP_printk("%u,%u glock %u:%llu promote %s %s", + TP_printk("%u,%u glock %u:%llu promote %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype, (unsigned long long)__entry->glnum, - __entry->first ? "first": "other", glock_trace_name(__entry->state)) ); diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index cf345a86ef67..8241029a2a5d 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -454,6 +454,7 @@ void gfs2_consist_inode_i(struct gfs2_inode *ip, (unsigned long long)ip->i_no_formal_ino, (unsigned long long)ip->i_no_addr, function, file, line); + gfs2_dump_glock(NULL, ip->i_gl, 1); gfs2_withdraw(sdp); } @@ -475,6 +476,7 @@ void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, " function = %s, file = %s, line = %u\n", (unsigned long long)rgd->rd_addr, function, file, line); + gfs2_dump_glock(NULL, rgd->rd_gl, 1); gfs2_withdraw(sdp); } diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 4a95a92546a0..2a5143246282 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -462,8 +462,7 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) goto out; if (S_ISDIR(main_inode->i_mode)) { - if (fd.entrylength < sizeof(struct hfs_cat_dir)) - /* panic? */; + WARN_ON(fd.entrylength < sizeof(struct hfs_cat_dir)); hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_dir)); if (rec.type != HFS_CDR_DIR || @@ -483,8 +482,7 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); } else { - if (fd.entrylength < sizeof(struct hfs_cat_file)) - /* panic? */; + WARN_ON(fd.entrylength < sizeof(struct hfs_cat_file)); hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); if (rec.type != HFS_CDR_FIL || diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 6fef67c2a9f0..d08a8d1d40a4 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -509,8 +509,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) if (type == HFSPLUS_FOLDER) { struct hfsplus_cat_folder *folder = &entry.folder; - if (fd->entrylength < sizeof(struct hfsplus_cat_folder)) - /* panic? */; + WARN_ON(fd->entrylength < sizeof(struct hfsplus_cat_folder)); hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, sizeof(struct hfsplus_cat_folder)); hfsplus_get_perms(inode, &folder->permissions, 1); @@ -530,8 +529,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) } else if (type == HFSPLUS_FILE) { struct hfsplus_cat_file *file = &entry.file; - if (fd->entrylength < sizeof(struct hfsplus_cat_file)) - /* panic? */; + WARN_ON(fd->entrylength < sizeof(struct hfsplus_cat_file)); hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, sizeof(struct hfsplus_cat_file)); @@ -588,8 +586,7 @@ int hfsplus_cat_write_inode(struct inode *inode) if (S_ISDIR(main_inode->i_mode)) { struct hfsplus_cat_folder *folder = &entry.folder; - if (fd.entrylength < sizeof(struct hfsplus_cat_folder)) - /* panic? */; + WARN_ON(fd.entrylength < sizeof(struct hfsplus_cat_folder)); hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_folder)); /* simple node checks? */ @@ -614,8 +611,7 @@ int hfsplus_cat_write_inode(struct inode *inode) } else { struct hfsplus_cat_file *file = &entry.file; - if (fd.entrylength < sizeof(struct hfsplus_cat_file)) - /* panic? */; + WARN_ON(fd.entrylength < sizeof(struct hfsplus_cat_file)); hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_file)); hfsplus_inode_write_fork(inode, &file->data_fork); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index cdfb1ae78a3f..49d2e686be74 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1446,8 +1446,8 @@ static int get_hstate_idx(int page_size_log) * otherwise hugetlb_reserve_pages reserves one less hugepages than intended. */ struct file *hugetlb_file_setup(const char *name, size_t size, - vm_flags_t acctflag, struct ucounts **ucounts, - int creat_flags, int page_size_log) + vm_flags_t acctflag, int creat_flags, + int page_size_log) { struct inode *inode; struct vfsmount *mnt; @@ -1458,22 +1458,19 @@ struct file *hugetlb_file_setup(const char *name, size_t size, if (hstate_idx < 0) return ERR_PTR(-ENODEV); - *ucounts = NULL; mnt = hugetlbfs_vfsmount[hstate_idx]; if (!mnt) return ERR_PTR(-ENOENT); if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { - *ucounts = current_ucounts(); - if (user_shm_lock(size, *ucounts)) { - task_lock(current); - pr_warn_once("%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n", + struct ucounts *ucounts = current_ucounts(); + + if (user_shm_lock(size, ucounts)) { + pr_warn_once("%s (%d): Using mlock ulimits for SHM_HUGETLB is obsolete\n", current->comm, current->pid); - task_unlock(current); - } else { - *ucounts = NULL; - return ERR_PTR(-EPERM); + user_shm_unlock(size, ucounts); } + return ERR_PTR(-EPERM); } file = ERR_PTR(-ENOSPC); @@ -1498,10 +1495,6 @@ struct file *hugetlb_file_setup(const char *name, size_t size, iput(inode); out: - if (*ucounts) { - user_shm_unlock(size, *ucounts); - *ucounts = NULL; - } return file; } diff --git a/fs/inode.c b/fs/inode.c index 9abc88d7959c..3eba0940ffcf 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -428,11 +428,20 @@ void ihold(struct inode *inode) } EXPORT_SYMBOL(ihold); -static void inode_lru_list_add(struct inode *inode) +static void __inode_add_lru(struct inode *inode, bool rotate) { + if (inode->i_state & (I_DIRTY_ALL | I_SYNC | I_FREEING | I_WILL_FREE)) + return; + if (atomic_read(&inode->i_count)) + return; + if (!(inode->i_sb->s_flags & SB_ACTIVE)) + return; + if (!mapping_shrinkable(&inode->i_data)) + return; + if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru)) this_cpu_inc(nr_unused); - else + else if (rotate) inode->i_state |= I_REFERENCED; } @@ -443,16 +452,11 @@ static void inode_lru_list_add(struct inode *inode) */ void inode_add_lru(struct inode *inode) { - if (!(inode->i_state & (I_DIRTY_ALL | I_SYNC | - I_FREEING | I_WILL_FREE)) && - !atomic_read(&inode->i_count) && inode->i_sb->s_flags & SB_ACTIVE) - inode_lru_list_add(inode); + __inode_add_lru(inode, false); } - static void inode_lru_list_del(struct inode *inode) { - if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru)) this_cpu_dec(nr_unused); } @@ -728,10 +732,6 @@ again: /* * Isolate the inode from the LRU in preparation for freeing it. * - * Any inodes which are pinned purely because of attached pagecache have their - * pagecache removed. If the inode has metadata buffers attached to - * mapping->private_list then try to remove them. - * * If the inode has the I_REFERENCED flag set, then it means that it has been * used recently - the flag is set in iput_final(). When we encounter such an * inode, clear the flag and move it to the back of the LRU so it gets another @@ -747,31 +747,39 @@ static enum lru_status inode_lru_isolate(struct list_head *item, struct inode *inode = container_of(item, struct inode, i_lru); /* - * we are inverting the lru lock/inode->i_lock here, so use a trylock. - * If we fail to get the lock, just skip it. + * We are inverting the lru lock/inode->i_lock here, so use a + * trylock. If we fail to get the lock, just skip it. */ if (!spin_trylock(&inode->i_lock)) return LRU_SKIP; /* - * Referenced or dirty inodes are still in use. Give them another pass - * through the LRU as we canot reclaim them now. + * Inodes can get referenced, redirtied, or repopulated while + * they're already on the LRU, and this can make them + * unreclaimable for a while. Remove them lazily here; iput, + * sync, or the last page cache deletion will requeue them. */ if (atomic_read(&inode->i_count) || - (inode->i_state & ~I_REFERENCED)) { + (inode->i_state & ~I_REFERENCED) || + !mapping_shrinkable(&inode->i_data)) { list_lru_isolate(lru, &inode->i_lru); spin_unlock(&inode->i_lock); this_cpu_dec(nr_unused); return LRU_REMOVED; } - /* recently referenced inodes get one more pass */ + /* Recently referenced inodes get one more pass */ if (inode->i_state & I_REFERENCED) { inode->i_state &= ~I_REFERENCED; spin_unlock(&inode->i_lock); return LRU_ROTATE; } + /* + * On highmem systems, mapping_shrinkable() permits dropping + * page cache in order to free up struct inodes: lowmem might + * be under pressure before the cache inside the highmem zone. + */ if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) { __iget(inode); spin_unlock(&inode->i_lock); @@ -1638,7 +1646,7 @@ static void iput_final(struct inode *inode) if (!drop && !(inode->i_state & I_DONTCACHE) && (sb->s_flags & SB_ACTIVE)) { - inode_add_lru(inode); + __inode_add_lru(inode, true); spin_unlock(&inode->i_lock); return; } diff --git a/fs/internal.h b/fs/internal.h index cdd83d4899bb..7979ff8d168c 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -138,7 +138,6 @@ extern int vfs_open(const struct path *, struct file *); * inode.c */ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc); -extern void inode_add_lru(struct inode *inode); extern int dentry_needs_remove_privs(struct dentry *dentry); /* diff --git a/fs/io-wq.c b/fs/io-wq.c index c51691262208..afd955d53db9 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -1308,7 +1308,9 @@ int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask) */ int io_wq_max_workers(struct io_wq *wq, int *new_count) { - int i, node, prev = 0; + int prev[IO_WQ_ACCT_NR]; + bool first_node = true; + int i, node; BUILD_BUG_ON((int) IO_WQ_ACCT_BOUND != (int) IO_WQ_BOUND); BUILD_BUG_ON((int) IO_WQ_ACCT_UNBOUND != (int) IO_WQ_UNBOUND); @@ -1319,6 +1321,9 @@ int io_wq_max_workers(struct io_wq *wq, int *new_count) new_count[i] = task_rlimit(current, RLIMIT_NPROC); } + for (i = 0; i < IO_WQ_ACCT_NR; i++) + prev[i] = 0; + rcu_read_lock(); for_each_node(node) { struct io_wqe *wqe = wq->wqes[node]; @@ -1327,14 +1332,19 @@ int io_wq_max_workers(struct io_wq *wq, int *new_count) raw_spin_lock(&wqe->lock); for (i = 0; i < IO_WQ_ACCT_NR; i++) { acct = &wqe->acct[i]; - prev = max_t(int, acct->max_workers, prev); + if (first_node) + prev[i] = max_t(int, acct->max_workers, prev[i]); if (new_count[i]) acct->max_workers = new_count[i]; - new_count[i] = prev; } raw_spin_unlock(&wqe->lock); + first_node = false; } rcu_read_unlock(); + + for (i = 0; i < IO_WQ_ACCT_NR; i++) + new_count[i] = prev[i]; + return 0; } diff --git a/fs/io_uring.c b/fs/io_uring.c index 3ecd4b51510e..b07196b4511c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6950,10 +6950,6 @@ static void io_queue_sqe_arm_apoll(struct io_kiocb *req) switch (io_arm_poll_handler(req)) { case IO_APOLL_READY: - if (linked_timeout) { - io_queue_linked_timeout(linked_timeout); - linked_timeout = NULL; - } io_req_task_queue(req); break; case IO_APOLL_ABORTED: @@ -10144,7 +10140,7 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, for (i = 0; i < sq_entries; i++) { unsigned int entry = i + sq_head; unsigned int sq_idx = READ_ONCE(ctx->sq_array[entry & sq_mask]); - struct io_uring_sqe *sqe = &ctx->sq_sqes[sq_idx]; + struct io_uring_sqe *sqe; if (sq_idx > sq_mask) continue; @@ -10795,10 +10791,11 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits)); - memcpy(ctx->iowq_limits, new_count, sizeof(new_count)); + for (i = 0; i < ARRAY_SIZE(new_count); i++) + if (new_count[i]) + ctx->iowq_limits[i] = new_count[i]; ctx->iowq_limits_set = true; - ret = -EINVAL; if (tctx && tctx->io_wq) { ret = io_wq_max_workers(tctx->io_wq, new_count); if (ret) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 678e2c51b855..0c6eacfcbeef 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -1322,6 +1322,8 @@ static int isofs_read_inode(struct inode *inode, int relocated) de = (struct iso_directory_record *) (bh->b_data + offset); de_len = *(unsigned char *) de; + if (de_len < sizeof(struct iso_directory_record)) + goto fail; if (offset + de_len > bufsize) { int frag1 = bufsize - offset; diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c index 5d7d7170c03c..aa4ff7bcaff2 100644 --- a/fs/jfs/jfs_mount.c +++ b/fs/jfs/jfs_mount.c @@ -81,14 +81,14 @@ int jfs_mount(struct super_block *sb) * (initialize mount inode from the superblock) */ if ((rc = chkSuper(sb))) { - goto errout20; + goto out; } ipaimap = diReadSpecial(sb, AGGREGATE_I, 0); if (ipaimap == NULL) { jfs_err("jfs_mount: Failed to read AGGREGATE_I"); rc = -EIO; - goto errout20; + goto out; } sbi->ipaimap = ipaimap; @@ -99,7 +99,7 @@ int jfs_mount(struct super_block *sb) */ if ((rc = diMount(ipaimap))) { jfs_err("jfs_mount: diMount(ipaimap) failed w/rc = %d", rc); - goto errout21; + goto err_ipaimap; } /* @@ -108,7 +108,7 @@ int jfs_mount(struct super_block *sb) ipbmap = diReadSpecial(sb, BMAP_I, 0); if (ipbmap == NULL) { rc = -EIO; - goto errout22; + goto err_umount_ipaimap; } jfs_info("jfs_mount: ipbmap:0x%p", ipbmap); @@ -120,7 +120,7 @@ int jfs_mount(struct super_block *sb) */ if ((rc = dbMount(ipbmap))) { jfs_err("jfs_mount: dbMount failed w/rc = %d", rc); - goto errout22; + goto err_ipbmap; } /* @@ -139,7 +139,7 @@ int jfs_mount(struct super_block *sb) if (!ipaimap2) { jfs_err("jfs_mount: Failed to read AGGREGATE_I"); rc = -EIO; - goto errout35; + goto err_umount_ipbmap; } sbi->ipaimap2 = ipaimap2; @@ -151,7 +151,7 @@ int jfs_mount(struct super_block *sb) if ((rc = diMount(ipaimap2))) { jfs_err("jfs_mount: diMount(ipaimap2) failed, rc = %d", rc); - goto errout35; + goto err_ipaimap2; } } else /* Secondary aggregate inode table is not valid */ @@ -168,7 +168,7 @@ int jfs_mount(struct super_block *sb) jfs_err("jfs_mount: Failed to read FILESYSTEM_I"); /* open fileset secondary inode allocation map */ rc = -EIO; - goto errout40; + goto err_umount_ipaimap2; } jfs_info("jfs_mount: ipimap:0x%p", ipimap); @@ -178,41 +178,34 @@ int jfs_mount(struct super_block *sb) /* initialize fileset inode allocation map */ if ((rc = diMount(ipimap))) { jfs_err("jfs_mount: diMount failed w/rc = %d", rc); - goto errout41; + goto err_ipimap; } - goto out; + return rc; /* * unwind on error */ - errout41: /* close fileset inode allocation map inode */ +err_ipimap: + /* close fileset inode allocation map inode */ diFreeSpecial(ipimap); - - errout40: /* fileset closed */ - +err_umount_ipaimap2: /* close secondary aggregate inode allocation map */ - if (ipaimap2) { + if (ipaimap2) diUnmount(ipaimap2, 1); +err_ipaimap2: + /* close aggregate inodes */ + if (ipaimap2) diFreeSpecial(ipaimap2); - } - - errout35: - - /* close aggregate block allocation map */ +err_umount_ipbmap: /* close aggregate block allocation map */ dbUnmount(ipbmap, 1); +err_ipbmap: /* close aggregate inodes */ diFreeSpecial(ipbmap); - - errout22: /* close aggregate inode allocation map */ - +err_umount_ipaimap: /* close aggregate inode allocation map */ diUnmount(ipaimap, 1); - - errout21: /* close aggregate inodes */ +err_ipaimap: /* close aggregate inodes */ diFreeSpecial(ipaimap); - errout20: /* aggregate closed */ - - out: - +out: if (rc) jfs_err("Mount JFS Failure: %d", rc); diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c index c8f8e41b8411..19a6c71c6ff5 100644 --- a/fs/kernfs/symlink.c +++ b/fs/kernfs/symlink.c @@ -36,8 +36,7 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, gid = target->iattr->ia_gid; } - kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, uid, gid, - KERNFS_LINK); + kn = kernfs_new_node(parent, name, S_IFLNK|0777, uid, gid, KERNFS_LINK); if (!kn) return ERR_PTR(-ENOMEM); diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index b11f2afa84f1..99fffc9cb958 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -794,9 +794,6 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data) goto retry_cancel; } - dprintk("lockd: cancel status %u (task %u)\n", - status, task->tk_pid); - switch (status) { case NLM_LCK_GRANTED: case NLM_LCK_DENIED_GRACE_PERIOD: diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index b632be3ad57b..b220e1b91726 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -780,11 +780,9 @@ module_exit(exit_nlm); static int nlmsvc_dispatch(struct svc_rqst *rqstp, __be32 *statp) { const struct svc_procedure *procp = rqstp->rq_procinfo; - struct kvec *argv = rqstp->rq_arg.head; - struct kvec *resv = rqstp->rq_res.head; svcxdr_init_decode(rqstp); - if (!procp->pc_decode(rqstp, argv->iov_base)) + if (!procp->pc_decode(rqstp, &rqstp->rq_arg_stream)) goto out_decode_err; *statp = procp->pc_func(rqstp); @@ -794,7 +792,7 @@ static int nlmsvc_dispatch(struct svc_rqst *rqstp, __be32 *statp) return 1; svcxdr_init_encode(rqstp); - if (!procp->pc_encode(rqstp, resv->iov_base + resv->iov_len)) + if (!procp->pc_encode(rqstp, &rqstp->rq_res_stream)) goto out_encode_err; return 1; diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index e10ae2c41279..176b468a61c7 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -269,8 +269,6 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp) */ static void nlm4svc_callback_exit(struct rpc_task *task, void *data) { - dprintk("lockd: %5u callback returned %d\n", task->tk_pid, - -task->tk_status); } static void nlm4svc_callback_release(void *data) diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 99696d3f6dd6..4dc1b40a489a 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -301,8 +301,6 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp) */ static void nlmsvc_callback_exit(struct rpc_task *task, void *data) { - dprintk("lockd: %5u callback returned %d\n", task->tk_pid, - -task->tk_status); } void nlmsvc_release_call(struct nlm_rqst *call) diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 9235e60b1769..2fb5748dae0c 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -145,137 +145,131 @@ svcxdr_encode_testrply(struct xdr_stream *xdr, const struct nlm_res *resp) * Decode Call arguments */ -int -nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - return 1; + return true; } -int -nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &exclusive) < 0) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; - return 1; + return true; } -int -nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &argp->block) < 0) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &exclusive) < 0) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; if (xdr_stream_decode_bool(xdr, &argp->reclaim) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return 0; + return false; argp->monitor = 1; /* monitor client by default */ - return 1; + return true; } -int -nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &argp->block) < 0) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &exclusive) < 0) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; - return 1; + return true; } -int -nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; argp->lock.fl.fl_type = F_UNLCK; - return 1; + return true; } -int -nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_res *resp = rqstp->rq_argp; if (!svcxdr_decode_cookie(xdr, &resp->cookie)) - return 0; + return false; if (!svcxdr_decode_stats(xdr, &resp->status)) - return 0; + return false; - return 1; + return true; } -int -nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_reboot *argp = rqstp->rq_argp; + __be32 *p; u32 len; if (xdr_stream_decode_u32(xdr, &len) < 0) - return 0; + return false; if (len > SM_MAXSTRLEN) - return 0; + return false; p = xdr_inline_decode(xdr, len); if (!p) - return 0; + return false; argp->len = len; argp->mon = (char *)p; if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return 0; + return false; p = xdr_inline_decode(xdr, SM_PRIV_SIZE); if (!p) - return 0; + return false; memcpy(&argp->priv.data, p, sizeof(argp->priv.data)); - return 1; + return true; } -int -nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; struct nlm_lock *lock = &argp->lock; @@ -284,35 +278,34 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) lock->svid = ~(u32)0; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) - return 0; + return false; if (!svcxdr_decode_fhandle(xdr, &lock->fh)) - return 0; + return false; if (!svcxdr_decode_owner(xdr, &lock->oh)) - return 0; + return false; /* XXX: Range checks are missing in the original code */ if (xdr_stream_decode_u32(xdr, &argp->fsm_mode) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->fsm_access) < 0) - return 0; + return false; - return 1; + return true; } -int -nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; struct nlm_lock *lock = &argp->lock; if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return 0; + return false; - return 1; + return true; } @@ -320,45 +313,42 @@ nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p) * Encode Reply results */ -int -nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - return 1; + return true; } -int -nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nlm_res *resp = rqstp->rq_resp; return svcxdr_encode_cookie(xdr, &resp->cookie) && svcxdr_encode_testrply(xdr, resp); } -int -nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nlm_res *resp = rqstp->rq_resp; return svcxdr_encode_cookie(xdr, &resp->cookie) && svcxdr_encode_stats(xdr, resp->status); } -int -nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p) +bool +nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nlm_res *resp = rqstp->rq_resp; if (!svcxdr_encode_cookie(xdr, &resp->cookie)) - return 0; + return false; if (!svcxdr_encode_stats(xdr, resp->status)) - return 0; + return false; /* sequence */ if (xdr_stream_encode_u32(xdr, 0) < 0) - return 0; + return false; - return 1; + return true; } diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 98e957e4566c..856267c0864b 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -144,136 +144,131 @@ svcxdr_encode_testrply(struct xdr_stream *xdr, const struct nlm_res *resp) * Decode Call arguments */ -int -nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - return 1; + return true; } -int -nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &exclusive) < 0) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; - return 1; + return true; } -int -nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &argp->block) < 0) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &exclusive) < 0) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; if (xdr_stream_decode_bool(xdr, &argp->reclaim) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return 0; + return false; argp->monitor = 1; /* monitor client by default */ - return 1; + return true; } -int -nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; u32 exclusive; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &argp->block) < 0) - return 0; + return false; if (xdr_stream_decode_bool(xdr, &exclusive) < 0) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; - return 1; + + return true; } -int -nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (!svcxdr_decode_lock(xdr, &argp->lock)) - return 0; + return false; argp->lock.fl.fl_type = F_UNLCK; - return 1; + return true; } -int -nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_res *resp = rqstp->rq_argp; if (!svcxdr_decode_cookie(xdr, &resp->cookie)) - return 0; + return false; if (!svcxdr_decode_stats(xdr, &resp->status)) - return 0; + return false; - return 1; + return true; } -int -nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_reboot *argp = rqstp->rq_argp; + __be32 *p; u32 len; if (xdr_stream_decode_u32(xdr, &len) < 0) - return 0; + return false; if (len > SM_MAXSTRLEN) - return 0; + return false; p = xdr_inline_decode(xdr, len); if (!p) - return 0; + return false; argp->len = len; argp->mon = (char *)p; if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return 0; + return false; p = xdr_inline_decode(xdr, SM_PRIV_SIZE); if (!p) - return 0; + return false; memcpy(&argp->priv.data, p, sizeof(argp->priv.data)); - return 1; + return true; } -int -nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; struct nlm_lock *lock = &argp->lock; @@ -282,35 +277,34 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) lock->svid = ~(u32)0; if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return 0; + return false; if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) - return 0; + return false; if (!svcxdr_decode_fhandle(xdr, &lock->fh)) - return 0; + return false; if (!svcxdr_decode_owner(xdr, &lock->oh)) - return 0; + return false; /* XXX: Range checks are missing in the original code */ if (xdr_stream_decode_u32(xdr, &argp->fsm_mode) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->fsm_access) < 0) - return 0; + return false; - return 1; + return true; } -int -nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nlm_args *argp = rqstp->rq_argp; struct nlm_lock *lock = &argp->lock; if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return 0; + return false; - return 1; + return true; } @@ -318,45 +312,42 @@ nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p) * Encode Reply results */ -int -nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - return 1; + return true; } -int -nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nlm_res *resp = rqstp->rq_resp; return svcxdr_encode_cookie(xdr, &resp->cookie) && svcxdr_encode_testrply(xdr, resp); } -int -nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nlm_res *resp = rqstp->rq_resp; return svcxdr_encode_cookie(xdr, &resp->cookie) && svcxdr_encode_stats(xdr, resp->status); } -int -nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p) +bool +nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nlm_res *resp = rqstp->rq_resp; if (!svcxdr_encode_cookie(xdr, &resp->cookie)) - return 0; + return false; if (!svcxdr_encode_stats(xdr, resp->status)) - return 0; + return false; /* sequence */ if (xdr_stream_encode_u32(xdr, 0) < 0) - return 0; + return false; - return 1; + return true; } diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index ed9d580826f5..09c5b1cb3e07 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -739,6 +739,9 @@ out: kfree(copy); spin_unlock(&cps->clp->cl_lock); + trace_nfs4_cb_offload(&args->coa_fh, &args->coa_stateid, + args->wr_count, args->error, + args->wr_writeverf.committed); return 0; } #endif /* CONFIG_NFS_V4_2 */ diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 4c48d85f6517..a67c41ec545f 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -67,9 +67,9 @@ static __be32 nfs4_callback_null(struct svc_rqst *rqstp) * svc_process_common() looks for an XDR encoder to know when * not to drop a Reply. */ -static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p) +static bool nfs4_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - return xdr_ressize_check(rqstp, p); + return true; } static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len, diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 23e165d5ec9c..1e4dc1ab9312 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -828,7 +828,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, /* * Probe filesystem information, including the FSID on v2/v3 */ -int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr) +static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr) { struct nfs_fsinfo fsinfo; struct nfs_client *clp = server->nfs_client; @@ -862,7 +862,30 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs return 0; } -EXPORT_SYMBOL_GPL(nfs_probe_fsinfo); + +/* + * Grab the destination's particulars, including lease expiry time. + * + * Returns zero if probe succeeded and retrieved FSID matches the FSID + * we have cached. + */ +int nfs_probe_server(struct nfs_server *server, struct nfs_fh *mntfh) +{ + struct nfs_fattr *fattr; + int error; + + fattr = nfs_alloc_fattr(); + if (fattr == NULL) + return -ENOMEM; + + /* Sanity: the probe won't work if the destination server + * does not recognize the migrated FH. */ + error = nfs_probe_fsinfo(server, mntfh, fattr); + + nfs_free_fattr(fattr); + return error; +} +EXPORT_SYMBOL_GPL(nfs_probe_server); /* * Copy useful information when duplicating a server record @@ -1025,7 +1048,7 @@ struct nfs_server *nfs_create_server(struct fs_context *fc) if (!(fattr->valid & NFS_ATTR_FATTR)) { error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh, - fattr, NULL, NULL); + fattr, NULL); if (error < 0) { dprintk("nfs_create_server: getattr error = %d\n", -error); goto error; @@ -1058,7 +1081,6 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, rpc_authflavor_t flavor) { struct nfs_server *server; - struct nfs_fattr *fattr_fsinfo; int error; server = nfs_alloc_server(); @@ -1067,11 +1089,6 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, server->cred = get_cred(source->cred); - error = -ENOMEM; - fattr_fsinfo = nfs_alloc_fattr(); - if (fattr_fsinfo == NULL) - goto out_free_server; - /* Copy data from the source */ server->nfs_client = source->nfs_client; server->destroy = source->destroy; @@ -1087,7 +1104,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, goto out_free_server; /* probe the filesystem info for this server filesystem */ - error = nfs_probe_fsinfo(server, fh, fattr_fsinfo); + error = nfs_probe_server(server, fh); if (error < 0) goto out_free_server; @@ -1101,11 +1118,9 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, nfs_server_insert_lists(server); server->mount_time = jiffies; - nfs_free_fattr(fattr_fsinfo); return server; out_free_server: - nfs_free_fattr(fattr_fsinfo); nfs_free_server(server); return ERR_PTR(error); } diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 11118398f495..7c9eb679dbdb 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -755,11 +755,13 @@ int nfs4_inode_return_delegation(struct inode *inode) struct nfs_delegation *delegation; delegation = nfs_start_delegation_return(nfsi); - /* Synchronous recall of any application leases */ - break_lease(inode, O_WRONLY | O_RDWR); - nfs_wb_all(inode); - if (delegation != NULL) + if (delegation != NULL) { + /* Synchronous recall of any application leases */ + break_lease(inode, O_WRONLY | O_RDWR); + if (S_ISREG(inode->i_mode)) + nfs_wb_all(inode); return nfs_end_delegation_return(inode, delegation, 1); + } return 0; } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 1a6d2867fba4..731d31015b6a 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -78,6 +78,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir ctx->attr_gencount = nfsi->attr_gencount; ctx->dir_cookie = 0; ctx->dup_cookie = 0; + ctx->page_index = 0; spin_lock(&dir->i_lock); if (list_empty(&nfsi->open_files) && (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER)) @@ -85,6 +86,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir NFS_INO_INVALID_DATA | NFS_INO_REVAL_FORCED); list_add(&ctx->list, &nfsi->open_files); + clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags); spin_unlock(&dir->i_lock); return ctx; } @@ -411,7 +413,8 @@ out_eof: static bool nfs_readdir_inode_mapping_valid(struct nfs_inode *nfsi) { - if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) + if (nfsi->cache_validity & (NFS_INO_INVALID_CHANGE | + NFS_INO_INVALID_DATA)) return false; smp_rmb(); return !test_bit(NFS_INO_INVALIDATING, &nfsi->flags); @@ -626,8 +629,7 @@ void nfs_force_use_readdirplus(struct inode *dir) if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && !list_empty(&nfsi->open_files)) { set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags); - invalidate_mapping_pages(dir->i_mapping, - nfsi->page_index + 1, -1); + set_bit(NFS_INO_FORCE_READDIR, &nfsi->flags); } } @@ -680,7 +682,7 @@ again: nfs_set_verifier(dentry, dir_verifier); status = nfs_refresh_inode(d_inode(dentry), entry->fattr); if (!status) - nfs_setsecurity(d_inode(dentry), entry->fattr, entry->label); + nfs_setsecurity(d_inode(dentry), entry->fattr); goto out; } else { d_invalidate(dentry); @@ -694,7 +696,7 @@ again: goto out; } - inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr, entry->label); + inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); alias = d_splice_alias(inode, dentry); d_lookup_done(dentry); if (alias) { @@ -730,8 +732,8 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc, xdr_set_scratch_page(&stream, scratch); do { - if (entry->label) - entry->label->len = NFS4_MAXLABELLEN; + if (entry->fattr->label) + entry->fattr->label->len = NFS4_MAXLABELLEN; status = xdr_decode(desc, entry, &stream); if (status != 0) @@ -836,21 +838,15 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc, return -ENOMEM; entry->cookie = nfs_readdir_page_last_cookie(page); entry->fh = nfs_alloc_fhandle(); - entry->fattr = nfs_alloc_fattr(); + entry->fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); entry->server = NFS_SERVER(inode); if (entry->fh == NULL || entry->fattr == NULL) goto out; - entry->label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT); - if (IS_ERR(entry->label)) { - status = PTR_ERR(entry->label); - goto out; - } - array_size = (dtsize + PAGE_SIZE - 1) >> PAGE_SHIFT; pages = nfs_readdir_alloc_pages(array_size); if (!pages) - goto out_release_label; + goto out; do { unsigned int pglen; @@ -873,8 +869,6 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc, } while (!status && nfs_readdir_page_needs_filling(page)); nfs_readdir_free_pages(pages, array_size); -out_release_label: - nfs4_label_free(entry->label); out: nfs_free_fattr(entry->fattr); nfs_free_fhandle(entry->fh); @@ -937,10 +931,8 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc) sizeof(nfsi->cookieverf)); } res = nfs_readdir_search_array(desc); - if (res == 0) { - nfsi->page_index = desc->page_index; + if (res == 0) return 0; - } nfs_readdir_page_unlock_and_put_cached(desc); return res; } @@ -1079,6 +1071,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) struct nfs_inode *nfsi = NFS_I(inode); struct nfs_open_dir_context *dir_ctx = file->private_data; struct nfs_readdir_descriptor *desc; + pgoff_t page_index; int res; dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n", @@ -1109,10 +1102,15 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) desc->dir_cookie = dir_ctx->dir_cookie; desc->dup_cookie = dir_ctx->dup_cookie; desc->duped = dir_ctx->duped; + page_index = dir_ctx->page_index; desc->attr_gencount = dir_ctx->attr_gencount; memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf)); spin_unlock(&file->f_lock); + if (test_and_clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags) && + list_is_singular(&nfsi->open_files)) + invalidate_mapping_pages(inode->i_mapping, page_index + 1, -1); + do { res = readdir_search_pagecache(desc); @@ -1149,6 +1147,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) dir_ctx->dup_cookie = desc->dup_cookie; dir_ctx->duped = desc->duped; dir_ctx->attr_gencount = desc->attr_gencount; + dir_ctx->page_index = desc->page_index; memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf)); spin_unlock(&file->f_lock); @@ -1269,13 +1268,12 @@ static bool nfs_verifier_is_delegated(struct dentry *dentry) static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf) { struct inode *inode = d_inode(dentry); + struct inode *dir = d_inode(dentry->d_parent); - if (!nfs_verifier_is_delegated(dentry) && - !nfs_verify_change_attribute(d_inode(dentry->d_parent), verf)) - goto out; + if (!nfs_verify_change_attribute(dir, verf)) + return; if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) nfs_set_verifier_delegated(&verf); -out: dentry->d_time = verf; } @@ -1413,7 +1411,7 @@ out_force: static void nfs_mark_dir_for_revalidate(struct inode *inode) { spin_lock(&inode->i_lock); - nfs_set_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE); spin_unlock(&inode->i_lock); } @@ -1495,19 +1493,17 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry, { struct nfs_fh *fhandle; struct nfs_fattr *fattr; - struct nfs4_label *label; unsigned long dir_verifier; int ret; ret = -ENOMEM; fhandle = nfs_alloc_fhandle(); - fattr = nfs_alloc_fattr(); - label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); - if (fhandle == NULL || fattr == NULL || IS_ERR(label)) + fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); + if (fhandle == NULL || fattr == NULL) goto out; dir_verifier = nfs_save_change_attribute(dir); - ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, label); + ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr); if (ret < 0) { switch (ret) { case -ESTALE: @@ -1526,7 +1522,7 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry, if (nfs_refresh_inode(inode, fattr) < 0) goto out; - nfs_setsecurity(inode, fattr, label); + nfs_setsecurity(inode, fattr); nfs_set_verifier(dentry, dir_verifier); /* set a readdirplus hint that we had a cache miss */ @@ -1535,7 +1531,6 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry, out: nfs_free_fattr(fattr); nfs_free_fhandle(fhandle); - nfs4_label_free(label); /* * If the lookup failed despite the dentry change attribute being @@ -1721,10 +1716,6 @@ static void nfs_drop_nlink(struct inode *inode) */ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) { - if (S_ISDIR(inode->i_mode)) - /* drop any readdir cache as it could easily be old */ - nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); - if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { nfs_complete_unlink(dentry, inode); nfs_drop_nlink(inode); @@ -1759,7 +1750,6 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in struct inode *inode = NULL; struct nfs_fh *fhandle = NULL; struct nfs_fattr *fattr = NULL; - struct nfs4_label *label = NULL; unsigned long dir_verifier; int error; @@ -1778,27 +1768,23 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in res = ERR_PTR(-ENOMEM); fhandle = nfs_alloc_fhandle(); - fattr = nfs_alloc_fattr(); + fattr = nfs_alloc_fattr_with_label(NFS_SERVER(dir)); if (fhandle == NULL || fattr == NULL) goto out; - label = nfs4_label_alloc(NFS_SERVER(dir), GFP_NOWAIT); - if (IS_ERR(label)) - goto out; - dir_verifier = nfs_save_change_attribute(dir); trace_nfs_lookup_enter(dir, dentry, flags); - error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, label); + error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr); if (error == -ENOENT) goto no_entry; if (error < 0) { res = ERR_PTR(error); - goto out_label; + goto out; } - inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label); + inode = nfs_fhget(dentry->d_sb, fhandle, fattr); res = ERR_CAST(inode); if (IS_ERR(res)) - goto out_label; + goto out; /* Notify readdir to use READDIRPLUS */ nfs_force_use_readdirplus(dir); @@ -1807,14 +1793,12 @@ no_entry: res = d_splice_alias(inode, dentry); if (res != NULL) { if (IS_ERR(res)) - goto out_label; + goto out; dentry = res; } nfs_set_verifier(dentry, dir_verifier); -out_label: - trace_nfs_lookup_exit(dir, dentry, flags, error); - nfs4_label_free(label); out: + trace_nfs_lookup_exit(dir, dentry, flags, PTR_ERR_OR_ZERO(res)); nfs_free_fattr(fattr); nfs_free_fhandle(fhandle); return res; @@ -2051,8 +2035,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) struct dentry * nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, - struct nfs4_label *label) + struct nfs_fattr *fattr) { struct dentry *parent = dget_parent(dentry); struct inode *dir = d_inode(parent); @@ -2063,7 +2046,7 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle, d_drop(dentry); if (fhandle->size == 0) { - error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, NULL); + error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr); if (error) goto out_error; } @@ -2071,11 +2054,11 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle, if (!(fattr->valid & NFS_ATTR_FATTR)) { struct nfs_server *server = NFS_SB(dentry->d_sb); error = server->nfs_client->rpc_ops->getattr(server, fhandle, - fattr, NULL, NULL); + fattr, NULL); if (error < 0) goto out_error; } - inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label); + inode = nfs_fhget(dentry->d_sb, fhandle, fattr); d = d_splice_alias(inode, dentry); out: dput(parent); @@ -2090,12 +2073,11 @@ EXPORT_SYMBOL_GPL(nfs_add_or_obtain); * Code common to create, mkdir, and mknod. */ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, - struct nfs4_label *label) + struct nfs_fattr *fattr) { struct dentry *d; - d = nfs_add_or_obtain(dentry, fhandle, fattr, label); + d = nfs_add_or_obtain(dentry, fhandle, fattr); if (IS_ERR(d)) return PTR_ERR(d); @@ -2197,6 +2179,18 @@ static void nfs_dentry_handle_enoent(struct dentry *dentry) d_delete(dentry); } +static void nfs_dentry_remove_handle_error(struct inode *dir, + struct dentry *dentry, int error) +{ + switch (error) { + case -ENOENT: + d_delete(dentry); + fallthrough; + case 0: + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + } +} + int nfs_rmdir(struct inode *dir, struct dentry *dentry) { int error; @@ -2219,6 +2213,7 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) up_write(&NFS_I(d_inode(dentry))->rmdir_sem); } else error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); + nfs_dentry_remove_handle_error(dir, dentry, error); trace_nfs_rmdir_exit(dir, dentry, error); return error; @@ -2288,9 +2283,8 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) } spin_unlock(&dentry->d_lock); error = nfs_safe_remove(dentry); - if (!error || error == -ENOENT) { - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - } else if (need_rehash) + nfs_dentry_remove_handle_error(dir, dentry, error); + if (need_rehash) d_rehash(dentry); out: trace_nfs_unlink_exit(dir, dentry, error); @@ -2352,6 +2346,8 @@ int nfs_symlink(struct user_namespace *mnt_userns, struct inode *dir, return error; } + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + /* * No big deal if we can't add this page to the page cache here. * READLINK will get the missing page from the server if needed. @@ -2385,6 +2381,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) d_drop(dentry); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); if (error == 0) { + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); ihold(inode); d_add(dentry, inode); } diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 7a5f287c5391..9cff8709c80a 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -620,7 +620,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) nfs_unlock_and_release_request(req); } - if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) + if (nfs_commit_end(cinfo.mds)) nfs_direct_write_complete(dreq); } diff --git a/fs/nfs/export.c b/fs/nfs/export.c index d772c20bbfd1..171c424cb6d5 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -64,7 +64,6 @@ static struct dentry * nfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { - struct nfs4_label *label = NULL; struct nfs_fattr *fattr = NULL; struct nfs_fh *server_fh = nfs_exp_embedfh(fid->raw); size_t fh_size = offsetof(struct nfs_fh, data) + server_fh->size; @@ -79,7 +78,7 @@ nfs_fh_to_dentry(struct super_block *sb, struct fid *fid, if (fh_len < len || fh_type != len) return NULL; - fattr = nfs_alloc_fattr(); + fattr = nfs_alloc_fattr_with_label(NFS_SB(sb)); if (fattr == NULL) { dentry = ERR_PTR(-ENOMEM); goto out; @@ -95,28 +94,19 @@ nfs_fh_to_dentry(struct super_block *sb, struct fid *fid, if (inode) goto out_found; - label = nfs4_label_alloc(NFS_SB(sb), GFP_KERNEL); - if (IS_ERR(label)) { - dentry = ERR_CAST(label); - goto out_free_fattr; - } - rpc_ops = NFS_SB(sb)->nfs_client->rpc_ops; - ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, label, NULL); + ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, NULL); if (ret) { dprintk("%s: getattr failed %d\n", __func__, ret); trace_nfs_fh_to_dentry(sb, server_fh, fattr->fileid, ret); dentry = ERR_PTR(ret); - goto out_free_label; + goto out_free_fattr; } - inode = nfs_fhget(sb, server_fh, fattr, label); + inode = nfs_fhget(sb, server_fh, fattr); out_found: dentry = d_obtain_alias(inode); - -out_free_label: - nfs4_label_free(label); out_free_fattr: nfs_free_fattr(fattr); out: @@ -131,7 +121,6 @@ nfs_get_parent(struct dentry *dentry) struct super_block *sb = inode->i_sb; struct nfs_server *server = NFS_SB(sb); struct nfs_fattr *fattr = NULL; - struct nfs4_label *label = NULL; struct dentry *parent; struct nfs_rpc_ops const *ops = server->nfs_client->rpc_ops; struct nfs_fh fh; @@ -139,31 +128,20 @@ nfs_get_parent(struct dentry *dentry) if (!ops->lookupp) return ERR_PTR(-EACCES); - fattr = nfs_alloc_fattr(); - if (fattr == NULL) { - parent = ERR_PTR(-ENOMEM); - goto out; - } + fattr = nfs_alloc_fattr_with_label(server); + if (fattr == NULL) + return ERR_PTR(-ENOMEM); - label = nfs4_label_alloc(server, GFP_KERNEL); - if (IS_ERR(label)) { - parent = ERR_CAST(label); - goto out_free_fattr; - } - - ret = ops->lookupp(inode, &fh, fattr, label); + ret = ops->lookupp(inode, &fh, fattr); if (ret) { parent = ERR_PTR(ret); - goto out_free_label; + goto out; } - pinode = nfs_fhget(sb, &fh, fattr, label); + pinode = nfs_fhget(sb, &fh, fattr); parent = d_obtain_alias(pinode); -out_free_label: - nfs4_label_free(label); -out_free_fattr: - nfs_free_fattr(fattr); out: + nfs_free_fattr(fattr); return parent; } diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index d2103852475f..9c96e3e5ed35 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -293,8 +293,6 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); - if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { nfs41_sequence_done(task, &hdr->res.seq_res); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index d383de00d486..a553d59afa8b 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1414,8 +1414,6 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); - if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { nfs4_sequence_done(task, &hdr->res.seq_res); diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index c9b61b818ec1..bfa7202ca7be 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -378,10 +378,10 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, goto noconnect; ds = mirror->mirror_ds->ds; + if (READ_ONCE(ds->ds_clp)) + goto out; /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ smp_rmb(); - if (ds->ds_clp) - goto out; /* FIXME: For now we assume the server sent only one version of NFS * to use for the DS. diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 59355c106ece..11ff2b2e060f 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -80,31 +80,28 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) goto out; /* get the actual root for this mount */ - fsinfo.fattr = nfs_alloc_fattr(); + fsinfo.fattr = nfs_alloc_fattr_with_label(server); if (fsinfo.fattr == NULL) goto out_name; - fsinfo.fattr->label = nfs4_label_alloc(server, GFP_KERNEL); - if (IS_ERR(fsinfo.fattr->label)) - goto out_fattr; error = server->nfs_client->rpc_ops->getroot(server, ctx->mntfh, &fsinfo); if (error < 0) { dprintk("nfs_get_root: getattr error = %d\n", -error); nfs_errorf(fc, "NFS: Couldn't getattr on root"); - goto out_label; + goto out_fattr; } - inode = nfs_fhget(s, ctx->mntfh, fsinfo.fattr, NULL); + inode = nfs_fhget(s, ctx->mntfh, fsinfo.fattr); if (IS_ERR(inode)) { dprintk("nfs_get_root: get root inode failed\n"); error = PTR_ERR(inode); nfs_errorf(fc, "NFS: Couldn't get root inode"); - goto out_label; + goto out_fattr; } error = nfs_superblock_set_dummy_root(s, inode); if (error != 0) - goto out_label; + goto out_fattr; /* root dentries normally start off anonymous and get spliced in later * if the dentry tree reaches them; however if the dentry already @@ -115,7 +112,7 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) dprintk("nfs_get_root: get root dentry failed\n"); error = PTR_ERR(root); nfs_errorf(fc, "NFS: Couldn't get root dentry"); - goto out_label; + goto out_fattr; } security_d_instantiate(root, inode); @@ -151,11 +148,9 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) !(kflags_out & SECURITY_LSM_NATIVE_LABELS)) server->caps &= ~NFS_CAP_SECURITY_LABEL; - nfs_setsecurity(inode, fsinfo.fattr, fsinfo.fattr->label); + nfs_setsecurity(inode, fsinfo.fattr); error = 0; -out_label: - nfs4_label_free(fsinfo.fattr->label); out_fattr: nfs_free_fattr(fsinfo.fattr); out_name: @@ -165,5 +160,5 @@ out: error_splat_root: dput(fc->root); fc->root = NULL; - goto out_label; + goto out_fattr; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 853213b3a209..dd53704c3f40 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -210,10 +210,15 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) flags &= ~NFS_INO_INVALID_XATTR; if (flags & NFS_INO_INVALID_DATA) nfs_fscache_invalidate(inode); - if (inode->i_mapping->nrpages == 0) - flags &= ~(NFS_INO_INVALID_DATA|NFS_INO_DATA_INVAL_DEFER); flags &= ~(NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED); + nfsi->cache_validity |= flags; + + if (inode->i_mapping->nrpages == 0) + nfsi->cache_validity &= ~(NFS_INO_INVALID_DATA | + NFS_INO_DATA_INVAL_DEFER); + else if (nfsi->cache_validity & NFS_INO_INVALID_DATA) + nfsi->cache_validity &= ~NFS_INO_DATA_INVAL_DEFER; } EXPORT_SYMBOL_GPL(nfs_set_cache_invalid); @@ -350,37 +355,32 @@ static void nfs_clear_label_invalid(struct inode *inode) spin_unlock(&inode->i_lock); } -void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, - struct nfs4_label *label) +void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr) { int error; - if (label == NULL) + if (fattr->label == NULL) return; if ((fattr->valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL) && inode->i_security) { - error = security_inode_notifysecctx(inode, label->label, - label->len); + error = security_inode_notifysecctx(inode, fattr->label->label, + fattr->label->len); if (error) printk(KERN_ERR "%s() %s %d " "security_inode_notifysecctx() %d\n", __func__, - (char *)label->label, - label->len, error); + (char *)fattr->label->label, + fattr->label->len, error); nfs_clear_label_invalid(inode); } } struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { - struct nfs4_label *label = NULL; - int minor_version = server->nfs_client->cl_minorversion; - - if (minor_version < 2) - return label; + struct nfs4_label *label; if (!(server->caps & NFS_CAP_SECURITY_LABEL)) - return label; + return NULL; label = kzalloc(sizeof(struct nfs4_label), flags); if (label == NULL) @@ -397,8 +397,7 @@ struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) } EXPORT_SYMBOL_GPL(nfs4_label_alloc); #else -void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, - struct nfs4_label *label) +void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr) { } #endif @@ -426,12 +425,28 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh) return inode; } +static void nfs_inode_init_regular(struct nfs_inode *nfsi) +{ + atomic_long_set(&nfsi->nrequests, 0); + INIT_LIST_HEAD(&nfsi->commit_info.list); + atomic_long_set(&nfsi->commit_info.ncommit, 0); + atomic_set(&nfsi->commit_info.rpcs_out, 0); + mutex_init(&nfsi->commit_mutex); +} + +static void nfs_inode_init_dir(struct nfs_inode *nfsi) +{ + nfsi->cache_change_attribute = 0; + memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); + init_rwsem(&nfsi->rmdir_sem); +} + /* * This is our front-end to iget that looks up inodes by file handle * instead of inode number. */ struct inode * -nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, struct nfs4_label *label) +nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) { struct nfs_find_desc desc = { .fh = fh, @@ -480,10 +495,12 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st if (S_ISREG(inode->i_mode)) { inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops; inode->i_data.a_ops = &nfs_file_aops; + nfs_inode_init_regular(nfsi); } else if (S_ISDIR(inode->i_mode)) { inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; inode->i_data.a_ops = &nfs_dir_aops; + nfs_inode_init_dir(nfsi); /* Deal with crossing mountpoints */ if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT || fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { @@ -509,7 +526,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st inode->i_uid = make_kuid(&init_user_ns, -2); inode->i_gid = make_kgid(&init_user_ns, -2); inode->i_blocks = 0; - memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); nfsi->write_io = 0; nfsi->read_io = 0; @@ -563,7 +579,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st fattr->size != 0) nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS); - nfs_setsecurity(inode, fattr, label); + nfs_setsecurity(inode, fattr); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; @@ -632,7 +648,7 @@ nfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, if (S_ISREG(inode->i_mode)) nfs_sync_inode(inode); - fattr = nfs_alloc_fattr(); + fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); if (fattr == NULL) { error = -ENOMEM; goto out; @@ -666,6 +682,7 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset) if (err) goto out; + trace_nfs_size_truncate(inode, offset); i_size_write(inode, offset); /* Optimisation */ if (offset == 0) @@ -1024,7 +1041,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, ctx->cred = get_cred(filp->f_cred); else ctx->cred = get_current_cred(); - ctx->ll_cred = NULL; + rcu_assign_pointer(ctx->ll_cred, NULL); ctx->state = NULL; ctx->mode = f_mode; ctx->flags = 0; @@ -1063,7 +1080,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) put_cred(ctx->cred); dput(ctx->dentry); nfs_sb_deactive(sb); - put_rpccred(ctx->ll_cred); + put_rpccred(rcu_dereference_protected(ctx->ll_cred, 1)); kfree(ctx->mdsthreshold); kfree_rcu(ctx, rcu_head); } @@ -1175,7 +1192,6 @@ int __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { int status = -ESTALE; - struct nfs4_label *label = NULL; struct nfs_fattr *fattr = NULL; struct nfs_inode *nfsi = NFS_I(inode); @@ -1197,20 +1213,13 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) } status = -ENOMEM; - fattr = nfs_alloc_fattr(); + fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); if (fattr == NULL) goto out; nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); - label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); - if (IS_ERR(label)) { - status = PTR_ERR(label); - goto out; - } - - status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr, - label, inode); + status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr, inode); if (status != 0) { dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n", inode->i_sb->s_id, @@ -1227,7 +1236,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) else nfs_zap_caches(inode); } - goto err_out; + goto out; } status = nfs_refresh_inode(inode, fattr); @@ -1235,20 +1244,18 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) refresh failed, error=%d\n", inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode), status); - goto err_out; + goto out; } if (nfsi->cache_validity & NFS_INO_INVALID_ACL) nfs_zap_acl_cache(inode); - nfs_setsecurity(inode, fattr, label); + nfs_setsecurity(inode, fattr); dfprintk(PAGECACHE, "NFS: (%s/%Lu) revalidation complete\n", inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode)); -err_out: - nfs4_label_free(label); out: nfs_free_fattr(fattr); trace_nfs_revalidate_inode_exit(inode, status); @@ -1446,13 +1453,12 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) && (fattr->valid & NFS_ATTR_FATTR_MTIME) && timespec64_equal(&ts, &fattr->pre_mtime)) { inode->i_mtime = fattr->mtime; - if (S_ISDIR(inode->i_mode)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); } if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) && (fattr->valid & NFS_ATTR_FATTR_SIZE) && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) && !nfs_have_writebacks(inode)) { + trace_nfs_size_wcc(inode, fattr->size); i_size_write(inode, nfs_size_to_loff_t(fattr->size)); } } @@ -1580,12 +1586,31 @@ struct nfs_fattr *nfs_alloc_fattr(void) struct nfs_fattr *fattr; fattr = kmalloc(sizeof(*fattr), GFP_NOFS); - if (fattr != NULL) + if (fattr != NULL) { nfs_fattr_init(fattr); + fattr->label = NULL; + } return fattr; } EXPORT_SYMBOL_GPL(nfs_alloc_fattr); +struct nfs_fattr *nfs_alloc_fattr_with_label(struct nfs_server *server) +{ + struct nfs_fattr *fattr = nfs_alloc_fattr(); + + if (!fattr) + return NULL; + + fattr->label = nfs4_label_alloc(server, GFP_NOFS); + if (IS_ERR(fattr->label)) { + kfree(fattr); + return NULL; + } + + return fattr; +} +EXPORT_SYMBOL_GPL(nfs_alloc_fattr_with_label); + struct nfs_fh *nfs_alloc_fhandle(void) { struct nfs_fh *fh; @@ -1777,8 +1802,10 @@ static int nfs_inode_finish_partial_attr_update(const struct nfs_fattr *fattr, NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_OTHER | NFS_INO_INVALID_NLINK; unsigned long cache_validity = NFS_I(inode)->cache_validity; + enum nfs4_change_attr_type ctype = NFS_SERVER(inode)->change_attr_type; - if (!(cache_validity & NFS_INO_INVALID_CHANGE) && + if (ctype != NFS4_CHANGE_TYPE_IS_UNDEFINED && + !(cache_validity & NFS_INO_INVALID_CHANGE) && (cache_validity & check_valid) != 0 && (fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && nfs_inode_attrs_cmp_monotonic(fattr, inode) == 0) @@ -2095,16 +2122,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* Do we perhaps have any outstanding writes, or has * the file grown beyond our last write? */ if (!nfs_have_writebacks(inode) || new_isize > cur_isize) { + trace_nfs_size_update(inode, new_isize); i_size_write(inode, new_isize); if (!have_writers) invalid |= NFS_INO_INVALID_DATA; } - dprintk("NFS: isize change on server for file %s/%ld " - "(%Ld to %Ld)\n", - inode->i_sb->s_id, - inode->i_ino, - (long long)cur_isize, - (long long)new_isize); } if (new_isize == 0 && !(fattr->valid & (NFS_ATTR_FATTR_SPACE_USED | @@ -2155,11 +2177,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) save_cache_validity & NFS_INO_INVALID_OTHER; if (fattr->valid & NFS_ATTR_FATTR_NLINK) { - if (inode->i_nlink != fattr->nlink) { - if (S_ISDIR(inode->i_mode)) - invalid |= NFS_INO_INVALID_DATA; + if (inode->i_nlink != fattr->nlink) set_nlink(inode, fattr->nlink); - } } else if (fattr_supported & NFS_ATTR_FATTR_NLINK) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_NLINK; @@ -2260,14 +2279,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->open_files); INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); - INIT_LIST_HEAD(&nfsi->commit_info.list); - atomic_long_set(&nfsi->nrequests, 0); - atomic_long_set(&nfsi->commit_info.ncommit, 0); - atomic_set(&nfsi->commit_info.rpcs_out, 0); - init_rwsem(&nfsi->rmdir_sem); - mutex_init(&nfsi->commit_mutex); nfs4_init_once(nfsi); - nfsi->cache_change_attribute = 0; } static int __init nfs_init_inodecache(void) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 66fc936834f2..12f6acb483bb 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -193,7 +193,7 @@ extern void nfs_clients_exit(struct net *net); extern struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *); int nfs_create_rpc_client(struct nfs_client *, const struct nfs_client_initdata *, rpc_authflavor_t); struct nfs_client *nfs_get_client(const struct nfs_client_initdata *); -int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *); +int nfs_probe_server(struct nfs_server *, struct nfs_fh *); void nfs_server_insert_lists(struct nfs_server *); void nfs_server_remove_lists(struct nfs_server *); void nfs_init_timeout_values(struct rpc_timeout *to, int proto, int timeo, int retrans); @@ -209,6 +209,7 @@ extern struct nfs_client * nfs4_find_client_sessionid(struct net *, const struct sockaddr *, struct nfs4_sessionid *, u32); extern struct nfs_server *nfs_create_server(struct fs_context *); +extern void nfs4_server_set_init_caps(struct nfs_server *); extern struct nfs_server *nfs4_create_server(struct fs_context *); extern struct nfs_server *nfs4_create_referral_server(struct fs_context *); extern int nfs4_update_server(struct nfs_server *server, const char *hostname, @@ -341,14 +342,6 @@ nfs4_label_copy(struct nfs4_label *dst, struct nfs4_label *src) return dst; } -static inline void nfs4_label_free(struct nfs4_label *label) -{ - if (label) { - kfree(label->label); - kfree(label); - } - return; -} static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) { @@ -357,7 +350,6 @@ static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) } #else static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; } -static inline void nfs4_label_free(void *label) {} static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) { } diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index bc0c698f3350..3295af4110f1 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -308,8 +308,7 @@ int nfs_submount(struct fs_context *fc, struct nfs_server *server) /* Look it up again to get its attributes */ err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry, - ctx->mntfh, ctx->clone_data.fattr, - NULL); + ctx->mntfh, ctx->clone_data.fattr); dput(parent); if (err != 0) return err; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index f7524310ddf4..7100514d306b 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -100,8 +100,7 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, */ static int nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label, - struct inode *inode) + struct nfs_fattr *fattr, struct inode *inode) { struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR], @@ -193,8 +192,7 @@ __nfs3_proc_lookup(struct inode *dir, const char *name, size_t len, static int nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, - struct nfs_fh *fhandle, struct nfs_fattr *fattr, - struct nfs4_label *label) + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { unsigned short task_flags = 0; @@ -209,7 +207,7 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, } static int nfs3_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label) + struct nfs_fattr *fattr) { const char dotdot[] = ".."; const size_t len = strlen(dotdot); @@ -323,7 +321,7 @@ nfs3_do_create(struct inode *dir, struct dentry *dentry, struct nfs3_createdata if (status != 0) return ERR_PTR(status); - return nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr, NULL); + return nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr); } static void nfs3_free_createdata(struct nfs3_createdata *data) diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index e6eca1d7481b..9274c9c5efea 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -2227,7 +2227,7 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr, /* ignore properties */ result->lease_time = 0; - result->change_attr_type = NFS4_CHANGE_TYPE_IS_TIME_METADATA; + result->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED; return 0; } diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index a24349512ffe..08355b66e7cb 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -83,6 +83,10 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, status = nfs_post_op_update_inode_force_wcc(inode, res.falloc_fattr); + if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_ALLOCATE]) + trace_nfs4_fallocate(inode, &args, status); + else + trace_nfs4_deallocate(inode, &args, status); kfree(res.falloc_fattr); return status; } @@ -363,6 +367,7 @@ static ssize_t _nfs42_proc_copy(struct file *src, status = nfs4_call_sync(dst_server->client, dst_server, &msg, &args->seq_args, &res->seq_res, 0); + trace_nfs4_copy(src_inode, dst_inode, args, res, nss, status); if (status == -ENOTSUPP) dst_server->caps &= ~NFS_CAP_COPY; if (status) @@ -504,6 +509,7 @@ static void nfs42_offload_cancel_done(struct rpc_task *task, void *calldata) { struct nfs42_offloadcancel_data *data = calldata; + trace_nfs4_offload_cancel(&data->args, task->tk_status); nfs41_sequence_done(task, &data->res.osr_seq_res); if (task->tk_status && nfs4_async_handle_error(task, data->seq_server, NULL, @@ -598,6 +604,7 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst, status = nfs4_call_sync(src_server->client, src_server, &msg, &args->cna_seq_args, &res->cnr_seq_res, 0); + trace_nfs4_copy_notify(file_inode(src), args, res, status); if (status == -ENOTSUPP) src_server->caps &= ~NFS_CAP_COPY_NOTIFY; @@ -678,6 +685,7 @@ static loff_t _nfs42_proc_llseek(struct file *filep, status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); + trace_nfs4_llseek(inode, &args, &res, status); if (status == -ENOTSUPP) server->caps &= ~NFS_CAP_SEEK; if (status) @@ -1071,6 +1079,7 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, status = nfs4_call_sync(server->client, server, msg, &args.seq_args, &res.seq_res, 0); + trace_nfs4_clone(src_inode, dst_inode, &args, status); if (status == 0) { nfs42_copy_dest_done(dst_inode, dst_offset, count); status = nfs_post_op_update_inode(dst_inode, res.dst_fattr); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index ba78df4b13d9..ed5eaca6801e 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -234,7 +234,6 @@ struct nfs4_opendata { struct nfs4_string group_name; struct nfs4_label *a_label; struct nfs_fattr f_attr; - struct nfs4_label *f_label; struct dentry *dir; struct dentry *dentry; struct nfs4_state_owner *owner; @@ -317,8 +316,7 @@ extern int nfs4_set_rw_stateid(nfs4_stateid *stateid, const struct nfs_lock_context *l_ctx, fmode_t fmode); extern int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label, - struct inode *inode); + struct nfs_fattr *fattr, struct inode *inode); extern int update_open_stateid(struct nfs4_state *state, const nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index af57332503be..d8b5a250ca05 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1059,31 +1059,15 @@ static void nfs4_session_limit_xasize(struct nfs_server *server) #endif } -static int nfs4_server_common_setup(struct nfs_server *server, - struct nfs_fh *mntfh, bool auth_probe) +void nfs4_server_set_init_caps(struct nfs_server *server) { - struct nfs_fattr *fattr; - int error; - - /* data servers support only a subset of NFSv4.1 */ - if (is_ds_only_client(server->nfs_client)) - return -EPROTONOSUPPORT; - - fattr = nfs_alloc_fattr(); - if (fattr == NULL) - return -ENOMEM; - - /* We must ensure the session is initialised first */ - error = nfs4_init_session(server->nfs_client); - if (error < 0) - goto out; - /* Set the basic capabilities */ server->caps |= server->nfs_client->cl_mvops->init_caps; if (server->flags & NFS_MOUNT_NORDIRPLUS) server->caps &= ~NFS_CAP_READDIRPLUS; if (server->nfs_client->cl_proto == XPRT_TRANSPORT_RDMA) server->caps &= ~NFS_CAP_READ_PLUS; + /* * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower * authentication. @@ -1091,7 +1075,23 @@ static int nfs4_server_common_setup(struct nfs_server *server, if (nfs4_disable_idmapping && server->client->cl_auth->au_flavor == RPC_AUTH_UNIX) server->caps |= NFS_CAP_UIDGID_NOMAP; +} +static int nfs4_server_common_setup(struct nfs_server *server, + struct nfs_fh *mntfh, bool auth_probe) +{ + int error; + + /* data servers support only a subset of NFSv4.1 */ + if (is_ds_only_client(server->nfs_client)) + return -EPROTONOSUPPORT; + + /* We must ensure the session is initialised first */ + error = nfs4_init_session(server->nfs_client); + if (error < 0) + goto out; + + nfs4_server_set_init_caps(server); /* Probe the root fh to retrieve its FSID and filehandle */ error = nfs4_get_rootfh(server, mntfh, auth_probe); @@ -1103,7 +1103,7 @@ static int nfs4_server_common_setup(struct nfs_server *server, (unsigned long long) server->fsid.minor); nfs_display_fhandle(mntfh, "Pseudo-fs root FH"); - error = nfs_probe_fsinfo(server, mntfh, fattr); + error = nfs_probe_server(server, mntfh); if (error < 0) goto out; @@ -1117,7 +1117,6 @@ static int nfs4_server_common_setup(struct nfs_server *server, server->mount_time = jiffies; server->destroy = nfs4_destroy_server; out: - nfs_free_fattr(fattr); return error; } @@ -1288,30 +1287,6 @@ error: return ERR_PTR(error); } -/* - * Grab the destination's particulars, including lease expiry time. - * - * Returns zero if probe succeeded and retrieved FSID matches the FSID - * we have cached. - */ -static int nfs_probe_destination(struct nfs_server *server) -{ - struct inode *inode = d_inode(server->super->s_root); - struct nfs_fattr *fattr; - int error; - - fattr = nfs_alloc_fattr(); - if (fattr == NULL) - return -ENOMEM; - - /* Sanity: the probe won't work if the destination server - * does not recognize the migrated FH. */ - error = nfs_probe_fsinfo(server, NFS_FH(inode), fattr); - - nfs_free_fattr(fattr); - return error; -} - /** * nfs4_update_server - Move an nfs_server to a different nfs_client * @@ -1372,5 +1347,5 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL); nfs_server_insert_lists(server); - return nfs_probe_destination(server); + return nfs_probe_server(server, NFS_FH(d_inode(server->super->s_root))); } diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index c91565227ea2..e79ae4cbc395 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -317,7 +317,7 @@ static int read_name_gen = 1; static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, struct nfs_fh *src_fh, nfs4_stateid *stateid) { - struct nfs_fattr fattr; + struct nfs_fattr *fattr = nfs_alloc_fattr(); struct file *filep, *res; struct nfs_server *server; struct inode *r_ino = NULL; @@ -328,9 +328,10 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, server = NFS_SERVER(ss_mnt->mnt_root->d_inode); - nfs_fattr_init(&fattr); + if (!fattr) + return ERR_PTR(-ENOMEM); - status = nfs4_proc_getattr(server, src_fh, &fattr, NULL, NULL); + status = nfs4_proc_getattr(server, src_fh, fattr, NULL); if (status < 0) { res = ERR_PTR(status); goto out; @@ -343,20 +344,18 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, goto out; snprintf(read_name, len, SSC_READ_NAME_BODY, read_name_gen++); - r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, &fattr, - NULL); + r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, fattr); if (IS_ERR(r_ino)) { res = ERR_CAST(r_ino); goto out_free_name; } - filep = alloc_file_pseudo(r_ino, ss_mnt, read_name, FMODE_READ, + filep = alloc_file_pseudo(r_ino, ss_mnt, read_name, O_RDONLY, r_ino->i_fop); if (IS_ERR(filep)) { res = ERR_CAST(filep); goto out_free_name; } - filep->f_mode |= FMODE_READ; ctx = alloc_nfs_open_context(filep->f_path.dentry, filep->f_mode, filep); @@ -388,6 +387,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, out_free_name: kfree(read_name); out: + nfs_free_fattr(fattr); return res; out_stateowner: nfs4_put_state_owner(sp); diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 8d8aba305ecc..f331866dd418 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -487,7 +487,7 @@ nfs_idmap_new(struct nfs_client *clp) err_destroy_pipe: rpc_destroy_pipe_data(idmap->idmap_pipe); err: - get_user_ns(idmap->user_ns); + put_user_ns(idmap->user_ns); kfree(idmap); return error; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 459860aa8fd7..ee3bc79f6ca3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -93,11 +93,11 @@ struct nfs4_opendata; static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); -static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label, struct inode *inode); +static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fattr *fattr, struct inode *inode); static int nfs4_do_setattr(struct inode *inode, const struct cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, - struct nfs_open_context *ctx, struct nfs4_label *ilabel, - struct nfs4_label *olabel); + struct nfs_open_context *ctx, struct nfs4_label *ilabel); #ifdef CONFIG_NFS_V4_1 static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, const struct cred *cred, @@ -1330,7 +1330,6 @@ nfs4_map_atomic_open_claim(struct nfs_server *server, static void nfs4_init_opendata_res(struct nfs4_opendata *p) { p->o_res.f_attr = &p->f_attr; - p->o_res.f_label = p->f_label; p->o_res.seqid = p->o_arg.seqid; p->c_res.seqid = p->c_arg.seqid; p->o_res.server = p->o_arg.server; @@ -1356,8 +1355,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, if (p == NULL) goto err; - p->f_label = nfs4_label_alloc(server, gfp_mask); - if (IS_ERR(p->f_label)) + p->f_attr.label = nfs4_label_alloc(server, gfp_mask); + if (IS_ERR(p->f_attr.label)) goto err_free_p; p->a_label = nfs4_label_alloc(server, gfp_mask); @@ -1389,27 +1388,22 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, sizeof(p->o_arg.u.verifier.data)); } } - /* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS - * will return permission denied for all bits until close */ - if (!(flags & O_EXCL)) { - /* ask server to check for all possible rights as results - * are cached */ - switch (p->o_arg.claim) { - default: - break; - case NFS4_OPEN_CLAIM_NULL: - case NFS4_OPEN_CLAIM_FH: - p->o_arg.access = NFS4_ACCESS_READ | - NFS4_ACCESS_MODIFY | - NFS4_ACCESS_EXTEND | - NFS4_ACCESS_EXECUTE; + /* ask server to check for all possible rights as results + * are cached */ + switch (p->o_arg.claim) { + default: + break; + case NFS4_OPEN_CLAIM_NULL: + case NFS4_OPEN_CLAIM_FH: + p->o_arg.access = NFS4_ACCESS_READ | NFS4_ACCESS_MODIFY | + NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE | + NFS4_ACCESS_EXECUTE; #ifdef CONFIG_NFS_V4_2 - if (server->caps & NFS_CAP_XATTR) - p->o_arg.access |= NFS4_ACCESS_XAREAD | - NFS4_ACCESS_XAWRITE | - NFS4_ACCESS_XALIST; + if (!(server->caps & NFS_CAP_XATTR)) + break; + p->o_arg.access |= NFS4_ACCESS_XAREAD | NFS4_ACCESS_XAWRITE | + NFS4_ACCESS_XALIST; #endif - } } p->o_arg.clientid = server->nfs_client->cl_clientid; p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time); @@ -1440,7 +1434,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, err_free_label: nfs4_label_free(p->a_label); err_free_f: - nfs4_label_free(p->f_label); + nfs4_label_free(p->f_attr.label); err_free_p: kfree(p); err: @@ -1462,7 +1456,7 @@ static void nfs4_opendata_free(struct kref *kref) nfs4_put_state_owner(p->owner); nfs4_label_free(p->a_label); - nfs4_label_free(p->f_label); + nfs4_label_free(p->f_attr.label); dput(p->dir); dput(p->dentry); @@ -1610,15 +1604,16 @@ static bool nfs_stateid_is_sequential(struct nfs4_state *state, { if (test_bit(NFS_OPEN_STATE, &state->flags)) { /* The common case - we're updating to a new sequence number */ - if (nfs4_stateid_match_other(stateid, &state->open_stateid) && - nfs4_stateid_is_next(&state->open_stateid, stateid)) { - return true; + if (nfs4_stateid_match_other(stateid, &state->open_stateid)) { + if (nfs4_stateid_is_next(&state->open_stateid, stateid)) + return true; + return false; } - } else { - /* This is the first OPEN in this generation */ - if (stateid->seqid == cpu_to_be32(1)) - return true; + /* The server returned a new stateid */ } + /* This is the first OPEN in this generation */ + if (stateid->seqid == cpu_to_be32(1)) + return true; return false; } @@ -2014,7 +2009,7 @@ nfs4_opendata_get_inode(struct nfs4_opendata *data) if (!(data->f_attr.valid & NFS_ATTR_FATTR)) return ERR_PTR(-EAGAIN); inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, - &data->f_attr, data->f_label); + &data->f_attr); break; default: inode = d_inode(data->dentry); @@ -2473,11 +2468,15 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) /* Set the create mode (note dependency on the session type) */ data->o_arg.createmode = NFS4_CREATE_UNCHECKED; if (data->o_arg.open_flags & O_EXCL) { - data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE; - if (nfs4_has_persistent_session(clp)) + data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE4_1; + if (clp->cl_mvops->minor_version == 0) { + data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE; + /* don't put an ACCESS op in OPEN compound if O_EXCL, + * because ACCESS will return permission denied for + * all bits until close */ + data->o_res.access_request = data->o_arg.access = 0; + } else if (nfs4_has_persistent_session(clp)) data->o_arg.createmode = NFS4_CREATE_GUARDED; - else if (clp->cl_mvops->minor_version > 0) - data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE4_1; } return; unlock_no_action: @@ -2709,8 +2708,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data, } if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) { nfs4_sequence_free_slot(&o_res->seq_res); - nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, - o_res->f_label, NULL); + nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, NULL); } return 0; } @@ -3126,7 +3124,6 @@ static int _nfs4_do_open(struct inode *dir, enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL; struct iattr *sattr = c->sattr; struct nfs4_label *label = c->label; - struct nfs4_label *olabel = NULL; int status; /* Protect against reboot recovery conflicts */ @@ -3149,19 +3146,11 @@ static int _nfs4_do_open(struct inode *dir, if (opendata == NULL) goto err_put_state_owner; - if (label) { - olabel = nfs4_label_alloc(server, GFP_KERNEL); - if (IS_ERR(olabel)) { - status = PTR_ERR(olabel); - goto err_opendata_put; - } - } - if (server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) { if (!opendata->f_attr.mdsthreshold) { opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); if (!opendata->f_attr.mdsthreshold) - goto err_free_label; + goto err_opendata_put; } opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0]; } @@ -3170,7 +3159,7 @@ static int _nfs4_do_open(struct inode *dir, status = _nfs4_open_and_get_state(opendata, flags, ctx); if (status != 0) - goto err_free_label; + goto err_opendata_put; state = ctx->state; if ((opendata->o_arg.open_flags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL) && @@ -3187,11 +3176,11 @@ static int _nfs4_do_open(struct inode *dir, nfs_fattr_init(opendata->o_res.f_attr); status = nfs4_do_setattr(state->inode, cred, opendata->o_res.f_attr, sattr, - ctx, label, olabel); + ctx, label); if (status == 0) { nfs_setattr_update_inode(state->inode, sattr, opendata->o_res.f_attr); - nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel); + nfs_setsecurity(state->inode, opendata->o_res.f_attr); } sattr->ia_valid = ia_old; } @@ -3204,13 +3193,9 @@ static int _nfs4_do_open(struct inode *dir, opendata->f_attr.mdsthreshold = NULL; } - nfs4_label_free(olabel); - nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); return 0; -err_free_label: - nfs4_label_free(olabel); err_opendata_put: nfs4_opendata_put(opendata); err_put_state_owner: @@ -3355,8 +3340,7 @@ zero_stateid: static int nfs4_do_setattr(struct inode *inode, const struct cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, - struct nfs_open_context *ctx, struct nfs4_label *ilabel, - struct nfs4_label *olabel) + struct nfs_open_context *ctx, struct nfs4_label *ilabel) { struct nfs_server *server = NFS_SERVER(inode); __u32 bitmask[NFS4_BITMASK_SZ]; @@ -3370,7 +3354,6 @@ static int nfs4_do_setattr(struct inode *inode, const struct cred *cred, }; struct nfs_setattrres res = { .fattr = fattr, - .label = olabel, .server = server, }; struct nfs4_exception exception = { @@ -3387,7 +3370,7 @@ static int nfs4_do_setattr(struct inode *inode, const struct cred *cred, adjust_flags |= NFS_INO_INVALID_OTHER; do { - nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, olabel), + nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, fattr->label), inode, adjust_flags); err = _nfs4_do_setattr(inode, &arg, &res, cred, ctx); @@ -3562,7 +3545,6 @@ static void nfs4_close_done(struct rpc_task *task, void *data) .stateid = &calldata->arg.stateid, }; - dprintk("%s: begin!\n", __func__); if (!nfs4_sequence_done(task, &calldata->res.seq_res)) return; trace_nfs4_close(state, &calldata->arg, &calldata->res, task->tk_status); @@ -3617,7 +3599,7 @@ out_release: task->tk_status = 0; nfs_release_seqid(calldata->arg.seqid); nfs_refresh_inode(calldata->inode, &calldata->fattr); - dprintk("%s: done, ret = %d!\n", __func__, task->tk_status); + dprintk("%s: ret = %d\n", __func__, task->tk_status); return; out_restart: task->tk_status = 0; @@ -3635,7 +3617,6 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) bool is_rdonly, is_wronly, is_rdwr; int call_close = 0; - dprintk("%s: begin!\n", __func__); if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) goto out_wait; @@ -3709,7 +3690,6 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) &calldata->res.seq_res, task) != 0) nfs_release_seqid(calldata->arg.seqid); - dprintk("%s: done!\n", __func__); return; out_no_action: task->tk_action = NULL; @@ -3942,6 +3922,8 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) .interruptible = true, }; int err; + + nfs4_server_set_init_caps(server); do { err = nfs4_handle_exception(server, _nfs4_server_capabilities(server, fhandle), @@ -4105,7 +4087,6 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh, { int error; struct nfs_fattr *fattr = info->fattr; - struct nfs4_label *label = fattr->label; error = nfs4_server_capabilities(server, mntfh); if (error < 0) { @@ -4113,7 +4094,7 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh, return error; } - error = nfs4_proc_getattr(server, mntfh, fattr, label, NULL); + error = nfs4_proc_getattr(server, mntfh, fattr, NULL); if (error < 0) { dprintk("nfs4_get_root: getattr error = %d\n", -error); goto out; @@ -4176,8 +4157,7 @@ out: } static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label, - struct inode *inode) + struct nfs_fattr *fattr, struct inode *inode) { __u32 bitmask[NFS4_BITMASK_SZ]; struct nfs4_getattr_arg args = { @@ -4186,7 +4166,6 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, }; struct nfs4_getattr_res res = { .fattr = fattr, - .label = label, .server = server, }; struct rpc_message msg = { @@ -4203,7 +4182,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, if (inode && (server->flags & NFS_MOUNT_SOFTREVAL)) task_flags |= RPC_TASK_TIMEOUT; - nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode, 0); + nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, fattr->label), inode, 0); nfs_fattr_init(fattr); nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0); return nfs4_do_call_sync(server->client, server, &msg, @@ -4211,15 +4190,14 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, } int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label, - struct inode *inode) + struct nfs_fattr *fattr, struct inode *inode) { struct nfs4_exception exception = { .interruptible = true, }; int err; do { - err = _nfs4_proc_getattr(server, fhandle, fattr, label, inode); + err = _nfs4_proc_getattr(server, fhandle, fattr, inode); trace_nfs4_getattr(server, fhandle, fattr, err); err = nfs4_handle_exception(server, err, &exception); @@ -4251,7 +4229,6 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, struct inode *inode = d_inode(dentry); const struct cred *cred = NULL; struct nfs_open_context *ctx = NULL; - struct nfs4_label *label = NULL; int status; if (pnfs_ld_layoutret_on_setattr(inode) && @@ -4277,26 +4254,21 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, cred = ctx->cred; } - label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); - if (IS_ERR(label)) - return PTR_ERR(label); - /* Return any delegations if we're going to change ACLs */ if ((sattr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) nfs4_inode_make_writeable(inode); - status = nfs4_do_setattr(inode, cred, fattr, sattr, ctx, NULL, label); + status = nfs4_do_setattr(inode, cred, fattr, sattr, ctx, NULL); if (status == 0) { nfs_setattr_update_inode(inode, sattr, fattr); - nfs_setsecurity(inode, fattr, label); + nfs_setsecurity(inode, fattr); } - nfs4_label_free(label); return status; } static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct dentry *dentry, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label) + struct nfs_fattr *fattr) { struct nfs_server *server = NFS_SERVER(dir); int status; @@ -4308,7 +4280,6 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct nfs4_lookup_res res = { .server = server, .fattr = fattr, - .label = label, .fh = fhandle, }; struct rpc_message msg = { @@ -4325,7 +4296,7 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, if (nfs_lookup_is_soft_revalidate(dentry)) task_flags |= RPC_TASK_TIMEOUT; - args.bitmask = nfs4_bitmask(server, label); + args.bitmask = nfs4_bitmask(server, fattr->label); nfs_fattr_init(fattr); @@ -4347,7 +4318,7 @@ static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr) static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, struct dentry *dentry, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label) + struct nfs_fattr *fattr) { struct nfs4_exception exception = { .interruptible = true, @@ -4356,7 +4327,7 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, const struct qstr *name = &dentry->d_name; int err; do { - err = _nfs4_proc_lookup(client, dir, dentry, fhandle, fattr, label); + err = _nfs4_proc_lookup(client, dir, dentry, fhandle, fattr); trace_nfs4_lookup(dir, name, err); switch (err) { case -NFS4ERR_BADNAME: @@ -4392,13 +4363,12 @@ out: } static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry, - struct nfs_fh *fhandle, struct nfs_fattr *fattr, - struct nfs4_label *label) + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { int status; struct rpc_clnt *client = NFS_CLIENT(dir); - status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr, label); + status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr); if (client != NFS_CLIENT(dir)) { rpc_shutdown_client(client); nfs_fixup_secinfo_attributes(fattr); @@ -4413,15 +4383,14 @@ nfs4_proc_lookup_mountpoint(struct inode *dir, struct dentry *dentry, struct rpc_clnt *client = NFS_CLIENT(dir); int status; - status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr, NULL); + status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr); if (status < 0) return ERR_PTR(status); return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client; } static int _nfs4_proc_lookupp(struct inode *inode, - struct nfs_fh *fhandle, struct nfs_fattr *fattr, - struct nfs4_label *label) + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct rpc_clnt *clnt = NFS_CLIENT(inode); struct nfs_server *server = NFS_SERVER(inode); @@ -4433,7 +4402,6 @@ static int _nfs4_proc_lookupp(struct inode *inode, struct nfs4_lookupp_res res = { .server = server, .fattr = fattr, - .label = label, .fh = fhandle, }; struct rpc_message msg = { @@ -4446,7 +4414,7 @@ static int _nfs4_proc_lookupp(struct inode *inode, if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL) task_flags |= RPC_TASK_TIMEOUT; - args.bitmask = nfs4_bitmask(server, label); + args.bitmask = nfs4_bitmask(server, fattr->label); nfs_fattr_init(fattr); @@ -4458,14 +4426,14 @@ static int _nfs4_proc_lookupp(struct inode *inode, } static int nfs4_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label) + struct nfs_fattr *fattr) { struct nfs4_exception exception = { .interruptible = true, }; int err; do { - err = _nfs4_proc_lookupp(inode, fhandle, fattr, label); + err = _nfs4_proc_lookupp(inode, fhandle, fattr); trace_nfs4_lookupp(inode, err); err = nfs4_handle_exception(NFS_SERVER(inode), err, &exception); @@ -4792,7 +4760,6 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct }; struct nfs4_link_res res = { .server = server, - .label = NULL, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK], @@ -4801,18 +4768,12 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct }; int status = -ENOMEM; - res.fattr = nfs_alloc_fattr(); + res.fattr = nfs_alloc_fattr_with_label(server); if (res.fattr == NULL) goto out; - res.label = nfs4_label_alloc(server, GFP_KERNEL); - if (IS_ERR(res.label)) { - status = PTR_ERR(res.label); - goto out; - } - nfs4_inode_make_writeable(inode); - nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.label), inode, + nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.fattr->label), inode, NFS_INO_INVALID_CHANGE); status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (!status) { @@ -4821,12 +4782,9 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct nfs4_inc_nlink(inode); status = nfs_post_op_update_inode(inode, res.fattr); if (!status) - nfs_setsecurity(inode, res.fattr, res.label); + nfs_setsecurity(inode, res.fattr); } - - nfs4_label_free(res.label); - out: nfs_free_fattr(res.fattr); return status; @@ -4852,7 +4810,6 @@ struct nfs4_createdata { struct nfs4_create_res res; struct nfs_fh fh; struct nfs_fattr fattr; - struct nfs4_label *label; }; static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, @@ -4864,8 +4821,8 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, if (data != NULL) { struct nfs_server *server = NFS_SERVER(dir); - data->label = nfs4_label_alloc(server, GFP_KERNEL); - if (IS_ERR(data->label)) + data->fattr.label = nfs4_label_alloc(server, GFP_KERNEL); + if (IS_ERR(data->fattr.label)) goto out_free; data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE]; @@ -4876,12 +4833,11 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, data->arg.name = name; data->arg.attrs = sattr; data->arg.ftype = ftype; - data->arg.bitmask = nfs4_bitmask(server, data->label); + data->arg.bitmask = nfs4_bitmask(server, data->fattr.label); data->arg.umask = current_umask(); data->res.server = server; data->res.fh = &data->fh; data->res.fattr = &data->fattr; - data->res.label = data->label; nfs_fattr_init(data->res.fattr); } return data; @@ -4903,14 +4859,14 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_ data->res.fattr->time_start, NFS_INO_INVALID_DATA); spin_unlock(&dir->i_lock); - status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.label); + status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); } return status; } static void nfs4_free_createdata(struct nfs4_createdata *data) { - nfs4_label_free(data->label); + nfs4_label_free(data->fattr.label); kfree(data); } @@ -5348,8 +5304,6 @@ static bool nfs4_read_plus_not_supported(struct rpc_task *task, static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - dprintk("--> %s\n", __func__); - if (!nfs4_sequence_done(task, &hdr->res.seq_res)) return -EAGAIN; if (nfs4_read_stateid_changed(task, &hdr->args)) @@ -6005,17 +5959,18 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf, size_t buflen) { struct nfs_server *server = NFS_SERVER(inode); - struct nfs_fattr fattr; struct nfs4_label label = {0, 0, buflen, buf}; u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL }; + struct nfs_fattr fattr = { + .label = &label, + }; struct nfs4_getattr_arg arg = { .fh = NFS_FH(inode), .bitmask = bitmask, }; struct nfs4_getattr_res res = { .fattr = &fattr, - .label = &label, .server = server, }; struct rpc_message msg = { @@ -6057,8 +6012,7 @@ static int nfs4_get_security_label(struct inode *inode, void *buf, static int _nfs4_do_set_security_label(struct inode *inode, struct nfs4_label *ilabel, - struct nfs_fattr *fattr, - struct nfs4_label *olabel) + struct nfs_fattr *fattr) { struct iattr sattr = {0}; @@ -6073,7 +6027,6 @@ static int _nfs4_do_set_security_label(struct inode *inode, }; struct nfs_setattrres res = { .fattr = fattr, - .label = olabel, .server = server, }; struct rpc_message msg = { @@ -6094,15 +6047,13 @@ static int _nfs4_do_set_security_label(struct inode *inode, static int nfs4_do_set_security_label(struct inode *inode, struct nfs4_label *ilabel, - struct nfs_fattr *fattr, - struct nfs4_label *olabel) + struct nfs_fattr *fattr) { struct nfs4_exception exception = { }; int err; do { - err = _nfs4_do_set_security_label(inode, ilabel, - fattr, olabel); + err = _nfs4_do_set_security_label(inode, ilabel, fattr); trace_nfs4_set_security_label(inode, err); err = nfs4_handle_exception(NFS_SERVER(inode), err, &exception); @@ -6113,32 +6064,21 @@ static int nfs4_do_set_security_label(struct inode *inode, static int nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen) { - struct nfs4_label ilabel, *olabel = NULL; - struct nfs_fattr fattr; + struct nfs4_label ilabel = {0, 0, buflen, (char *)buf }; + struct nfs_fattr *fattr; int status; if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) return -EOPNOTSUPP; - nfs_fattr_init(&fattr); - - ilabel.pi = 0; - ilabel.lfs = 0; - ilabel.label = (char *)buf; - ilabel.len = buflen; - - olabel = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); - if (IS_ERR(olabel)) { - status = -PTR_ERR(olabel); - goto out; - } + fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); + if (fattr == NULL) + return -ENOMEM; - status = nfs4_do_set_security_label(inode, &ilabel, &fattr, olabel); + status = nfs4_do_set_security_label(inode, &ilabel, fattr); if (status == 0) - nfs_setsecurity(inode, &fattr, olabel); + nfs_setsecurity(inode, fattr); - nfs4_label_free(olabel); -out: return status; } #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ @@ -7004,7 +6944,6 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) struct nfs4_lockdata *data = calldata; struct nfs4_state *state = data->lsp->ls_state; - dprintk("%s: begin!\n", __func__); if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0) goto out_wait; /* Do we need to do an open_to_lock_owner? */ @@ -7038,7 +6977,7 @@ out_release_lock_seqid: nfs_release_seqid(data->arg.lock_seqid); out_wait: nfs4_sequence_done(task, &data->res.seq_res); - dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); + dprintk("%s: ret = %d\n", __func__, data->rpc_status); } static void nfs4_lock_done(struct rpc_task *task, void *calldata) @@ -7046,8 +6985,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) struct nfs4_lockdata *data = calldata; struct nfs4_lock_state *lsp = data->lsp; - dprintk("%s: begin!\n", __func__); - if (!nfs4_sequence_done(task, &data->res.seq_res)) return; @@ -7081,7 +7018,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) goto out_restart; } out_done: - dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); + dprintk("%s: ret = %d!\n", __func__, data->rpc_status); return; out_restart: if (!data->cancelled) @@ -7093,7 +7030,6 @@ static void nfs4_lock_release(void *calldata) { struct nfs4_lockdata *data = calldata; - dprintk("%s: begin!\n", __func__); nfs_free_seqid(data->arg.open_seqid); if (data->cancelled && data->rpc_status == 0) { struct rpc_task *task; @@ -7107,7 +7043,6 @@ static void nfs4_lock_release(void *calldata) nfs4_put_lock_state(data->lsp); put_nfs_open_context(data->ctx); kfree(data); - dprintk("%s: done!\n", __func__); } static const struct rpc_call_ops nfs4_lock_ops = { @@ -7154,7 +7089,6 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f if (client->cl_minorversion) task_setup_data.flags |= RPC_TASK_MOVEABLE; - dprintk("%s: begin!\n", __func__); data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file), fl->fl_u.nfs4_fl.owner, recovery_type == NFS_LOCK_NEW ? GFP_KERNEL : GFP_NOFS); @@ -7185,7 +7119,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f data->cancelled = true; trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret); rpc_put_task(task); - dprintk("%s: done, ret = %d!\n", __func__, ret); + dprintk("%s: ret = %d\n", __func__, ret); return ret; } @@ -8856,14 +8790,12 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task, struct nfs4_get_lease_time_data *data = (struct nfs4_get_lease_time_data *)calldata; - dprintk("--> %s\n", __func__); /* just setup sequence, do not trigger session recovery since we're invoked within one */ nfs4_setup_sequence(data->clp, &data->args->la_seq_args, &data->res->lr_seq_res, task); - dprintk("<-- %s\n", __func__); } /* @@ -8875,13 +8807,11 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) struct nfs4_get_lease_time_data *data = (struct nfs4_get_lease_time_data *)calldata; - dprintk("--> %s\n", __func__); if (!nfs4_sequence_done(task, &data->res->lr_seq_res)) return; switch (task->tk_status) { case -NFS4ERR_DELAY: case -NFS4ERR_GRACE: - dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); rpc_delay(task, NFS4_POLL_RETRY_MIN); task->tk_status = 0; fallthrough; @@ -8889,7 +8819,6 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) rpc_restart_call_prepare(task); return; } - dprintk("<-- %s\n", __func__); } static const struct rpc_call_ops nfs4_get_lease_time_ops = { @@ -9121,7 +9050,6 @@ int nfs4_proc_create_session(struct nfs_client *clp, const struct cred *cred) dprintk("%s client>seqid %d sessionid %u:%u:%u:%u\n", __func__, clp->cl_seqid, ptr[0], ptr[1], ptr[2], ptr[3]); out: - dprintk("<-- %s\n", __func__); return status; } @@ -9139,8 +9067,6 @@ int nfs4_proc_destroy_session(struct nfs4_session *session, }; int status = 0; - dprintk("--> nfs4_proc_destroy_session\n"); - /* session is still being setup */ if (!test_and_clear_bit(NFS4_SESSION_ESTABLISHED, &session->session_state)) return 0; @@ -9152,8 +9078,6 @@ int nfs4_proc_destroy_session(struct nfs4_session *session, if (status) dprintk("NFS: Got error %d from the server on DESTROY_SESSION. " "Session has been destroyed regardless...\n", status); - - dprintk("<-- nfs4_proc_destroy_session\n"); return status; } @@ -9201,7 +9125,7 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data) if (task->tk_status < 0) { dprintk("%s ERROR %d\n", __func__, task->tk_status); if (refcount_read(&clp->cl_count) == 1) - goto out; + return; if (nfs41_sequence_handle_errors(task, clp) == -EAGAIN) { rpc_restart_call_prepare(task); @@ -9209,8 +9133,6 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data) } } dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred); -out: - dprintk("<-- %s\n", __func__); } static void nfs41_sequence_prepare(struct rpc_task *task, void *data) @@ -9357,7 +9279,6 @@ static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data) struct nfs_client *clp = calldata->clp; struct nfs4_sequence_res *res = &calldata->res.seq_res; - dprintk("--> %s\n", __func__); if (!nfs41_sequence_done(task, res)) return; @@ -9366,7 +9287,6 @@ static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data) rpc_restart_call_prepare(task); return; } - dprintk("<-- %s\n", __func__); } static void nfs4_free_reclaim_complete_data(void *data) @@ -9401,7 +9321,6 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp, }; int status = -ENOMEM; - dprintk("--> %s\n", __func__); calldata = kzalloc(sizeof(*calldata), GFP_NOFS); if (calldata == NULL) goto out; @@ -9424,19 +9343,15 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) struct nfs4_layoutget *lgp = calldata; struct nfs_server *server = NFS_SERVER(lgp->args.inode); - dprintk("--> %s\n", __func__); nfs4_setup_sequence(server->nfs_client, &lgp->args.seq_args, &lgp->res.seq_res, task); - dprintk("<-- %s\n", __func__); } static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) { struct nfs4_layoutget *lgp = calldata; - dprintk("--> %s\n", __func__); nfs41_sequence_process(task, &lgp->res.seq_res); - dprintk("<-- %s\n", __func__); } static int @@ -9525,7 +9440,6 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, status = err; } out: - dprintk("<-- %s\n", __func__); return status; } @@ -9539,10 +9453,8 @@ static void nfs4_layoutget_release(void *calldata) { struct nfs4_layoutget *lgp = calldata; - dprintk("--> %s\n", __func__); nfs4_sequence_free_slot(&lgp->res.seq_res); pnfs_layoutget_free(lgp); - dprintk("<-- %s\n", __func__); } static const struct rpc_call_ops nfs4_layoutget_call_ops = { @@ -9578,8 +9490,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) }; int status = 0; - dprintk("--> %s\n", __func__); - nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0); task = rpc_run_task(&task_setup_data); @@ -9615,7 +9525,6 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata) { struct nfs4_layoutreturn *lrp = calldata; - dprintk("--> %s\n", __func__); nfs4_setup_sequence(lrp->clp, &lrp->args.seq_args, &lrp->res.seq_res, @@ -9629,8 +9538,6 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) struct nfs4_layoutreturn *lrp = calldata; struct nfs_server *server; - dprintk("--> %s\n", __func__); - if (!nfs41_sequence_process(task, &lrp->res.seq_res)) return; @@ -9661,7 +9568,6 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) break; goto out_restart; } - dprintk("<-- %s\n", __func__); return; out_restart: task->tk_status = 0; @@ -9674,7 +9580,6 @@ static void nfs4_layoutreturn_release(void *calldata) struct nfs4_layoutreturn *lrp = calldata; struct pnfs_layout_hdr *lo = lrp->args.layout; - dprintk("--> %s\n", __func__); pnfs_layoutreturn_free_lsegs(lo, &lrp->args.stateid, &lrp->args.range, lrp->res.lrs_present ? &lrp->res.stateid : NULL); nfs4_sequence_free_slot(&lrp->res.seq_res); @@ -9684,7 +9589,6 @@ static void nfs4_layoutreturn_release(void *calldata) nfs_iput_and_deactive(lrp->inode); put_cred(lrp->cred); kfree(calldata); - dprintk("<-- %s\n", __func__); } static const struct rpc_call_ops nfs4_layoutreturn_call_ops = { @@ -9715,7 +9619,6 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) NFS_SP4_MACH_CRED_PNFS_CLEANUP, &task_setup_data.rpc_client, &msg); - dprintk("--> %s\n", __func__); lrp->inode = nfs_igrab_and_active(lrp->args.inode); if (!sync) { if (!lrp->inode) { @@ -9762,7 +9665,6 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, }; int status; - dprintk("--> %s\n", __func__); status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (res.notification & ~args.notify_types) dprintk("%s: unsupported notification\n", __func__); @@ -9934,7 +9836,6 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, msg.rpc_cred = cred; } - dprintk("--> %s\n", __func__); nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0); status = nfs4_call_sync_custom(&task_setup); dprintk("<-- %s status=%d\n", __func__, status); @@ -10158,6 +10059,10 @@ static void nfs41_free_stateid_done(struct rpc_task *task, void *calldata) static void nfs41_free_stateid_release(void *calldata) { + struct nfs_free_stateid_data *data = calldata; + struct nfs_client *clp = data->server->nfs_client; + + nfs_put_client(clp); kfree(calldata); } @@ -10194,6 +10099,10 @@ static int nfs41_free_stateid(struct nfs_server *server, }; struct nfs_free_stateid_data *data; struct rpc_task *task; + struct nfs_client *clp = server->nfs_client; + + if (!refcount_inc_not_zero(&clp->cl_count)) + return -EIO; nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_STATEID, &task_setup.rpc_client, &msg); diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index 4145a0138907..5db460476bf2 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -511,12 +511,16 @@ void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, struct nfs4_slot *slot, struct nfs4_sequence_res *res) { + u32 target_highest_slotid = min(res->sr_target_highest_slotid, + NFS4_MAX_SLOTID); + u32 highest_slotid = min(res->sr_highest_slotid, NFS4_MAX_SLOTID); + spin_lock(&tbl->slot_tbl_lock); - if (!nfs41_is_outlier_target_slotid(tbl, res->sr_target_highest_slotid)) - nfs41_set_target_slotid_locked(tbl, res->sr_target_highest_slotid); + if (!nfs41_is_outlier_target_slotid(tbl, target_highest_slotid)) + nfs41_set_target_slotid_locked(tbl, target_highest_slotid); if (tbl->generation == slot->generation) - nfs41_set_server_slotid_locked(tbl, res->sr_highest_slotid); - nfs41_set_max_slotid_locked(tbl, res->sr_target_highest_slotid); + nfs41_set_server_slotid_locked(tbl, highest_slotid); + nfs41_set_max_slotid_locked(tbl, target_highest_slotid); spin_unlock(&tbl->slot_tbl_lock); } diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 3de425f59b3a..351616c61df5 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -12,6 +12,7 @@ #define NFS4_DEF_SLOT_TABLE_SIZE (64U) #define NFS4_DEF_CB_SLOT_TABLE_SIZE (16U) #define NFS4_MAX_SLOT_TABLE (1024U) +#define NFS4_MAX_SLOTID (NFS4_MAX_SLOT_TABLE - 1U) #define NFS4_NO_SLOT ((u32)-1) #if IS_ENABLED(CONFIG_NFS_V4) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index f22818a80c2c..ecc4594299d6 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1194,10 +1194,7 @@ static int nfs4_run_state_manager(void *); static void nfs4_clear_state_manager_bit(struct nfs_client *clp) { - smp_mb__before_atomic(); - clear_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); - smp_mb__after_atomic(); - wake_up_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING); + clear_and_wake_up_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); rpc_wake_up(&clp->cl_rpcwaitq); } diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 7a2567aa2b86..6ee6ad3674a2 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -9,322 +9,10 @@ #define _TRACE_NFS4_H #include <linux/tracepoint.h> +#include <trace/events/sunrpc_base.h> -TRACE_DEFINE_ENUM(EPERM); -TRACE_DEFINE_ENUM(ENOENT); -TRACE_DEFINE_ENUM(EIO); -TRACE_DEFINE_ENUM(ENXIO); -TRACE_DEFINE_ENUM(EACCES); -TRACE_DEFINE_ENUM(EEXIST); -TRACE_DEFINE_ENUM(EXDEV); -TRACE_DEFINE_ENUM(ENOTDIR); -TRACE_DEFINE_ENUM(EISDIR); -TRACE_DEFINE_ENUM(EFBIG); -TRACE_DEFINE_ENUM(ENOSPC); -TRACE_DEFINE_ENUM(EROFS); -TRACE_DEFINE_ENUM(EMLINK); -TRACE_DEFINE_ENUM(ENAMETOOLONG); -TRACE_DEFINE_ENUM(ENOTEMPTY); -TRACE_DEFINE_ENUM(EDQUOT); -TRACE_DEFINE_ENUM(ESTALE); -TRACE_DEFINE_ENUM(EBADHANDLE); -TRACE_DEFINE_ENUM(EBADCOOKIE); -TRACE_DEFINE_ENUM(ENOTSUPP); -TRACE_DEFINE_ENUM(ETOOSMALL); -TRACE_DEFINE_ENUM(EREMOTEIO); -TRACE_DEFINE_ENUM(EBADTYPE); -TRACE_DEFINE_ENUM(EAGAIN); -TRACE_DEFINE_ENUM(ELOOP); -TRACE_DEFINE_ENUM(EOPNOTSUPP); -TRACE_DEFINE_ENUM(EDEADLK); -TRACE_DEFINE_ENUM(ENOMEM); -TRACE_DEFINE_ENUM(EKEYEXPIRED); -TRACE_DEFINE_ENUM(ETIMEDOUT); -TRACE_DEFINE_ENUM(ERESTARTSYS); -TRACE_DEFINE_ENUM(ECONNREFUSED); -TRACE_DEFINE_ENUM(ECONNRESET); -TRACE_DEFINE_ENUM(ENETUNREACH); -TRACE_DEFINE_ENUM(EHOSTUNREACH); -TRACE_DEFINE_ENUM(EHOSTDOWN); -TRACE_DEFINE_ENUM(EPIPE); -TRACE_DEFINE_ENUM(EPFNOSUPPORT); -TRACE_DEFINE_ENUM(EPROTONOSUPPORT); - -TRACE_DEFINE_ENUM(NFS4_OK); -TRACE_DEFINE_ENUM(NFS4ERR_ACCESS); -TRACE_DEFINE_ENUM(NFS4ERR_ATTRNOTSUPP); -TRACE_DEFINE_ENUM(NFS4ERR_ADMIN_REVOKED); -TRACE_DEFINE_ENUM(NFS4ERR_BACK_CHAN_BUSY); -TRACE_DEFINE_ENUM(NFS4ERR_BADCHAR); -TRACE_DEFINE_ENUM(NFS4ERR_BADHANDLE); -TRACE_DEFINE_ENUM(NFS4ERR_BADIOMODE); -TRACE_DEFINE_ENUM(NFS4ERR_BADLAYOUT); -TRACE_DEFINE_ENUM(NFS4ERR_BADLABEL); -TRACE_DEFINE_ENUM(NFS4ERR_BADNAME); -TRACE_DEFINE_ENUM(NFS4ERR_BADOWNER); -TRACE_DEFINE_ENUM(NFS4ERR_BADSESSION); -TRACE_DEFINE_ENUM(NFS4ERR_BADSLOT); -TRACE_DEFINE_ENUM(NFS4ERR_BADTYPE); -TRACE_DEFINE_ENUM(NFS4ERR_BADXDR); -TRACE_DEFINE_ENUM(NFS4ERR_BAD_COOKIE); -TRACE_DEFINE_ENUM(NFS4ERR_BAD_HIGH_SLOT); -TRACE_DEFINE_ENUM(NFS4ERR_BAD_RANGE); -TRACE_DEFINE_ENUM(NFS4ERR_BAD_SEQID); -TRACE_DEFINE_ENUM(NFS4ERR_BAD_SESSION_DIGEST); -TRACE_DEFINE_ENUM(NFS4ERR_BAD_STATEID); -TRACE_DEFINE_ENUM(NFS4ERR_CB_PATH_DOWN); -TRACE_DEFINE_ENUM(NFS4ERR_CLID_INUSE); -TRACE_DEFINE_ENUM(NFS4ERR_CLIENTID_BUSY); -TRACE_DEFINE_ENUM(NFS4ERR_COMPLETE_ALREADY); -TRACE_DEFINE_ENUM(NFS4ERR_CONN_NOT_BOUND_TO_SESSION); -TRACE_DEFINE_ENUM(NFS4ERR_DEADLOCK); -TRACE_DEFINE_ENUM(NFS4ERR_DEADSESSION); -TRACE_DEFINE_ENUM(NFS4ERR_DELAY); -TRACE_DEFINE_ENUM(NFS4ERR_DELEG_ALREADY_WANTED); -TRACE_DEFINE_ENUM(NFS4ERR_DELEG_REVOKED); -TRACE_DEFINE_ENUM(NFS4ERR_DENIED); -TRACE_DEFINE_ENUM(NFS4ERR_DIRDELEG_UNAVAIL); -TRACE_DEFINE_ENUM(NFS4ERR_DQUOT); -TRACE_DEFINE_ENUM(NFS4ERR_ENCR_ALG_UNSUPP); -TRACE_DEFINE_ENUM(NFS4ERR_EXIST); -TRACE_DEFINE_ENUM(NFS4ERR_EXPIRED); -TRACE_DEFINE_ENUM(NFS4ERR_FBIG); -TRACE_DEFINE_ENUM(NFS4ERR_FHEXPIRED); -TRACE_DEFINE_ENUM(NFS4ERR_FILE_OPEN); -TRACE_DEFINE_ENUM(NFS4ERR_GRACE); -TRACE_DEFINE_ENUM(NFS4ERR_HASH_ALG_UNSUPP); -TRACE_DEFINE_ENUM(NFS4ERR_INVAL); -TRACE_DEFINE_ENUM(NFS4ERR_IO); -TRACE_DEFINE_ENUM(NFS4ERR_ISDIR); -TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTTRYLATER); -TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTUNAVAILABLE); -TRACE_DEFINE_ENUM(NFS4ERR_LEASE_MOVED); -TRACE_DEFINE_ENUM(NFS4ERR_LOCKED); -TRACE_DEFINE_ENUM(NFS4ERR_LOCKS_HELD); -TRACE_DEFINE_ENUM(NFS4ERR_LOCK_RANGE); -TRACE_DEFINE_ENUM(NFS4ERR_MINOR_VERS_MISMATCH); -TRACE_DEFINE_ENUM(NFS4ERR_MLINK); -TRACE_DEFINE_ENUM(NFS4ERR_MOVED); -TRACE_DEFINE_ENUM(NFS4ERR_NAMETOOLONG); -TRACE_DEFINE_ENUM(NFS4ERR_NOENT); -TRACE_DEFINE_ENUM(NFS4ERR_NOFILEHANDLE); -TRACE_DEFINE_ENUM(NFS4ERR_NOMATCHING_LAYOUT); -TRACE_DEFINE_ENUM(NFS4ERR_NOSPC); -TRACE_DEFINE_ENUM(NFS4ERR_NOTDIR); -TRACE_DEFINE_ENUM(NFS4ERR_NOTEMPTY); -TRACE_DEFINE_ENUM(NFS4ERR_NOTSUPP); -TRACE_DEFINE_ENUM(NFS4ERR_NOT_ONLY_OP); -TRACE_DEFINE_ENUM(NFS4ERR_NOT_SAME); -TRACE_DEFINE_ENUM(NFS4ERR_NO_GRACE); -TRACE_DEFINE_ENUM(NFS4ERR_NXIO); -TRACE_DEFINE_ENUM(NFS4ERR_OLD_STATEID); -TRACE_DEFINE_ENUM(NFS4ERR_OPENMODE); -TRACE_DEFINE_ENUM(NFS4ERR_OP_ILLEGAL); -TRACE_DEFINE_ENUM(NFS4ERR_OP_NOT_IN_SESSION); -TRACE_DEFINE_ENUM(NFS4ERR_PERM); -TRACE_DEFINE_ENUM(NFS4ERR_PNFS_IO_HOLE); -TRACE_DEFINE_ENUM(NFS4ERR_PNFS_NO_LAYOUT); -TRACE_DEFINE_ENUM(NFS4ERR_RECALLCONFLICT); -TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_BAD); -TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_CONFLICT); -TRACE_DEFINE_ENUM(NFS4ERR_REJECT_DELEG); -TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG); -TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG_TO_CACHE); -TRACE_DEFINE_ENUM(NFS4ERR_REQ_TOO_BIG); -TRACE_DEFINE_ENUM(NFS4ERR_RESOURCE); -TRACE_DEFINE_ENUM(NFS4ERR_RESTOREFH); -TRACE_DEFINE_ENUM(NFS4ERR_RETRY_UNCACHED_REP); -TRACE_DEFINE_ENUM(NFS4ERR_RETURNCONFLICT); -TRACE_DEFINE_ENUM(NFS4ERR_ROFS); -TRACE_DEFINE_ENUM(NFS4ERR_SAME); -TRACE_DEFINE_ENUM(NFS4ERR_SHARE_DENIED); -TRACE_DEFINE_ENUM(NFS4ERR_SEQUENCE_POS); -TRACE_DEFINE_ENUM(NFS4ERR_SEQ_FALSE_RETRY); -TRACE_DEFINE_ENUM(NFS4ERR_SEQ_MISORDERED); -TRACE_DEFINE_ENUM(NFS4ERR_SERVERFAULT); -TRACE_DEFINE_ENUM(NFS4ERR_STALE); -TRACE_DEFINE_ENUM(NFS4ERR_STALE_CLIENTID); -TRACE_DEFINE_ENUM(NFS4ERR_STALE_STATEID); -TRACE_DEFINE_ENUM(NFS4ERR_SYMLINK); -TRACE_DEFINE_ENUM(NFS4ERR_TOOSMALL); -TRACE_DEFINE_ENUM(NFS4ERR_TOO_MANY_OPS); -TRACE_DEFINE_ENUM(NFS4ERR_UNKNOWN_LAYOUTTYPE); -TRACE_DEFINE_ENUM(NFS4ERR_UNSAFE_COMPOUND); -TRACE_DEFINE_ENUM(NFS4ERR_WRONGSEC); -TRACE_DEFINE_ENUM(NFS4ERR_WRONG_CRED); -TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE); -TRACE_DEFINE_ENUM(NFS4ERR_XDEV); - -TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_MDS); -TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS); - -#define show_nfsv4_errors(error) \ - __print_symbolic(error, \ - { NFS4_OK, "OK" }, \ - /* Mapped by nfs4_stat_to_errno() */ \ - { EPERM, "EPERM" }, \ - { ENOENT, "ENOENT" }, \ - { EIO, "EIO" }, \ - { ENXIO, "ENXIO" }, \ - { EACCES, "EACCES" }, \ - { EEXIST, "EEXIST" }, \ - { EXDEV, "EXDEV" }, \ - { ENOTDIR, "ENOTDIR" }, \ - { EISDIR, "EISDIR" }, \ - { EFBIG, "EFBIG" }, \ - { ENOSPC, "ENOSPC" }, \ - { EROFS, "EROFS" }, \ - { EMLINK, "EMLINK" }, \ - { ENAMETOOLONG, "ENAMETOOLONG" }, \ - { ENOTEMPTY, "ENOTEMPTY" }, \ - { EDQUOT, "EDQUOT" }, \ - { ESTALE, "ESTALE" }, \ - { EBADHANDLE, "EBADHANDLE" }, \ - { EBADCOOKIE, "EBADCOOKIE" }, \ - { ENOTSUPP, "ENOTSUPP" }, \ - { ETOOSMALL, "ETOOSMALL" }, \ - { EREMOTEIO, "EREMOTEIO" }, \ - { EBADTYPE, "EBADTYPE" }, \ - { EAGAIN, "EAGAIN" }, \ - { ELOOP, "ELOOP" }, \ - { EOPNOTSUPP, "EOPNOTSUPP" }, \ - { EDEADLK, "EDEADLK" }, \ - /* RPC errors */ \ - { ENOMEM, "ENOMEM" }, \ - { EKEYEXPIRED, "EKEYEXPIRED" }, \ - { ETIMEDOUT, "ETIMEDOUT" }, \ - { ERESTARTSYS, "ERESTARTSYS" }, \ - { ECONNREFUSED, "ECONNREFUSED" }, \ - { ECONNRESET, "ECONNRESET" }, \ - { ENETUNREACH, "ENETUNREACH" }, \ - { EHOSTUNREACH, "EHOSTUNREACH" }, \ - { EHOSTDOWN, "EHOSTDOWN" }, \ - { EPIPE, "EPIPE" }, \ - { EPFNOSUPPORT, "EPFNOSUPPORT" }, \ - { EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \ - /* NFSv4 native errors */ \ - { NFS4ERR_ACCESS, "ACCESS" }, \ - { NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \ - { NFS4ERR_ADMIN_REVOKED, "ADMIN_REVOKED" }, \ - { NFS4ERR_BACK_CHAN_BUSY, "BACK_CHAN_BUSY" }, \ - { NFS4ERR_BADCHAR, "BADCHAR" }, \ - { NFS4ERR_BADHANDLE, "BADHANDLE" }, \ - { NFS4ERR_BADIOMODE, "BADIOMODE" }, \ - { NFS4ERR_BADLAYOUT, "BADLAYOUT" }, \ - { NFS4ERR_BADLABEL, "BADLABEL" }, \ - { NFS4ERR_BADNAME, "BADNAME" }, \ - { NFS4ERR_BADOWNER, "BADOWNER" }, \ - { NFS4ERR_BADSESSION, "BADSESSION" }, \ - { NFS4ERR_BADSLOT, "BADSLOT" }, \ - { NFS4ERR_BADTYPE, "BADTYPE" }, \ - { NFS4ERR_BADXDR, "BADXDR" }, \ - { NFS4ERR_BAD_COOKIE, "BAD_COOKIE" }, \ - { NFS4ERR_BAD_HIGH_SLOT, "BAD_HIGH_SLOT" }, \ - { NFS4ERR_BAD_RANGE, "BAD_RANGE" }, \ - { NFS4ERR_BAD_SEQID, "BAD_SEQID" }, \ - { NFS4ERR_BAD_SESSION_DIGEST, "BAD_SESSION_DIGEST" }, \ - { NFS4ERR_BAD_STATEID, "BAD_STATEID" }, \ - { NFS4ERR_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ - { NFS4ERR_CLID_INUSE, "CLID_INUSE" }, \ - { NFS4ERR_CLIENTID_BUSY, "CLIENTID_BUSY" }, \ - { NFS4ERR_COMPLETE_ALREADY, "COMPLETE_ALREADY" }, \ - { NFS4ERR_CONN_NOT_BOUND_TO_SESSION, \ - "CONN_NOT_BOUND_TO_SESSION" }, \ - { NFS4ERR_DEADLOCK, "DEADLOCK" }, \ - { NFS4ERR_DEADSESSION, "DEAD_SESSION" }, \ - { NFS4ERR_DELAY, "DELAY" }, \ - { NFS4ERR_DELEG_ALREADY_WANTED, \ - "DELEG_ALREADY_WANTED" }, \ - { NFS4ERR_DELEG_REVOKED, "DELEG_REVOKED" }, \ - { NFS4ERR_DENIED, "DENIED" }, \ - { NFS4ERR_DIRDELEG_UNAVAIL, "DIRDELEG_UNAVAIL" }, \ - { NFS4ERR_DQUOT, "DQUOT" }, \ - { NFS4ERR_ENCR_ALG_UNSUPP, "ENCR_ALG_UNSUPP" }, \ - { NFS4ERR_EXIST, "EXIST" }, \ - { NFS4ERR_EXPIRED, "EXPIRED" }, \ - { NFS4ERR_FBIG, "FBIG" }, \ - { NFS4ERR_FHEXPIRED, "FHEXPIRED" }, \ - { NFS4ERR_FILE_OPEN, "FILE_OPEN" }, \ - { NFS4ERR_GRACE, "GRACE" }, \ - { NFS4ERR_HASH_ALG_UNSUPP, "HASH_ALG_UNSUPP" }, \ - { NFS4ERR_INVAL, "INVAL" }, \ - { NFS4ERR_IO, "IO" }, \ - { NFS4ERR_ISDIR, "ISDIR" }, \ - { NFS4ERR_LAYOUTTRYLATER, "LAYOUTTRYLATER" }, \ - { NFS4ERR_LAYOUTUNAVAILABLE, "LAYOUTUNAVAILABLE" }, \ - { NFS4ERR_LEASE_MOVED, "LEASE_MOVED" }, \ - { NFS4ERR_LOCKED, "LOCKED" }, \ - { NFS4ERR_LOCKS_HELD, "LOCKS_HELD" }, \ - { NFS4ERR_LOCK_RANGE, "LOCK_RANGE" }, \ - { NFS4ERR_MINOR_VERS_MISMATCH, "MINOR_VERS_MISMATCH" }, \ - { NFS4ERR_MLINK, "MLINK" }, \ - { NFS4ERR_MOVED, "MOVED" }, \ - { NFS4ERR_NAMETOOLONG, "NAMETOOLONG" }, \ - { NFS4ERR_NOENT, "NOENT" }, \ - { NFS4ERR_NOFILEHANDLE, "NOFILEHANDLE" }, \ - { NFS4ERR_NOMATCHING_LAYOUT, "NOMATCHING_LAYOUT" }, \ - { NFS4ERR_NOSPC, "NOSPC" }, \ - { NFS4ERR_NOTDIR, "NOTDIR" }, \ - { NFS4ERR_NOTEMPTY, "NOTEMPTY" }, \ - { NFS4ERR_NOTSUPP, "NOTSUPP" }, \ - { NFS4ERR_NOT_ONLY_OP, "NOT_ONLY_OP" }, \ - { NFS4ERR_NOT_SAME, "NOT_SAME" }, \ - { NFS4ERR_NO_GRACE, "NO_GRACE" }, \ - { NFS4ERR_NXIO, "NXIO" }, \ - { NFS4ERR_OLD_STATEID, "OLD_STATEID" }, \ - { NFS4ERR_OPENMODE, "OPENMODE" }, \ - { NFS4ERR_OP_ILLEGAL, "OP_ILLEGAL" }, \ - { NFS4ERR_OP_NOT_IN_SESSION, "OP_NOT_IN_SESSION" }, \ - { NFS4ERR_PERM, "PERM" }, \ - { NFS4ERR_PNFS_IO_HOLE, "PNFS_IO_HOLE" }, \ - { NFS4ERR_PNFS_NO_LAYOUT, "PNFS_NO_LAYOUT" }, \ - { NFS4ERR_RECALLCONFLICT, "RECALLCONFLICT" }, \ - { NFS4ERR_RECLAIM_BAD, "RECLAIM_BAD" }, \ - { NFS4ERR_RECLAIM_CONFLICT, "RECLAIM_CONFLICT" }, \ - { NFS4ERR_REJECT_DELEG, "REJECT_DELEG" }, \ - { NFS4ERR_REP_TOO_BIG, "REP_TOO_BIG" }, \ - { NFS4ERR_REP_TOO_BIG_TO_CACHE, \ - "REP_TOO_BIG_TO_CACHE" }, \ - { NFS4ERR_REQ_TOO_BIG, "REQ_TOO_BIG" }, \ - { NFS4ERR_RESOURCE, "RESOURCE" }, \ - { NFS4ERR_RESTOREFH, "RESTOREFH" }, \ - { NFS4ERR_RETRY_UNCACHED_REP, "RETRY_UNCACHED_REP" }, \ - { NFS4ERR_RETURNCONFLICT, "RETURNCONFLICT" }, \ - { NFS4ERR_ROFS, "ROFS" }, \ - { NFS4ERR_SAME, "SAME" }, \ - { NFS4ERR_SHARE_DENIED, "SHARE_DENIED" }, \ - { NFS4ERR_SEQUENCE_POS, "SEQUENCE_POS" }, \ - { NFS4ERR_SEQ_FALSE_RETRY, "SEQ_FALSE_RETRY" }, \ - { NFS4ERR_SEQ_MISORDERED, "SEQ_MISORDERED" }, \ - { NFS4ERR_SERVERFAULT, "SERVERFAULT" }, \ - { NFS4ERR_STALE, "STALE" }, \ - { NFS4ERR_STALE_CLIENTID, "STALE_CLIENTID" }, \ - { NFS4ERR_STALE_STATEID, "STALE_STATEID" }, \ - { NFS4ERR_SYMLINK, "SYMLINK" }, \ - { NFS4ERR_TOOSMALL, "TOOSMALL" }, \ - { NFS4ERR_TOO_MANY_OPS, "TOO_MANY_OPS" }, \ - { NFS4ERR_UNKNOWN_LAYOUTTYPE, "UNKNOWN_LAYOUTTYPE" }, \ - { NFS4ERR_UNSAFE_COMPOUND, "UNSAFE_COMPOUND" }, \ - { NFS4ERR_WRONGSEC, "WRONGSEC" }, \ - { NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \ - { NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \ - { NFS4ERR_XDEV, "XDEV" }, \ - /* ***** Internal to Linux NFS client ***** */ \ - { NFS4ERR_RESET_TO_MDS, "RESET_TO_MDS" }, \ - { NFS4ERR_RESET_TO_PNFS, "RESET_TO_PNFS" }) - -#define show_open_flags(flags) \ - __print_flags(flags, "|", \ - { O_CREAT, "O_CREAT" }, \ - { O_EXCL, "O_EXCL" }, \ - { O_TRUNC, "O_TRUNC" }, \ - { O_DIRECT, "O_DIRECT" }) - -#define show_fmode_flags(mode) \ - __print_flags(mode, "|", \ - { ((__force unsigned long)FMODE_READ), "READ" }, \ - { ((__force unsigned long)FMODE_WRITE), "WRITE" }, \ - { ((__force unsigned long)FMODE_EXEC), "EXEC" }) +#include <trace/events/fs.h> +#include <trace/events/nfs.h> #define show_nfs_fattr_flags(valid) \ __print_flags((unsigned long)valid, "|", \ @@ -365,7 +53,7 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event, TP_printk( "error=%ld (%s) dstaddr=%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), __get_str(dstaddr) ) ); @@ -389,29 +77,6 @@ DEFINE_NFS4_CLIENTID_EVENT(nfs4_bind_conn_to_session); DEFINE_NFS4_CLIENTID_EVENT(nfs4_sequence); DEFINE_NFS4_CLIENTID_EVENT(nfs4_reclaim_complete); -#define show_nfs4_sequence_status_flags(status) \ - __print_flags((unsigned long)status, "|", \ - { SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ - { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING, \ - "CB_GSS_CONTEXTS_EXPIRING" }, \ - { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED, \ - "CB_GSS_CONTEXTS_EXPIRED" }, \ - { SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED, \ - "EXPIRED_ALL_STATE_REVOKED" }, \ - { SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED, \ - "EXPIRED_SOME_STATE_REVOKED" }, \ - { SEQ4_STATUS_ADMIN_STATE_REVOKED, \ - "ADMIN_STATE_REVOKED" }, \ - { SEQ4_STATUS_RECALLABLE_STATE_REVOKED, \ - "RECALLABLE_STATE_REVOKED" }, \ - { SEQ4_STATUS_LEASE_MOVED, "LEASE_MOVED" }, \ - { SEQ4_STATUS_RESTART_RECLAIM_NEEDED, \ - "RESTART_RECLAIM_NEEDED" }, \ - { SEQ4_STATUS_CB_PATH_DOWN_SESSION, \ - "CB_PATH_DOWN_SESSION" }, \ - { SEQ4_STATUS_BACKCHANNEL_FAULT, \ - "BACKCHANNEL_FAULT" }) - TRACE_EVENT(nfs4_sequence_done, TP_PROTO( const struct nfs4_session *session, @@ -425,7 +90,7 @@ TRACE_EVENT(nfs4_sequence_done, __field(unsigned int, seq_nr) __field(unsigned int, highest_slotid) __field(unsigned int, target_highest_slotid) - __field(unsigned int, status_flags) + __field(unsigned long, status_flags) __field(unsigned long, error) ), @@ -444,16 +109,16 @@ TRACE_EVENT(nfs4_sequence_done, TP_printk( "error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u " "highest_slotid=%u target_highest_slotid=%u " - "status_flags=%u (%s)", + "status_flags=0x%lx (%s)", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), __entry->session, __entry->slot_nr, __entry->seq_nr, __entry->highest_slotid, __entry->target_highest_slotid, __entry->status_flags, - show_nfs4_sequence_status_flags(__entry->status_flags) + show_nfs4_seq4_status(__entry->status_flags) ) ); @@ -490,7 +155,7 @@ TRACE_EVENT(nfs4_cb_sequence, "error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u " "highest_slotid=%u", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), __entry->session, __entry->slot_nr, __entry->seq_nr, @@ -527,7 +192,7 @@ TRACE_EVENT(nfs4_cb_seqid_err, "error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u " "highest_slotid=%u", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), __entry->session, __entry->slot_nr, __entry->seq_nr, @@ -535,6 +200,49 @@ TRACE_EVENT(nfs4_cb_seqid_err, ) ); +TRACE_EVENT(nfs4_cb_offload, + TP_PROTO( + const struct nfs_fh *cb_fh, + const nfs4_stateid *cb_stateid, + uint64_t cb_count, + int cb_error, + int cb_how_stable + ), + + TP_ARGS(cb_fh, cb_stateid, cb_count, cb_error, + cb_how_stable), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(u32, fhandle) + __field(loff_t, cb_count) + __field(int, cb_how) + __field(int, cb_stateid_seq) + __field(u32, cb_stateid_hash) + ), + + TP_fast_assign( + __entry->error = cb_error < 0 ? -cb_error : 0; + __entry->fhandle = nfs_fhandle_hash(cb_fh); + __entry->cb_stateid_seq = + be32_to_cpu(cb_stateid->seqid); + __entry->cb_stateid_hash = + nfs_stateid_hash(cb_stateid); + __entry->cb_count = cb_count; + __entry->cb_how = cb_how_stable; + ), + + TP_printk( + "error=%ld (%s) fhandle=0x%08x cb_stateid=%d:0x%08x " + "cb_count=%llu cb_how=%s", + -__entry->error, + show_nfs4_status(__entry->error), + __entry->fhandle, + __entry->cb_stateid_seq, __entry->cb_stateid_hash, + __entry->cb_count, + show_nfs_stable_how(__entry->cb_how) + ) +); #endif /* CONFIG_NFS_V4_1 */ TRACE_EVENT(nfs4_setup_sequence, @@ -661,7 +369,7 @@ TRACE_EVENT(nfs4_state_mgr_failed, "hostname=%s clp state=%s error=%ld (%s) section=%s", __get_str(hostname), show_nfs4_clp_state(__entry->state), -__entry->error, - show_nfsv4_errors(__entry->error), __get_str(section) + show_nfs4_status(__entry->error), __get_str(section) ) ) @@ -694,8 +402,8 @@ TRACE_EVENT(nfs4_xdr_bad_operation, __entry->expected = expected; ), - TP_printk( - "task:%u@%d xid=0x%08x operation=%u, expected=%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " xid=0x%08x operation=%u, expected=%u", __entry->task_id, __entry->client_id, __entry->xid, __entry->op, __entry->expected ) @@ -729,10 +437,10 @@ DECLARE_EVENT_CLASS(nfs4_xdr_event, __entry->error = error; ), - TP_printk( - "task:%u@%d xid=0x%08x error=%ld (%s) operation=%u", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " xid=0x%08x error=%ld (%s) operation=%u", __entry->task_id, __entry->client_id, __entry->xid, - -__entry->error, show_nfsv4_errors(__entry->error), + -__entry->error, show_nfs4_status(__entry->error), __entry->op ) ); @@ -793,8 +501,8 @@ DECLARE_EVENT_CLASS(nfs4_open_event, TP_STRUCT__entry( __field(unsigned long, error) - __field(unsigned int, flags) - __field(unsigned int, fmode) + __field(unsigned long, flags) + __field(unsigned long, fmode) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) @@ -812,7 +520,7 @@ DECLARE_EVENT_CLASS(nfs4_open_event, __entry->error = -error; __entry->flags = flags; - __entry->fmode = (__force unsigned int)ctx->mode; + __entry->fmode = (__force unsigned long)ctx->mode; __entry->dev = ctx->dentry->d_sb->s_dev; if (!IS_ERR_OR_NULL(state)) { inode = state->inode; @@ -842,15 +550,15 @@ DECLARE_EVENT_CLASS(nfs4_open_event, ), TP_printk( - "error=%ld (%s) flags=%d (%s) fmode=%s " + "error=%ld (%s) flags=%lu (%s) fmode=%s " "fileid=%02x:%02x:%llu fhandle=0x%08x " "name=%02x:%02x:%llu/%s stateid=%d:0x%08x " "openstateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), __entry->flags, - show_open_flags(__entry->flags), - show_fmode_flags(__entry->fmode), + show_fs_fcntl_open_flags(__entry->flags), + show_fs_fmode_flags(__entry->fmode), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -904,7 +612,7 @@ TRACE_EVENT(nfs4_cached_open, TP_printk( "fmode=%s fileid=%02x:%02x:%llu " "fhandle=0x%08x stateid=%d:0x%08x", - __entry->fmode ? show_fmode_flags(__entry->fmode) : + __entry->fmode ? show_fs_fmode_flags(__entry->fmode) : "closed", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -951,8 +659,8 @@ TRACE_EVENT(nfs4_close, "error=%ld (%s) fmode=%s fileid=%02x:%02x:%llu " "fhandle=0x%08x openstateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), - __entry->fmode ? show_fmode_flags(__entry->fmode) : + show_nfs4_status(__entry->error), + __entry->fmode ? show_fs_fmode_flags(__entry->fmode) : "closed", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -961,24 +669,6 @@ TRACE_EVENT(nfs4_close, ) ); -TRACE_DEFINE_ENUM(F_GETLK); -TRACE_DEFINE_ENUM(F_SETLK); -TRACE_DEFINE_ENUM(F_SETLKW); -TRACE_DEFINE_ENUM(F_RDLCK); -TRACE_DEFINE_ENUM(F_WRLCK); -TRACE_DEFINE_ENUM(F_UNLCK); - -#define show_lock_cmd(type) \ - __print_symbolic((int)type, \ - { F_GETLK, "GETLK" }, \ - { F_SETLK, "SETLK" }, \ - { F_SETLKW, "SETLKW" }) -#define show_lock_type(type) \ - __print_symbolic((int)type, \ - { F_RDLCK, "RDLCK" }, \ - { F_WRLCK, "WRLCK" }, \ - { F_UNLCK, "UNLCK" }) - DECLARE_EVENT_CLASS(nfs4_lock_event, TP_PROTO( const struct file_lock *request, @@ -991,8 +681,8 @@ DECLARE_EVENT_CLASS(nfs4_lock_event, TP_STRUCT__entry( __field(unsigned long, error) - __field(int, cmd) - __field(char, type) + __field(unsigned long, cmd) + __field(unsigned long, type) __field(loff_t, start) __field(loff_t, end) __field(dev_t, dev) @@ -1024,9 +714,9 @@ DECLARE_EVENT_CLASS(nfs4_lock_event, "fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), - show_lock_cmd(__entry->cmd), - show_lock_type(__entry->type), + show_nfs4_status(__entry->error), + show_fs_fcntl_cmd(__entry->cmd), + show_fs_fcntl_lock_type(__entry->type), (long long)__entry->start, (long long)__entry->end, MAJOR(__entry->dev), MINOR(__entry->dev), @@ -1061,8 +751,8 @@ TRACE_EVENT(nfs4_set_lock, TP_STRUCT__entry( __field(unsigned long, error) - __field(int, cmd) - __field(char, type) + __field(unsigned long, cmd) + __field(unsigned long, type) __field(loff_t, start) __field(loff_t, end) __field(dev_t, dev) @@ -1100,9 +790,9 @@ TRACE_EVENT(nfs4_set_lock, "fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x lockstateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), - show_lock_cmd(__entry->cmd), - show_lock_type(__entry->type), + show_nfs4_status(__entry->error), + show_fs_fcntl_cmd(__entry->cmd), + show_fs_fcntl_lock_type(__entry->type), (long long)__entry->start, (long long)__entry->end, MAJOR(__entry->dev), MINOR(__entry->dev), @@ -1219,7 +909,7 @@ DECLARE_EVENT_CLASS(nfs4_set_delegation_event, TP_printk( "fmode=%s fileid=%02x:%02x:%llu fhandle=0x%08x", - show_fmode_flags(__entry->fmode), + show_fs_fmode_flags(__entry->fmode), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle @@ -1266,7 +956,7 @@ TRACE_EVENT(nfs4_delegreturn_exit, "error=%ld (%s) dev=%02x:%02x fhandle=0x%08x " "stateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), __entry->fhandle, __entry->stateid_seq, __entry->stateid_hash @@ -1309,7 +999,7 @@ DECLARE_EVENT_CLASS(nfs4_test_stateid_event, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1356,7 +1046,7 @@ DECLARE_EVENT_CLASS(nfs4_lookup_event, TP_printk( "error=%ld (%s) name=%02x:%02x:%llu/%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -1403,7 +1093,7 @@ TRACE_EVENT(nfs4_lookupp, TP_printk( "error=%ld (%s) inode=%02x:%02x:%llu", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->ino ) @@ -1442,7 +1132,7 @@ TRACE_EVENT(nfs4_rename, "error=%ld (%s) oldname=%02x:%02x:%llu/%s " "newname=%02x:%02x:%llu/%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->olddir, __get_str(oldname), @@ -1477,7 +1167,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_event, TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle @@ -1535,7 +1225,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_event, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1588,7 +1278,7 @@ DECLARE_EVENT_CLASS(nfs4_getattr_event, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "valid=%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1644,7 +1334,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "dstaddr=%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1705,7 +1395,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x dstaddr=%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1754,7 +1444,7 @@ DECLARE_EVENT_CLASS(nfs4_idmap_event, TP_printk( "error=%ld (%s) id=%u name=%s", - -__entry->error, show_nfsv4_errors(__entry->error), + -__entry->error, show_nfs4_status(__entry->error), __entry->id, __get_str(name) ) @@ -1832,7 +1522,7 @@ DECLARE_EVENT_CLASS(nfs4_read_event, "offset=%lld count=%u res=%u stateid=%d:0x%08x " "layoutstateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1906,7 +1596,7 @@ DECLARE_EVENT_CLASS(nfs4_write_event, "offset=%lld count=%u res=%u stateid=%d:0x%08x " "layoutstateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1970,7 +1660,7 @@ DECLARE_EVENT_CLASS(nfs4_commit_event, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%u layoutstateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -1990,16 +1680,6 @@ DEFINE_NFS4_COMMIT_EVENT(nfs4_commit); #ifdef CONFIG_NFS_V4_1 DEFINE_NFS4_COMMIT_EVENT(nfs4_pnfs_commit_ds); -TRACE_DEFINE_ENUM(IOMODE_READ); -TRACE_DEFINE_ENUM(IOMODE_RW); -TRACE_DEFINE_ENUM(IOMODE_ANY); - -#define show_pnfs_iomode(iomode) \ - __print_symbolic(iomode, \ - { IOMODE_READ, "READ" }, \ - { IOMODE_RW, "RW" }, \ - { IOMODE_ANY, "ANY" }) - TRACE_EVENT(nfs4_layoutget, TP_PROTO( const struct nfs_open_context *ctx, @@ -2055,11 +1735,11 @@ TRACE_EVENT(nfs4_layoutget, "iomode=%s offset=%llu count=%llu stateid=%d:0x%08x " "layoutstateid=%d:0x%08x", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - show_pnfs_iomode(__entry->iomode), + show_pnfs_layout_iomode(__entry->iomode), (unsigned long long)__entry->offset, (unsigned long long)__entry->count, __entry->stateid_seq, __entry->stateid_hash, @@ -2153,7 +1833,7 @@ TRACE_EVENT(pnfs_update_layout, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - show_pnfs_iomode(__entry->iomode), + show_pnfs_layout_iomode(__entry->iomode), (unsigned long long)__entry->pos, (unsigned long long)__entry->count, __entry->layoutstateid_seq, __entry->layoutstateid_hash, @@ -2207,7 +1887,7 @@ DECLARE_EVENT_CLASS(pnfs_layout_event, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - show_pnfs_iomode(__entry->iomode), + show_pnfs_layout_iomode(__entry->iomode), (unsigned long long)__entry->pos, (unsigned long long)__entry->count, __entry->layoutstateid_seq, __entry->layoutstateid_hash, @@ -2352,7 +2032,7 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -2408,7 +2088,7 @@ TRACE_EVENT(ff_layout_commit_error, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%llu count=%u dstaddr=%s", -__entry->error, - show_nfsv4_errors(__entry->error), + show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -2417,6 +2097,406 @@ TRACE_EVENT(ff_layout_commit_error, ) ); +TRACE_DEFINE_ENUM(NFS4_CONTENT_DATA); +TRACE_DEFINE_ENUM(NFS4_CONTENT_HOLE); + +#define show_llseek_mode(what) \ + __print_symbolic(what, \ + { NFS4_CONTENT_DATA, "DATA" }, \ + { NFS4_CONTENT_HOLE, "HOLE" }) + +#ifdef CONFIG_NFS_V4_2 +TRACE_EVENT(nfs4_llseek, + TP_PROTO( + const struct inode *inode, + const struct nfs42_seek_args *args, + const struct nfs42_seek_res *res, + int error + ), + + TP_ARGS(inode, args, res, error), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(u32, fhandle) + __field(u32, fileid) + __field(dev_t, dev) + __field(int, stateid_seq) + __field(u32, stateid_hash) + __field(loff_t, offset_s) + __field(u32, what) + __field(loff_t, offset_r) + __field(u32, eof) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = args->sa_fh; + + __entry->fileid = nfsi->fileid; + __entry->dev = inode->i_sb->s_dev; + __entry->fhandle = nfs_fhandle_hash(fh); + __entry->offset_s = args->sa_offset; + __entry->stateid_seq = + be32_to_cpu(args->sa_stateid.seqid); + __entry->stateid_hash = + nfs_stateid_hash(&args->sa_stateid); + __entry->what = args->sa_what; + if (error) { + __entry->error = -error; + __entry->offset_r = 0; + __entry->eof = 0; + } else { + __entry->error = 0; + __entry->offset_r = res->sr_offset; + __entry->eof = res->sr_eof; + } + ), + + TP_printk( + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "stateid=%d:0x%08x offset_s=%llu what=%s " + "offset_r=%llu eof=%u", + -__entry->error, + show_nfs4_status(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->stateid_seq, __entry->stateid_hash, + __entry->offset_s, + show_llseek_mode(__entry->what), + __entry->offset_r, + __entry->eof + ) +); + +DECLARE_EVENT_CLASS(nfs4_sparse_event, + TP_PROTO( + const struct inode *inode, + const struct nfs42_falloc_args *args, + int error + ), + + TP_ARGS(inode, args, error), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(loff_t, offset) + __field(loff_t, len) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(int, stateid_seq) + __field(u32, stateid_hash) + ), + + TP_fast_assign( + __entry->error = error < 0 ? -error : 0; + __entry->offset = args->falloc_offset; + __entry->len = args->falloc_length; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->stateid_seq = + be32_to_cpu(args->falloc_stateid.seqid); + __entry->stateid_hash = + nfs_stateid_hash(&args->falloc_stateid); + ), + + TP_printk( + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "stateid=%d:0x%08x offset=%llu len=%llu", + -__entry->error, + show_nfs4_status(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->stateid_seq, __entry->stateid_hash, + (long long)__entry->offset, + (long long)__entry->len + ) +); +#define DEFINE_NFS4_SPARSE_EVENT(name) \ + DEFINE_EVENT(nfs4_sparse_event, name, \ + TP_PROTO( \ + const struct inode *inode, \ + const struct nfs42_falloc_args *args, \ + int error \ + ), \ + TP_ARGS(inode, args, error)) +DEFINE_NFS4_SPARSE_EVENT(nfs4_fallocate); +DEFINE_NFS4_SPARSE_EVENT(nfs4_deallocate); + +TRACE_EVENT(nfs4_copy, + TP_PROTO( + const struct inode *src_inode, + const struct inode *dst_inode, + const struct nfs42_copy_args *args, + const struct nfs42_copy_res *res, + const struct nl4_server *nss, + int error + ), + + TP_ARGS(src_inode, dst_inode, args, res, nss, error), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(u32, src_fhandle) + __field(u32, src_fileid) + __field(u32, dst_fhandle) + __field(u32, dst_fileid) + __field(dev_t, src_dev) + __field(dev_t, dst_dev) + __field(int, src_stateid_seq) + __field(u32, src_stateid_hash) + __field(int, dst_stateid_seq) + __field(u32, dst_stateid_hash) + __field(loff_t, src_offset) + __field(loff_t, dst_offset) + __field(bool, sync) + __field(loff_t, len) + __field(int, res_stateid_seq) + __field(u32, res_stateid_hash) + __field(loff_t, res_count) + __field(bool, res_sync) + __field(bool, res_cons) + __field(bool, intra) + ), + + TP_fast_assign( + const struct nfs_inode *src_nfsi = NFS_I(src_inode); + const struct nfs_inode *dst_nfsi = NFS_I(dst_inode); + + __entry->src_fileid = src_nfsi->fileid; + __entry->src_dev = src_inode->i_sb->s_dev; + __entry->src_fhandle = nfs_fhandle_hash(args->src_fh); + __entry->src_offset = args->src_pos; + __entry->dst_fileid = dst_nfsi->fileid; + __entry->dst_dev = dst_inode->i_sb->s_dev; + __entry->dst_fhandle = nfs_fhandle_hash(args->dst_fh); + __entry->dst_offset = args->dst_pos; + __entry->len = args->count; + __entry->sync = args->sync; + __entry->src_stateid_seq = + be32_to_cpu(args->src_stateid.seqid); + __entry->src_stateid_hash = + nfs_stateid_hash(&args->src_stateid); + __entry->dst_stateid_seq = + be32_to_cpu(args->dst_stateid.seqid); + __entry->dst_stateid_hash = + nfs_stateid_hash(&args->dst_stateid); + __entry->intra = nss ? 0 : 1; + if (error) { + __entry->error = -error; + __entry->res_stateid_seq = 0; + __entry->res_stateid_hash = 0; + __entry->res_count = 0; + __entry->res_sync = 0; + __entry->res_cons = 0; + } else { + __entry->error = 0; + __entry->res_stateid_seq = + be32_to_cpu(res->write_res.stateid.seqid); + __entry->res_stateid_hash = + nfs_stateid_hash(&res->write_res.stateid); + __entry->res_count = res->write_res.count; + __entry->res_sync = res->synchronous; + __entry->res_cons = res->consecutive; + } + ), + + TP_printk( + "error=%ld (%s) intra=%d src_fileid=%02x:%02x:%llu " + "src_fhandle=0x%08x dst_fileid=%02x:%02x:%llu " + "dst_fhandle=0x%08x src_stateid=%d:0x%08x " + "dst_stateid=%d:0x%08x src_offset=%llu dst_offset=%llu " + "len=%llu sync=%d cb_stateid=%d:0x%08x res_sync=%d " + "res_cons=%d res_count=%llu", + -__entry->error, + show_nfs4_status(__entry->error), + __entry->intra, + MAJOR(__entry->src_dev), MINOR(__entry->src_dev), + (unsigned long long)__entry->src_fileid, + __entry->src_fhandle, + MAJOR(__entry->dst_dev), MINOR(__entry->dst_dev), + (unsigned long long)__entry->dst_fileid, + __entry->dst_fhandle, + __entry->src_stateid_seq, __entry->src_stateid_hash, + __entry->dst_stateid_seq, __entry->dst_stateid_hash, + __entry->src_offset, + __entry->dst_offset, + __entry->len, + __entry->sync, + __entry->res_stateid_seq, __entry->res_stateid_hash, + __entry->res_sync, + __entry->res_cons, + __entry->res_count + ) +); + +TRACE_EVENT(nfs4_clone, + TP_PROTO( + const struct inode *src_inode, + const struct inode *dst_inode, + const struct nfs42_clone_args *args, + int error + ), + + TP_ARGS(src_inode, dst_inode, args, error), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(u32, src_fhandle) + __field(u32, src_fileid) + __field(u32, dst_fhandle) + __field(u32, dst_fileid) + __field(dev_t, src_dev) + __field(dev_t, dst_dev) + __field(loff_t, src_offset) + __field(loff_t, dst_offset) + __field(int, src_stateid_seq) + __field(u32, src_stateid_hash) + __field(int, dst_stateid_seq) + __field(u32, dst_stateid_hash) + __field(loff_t, len) + ), + + TP_fast_assign( + const struct nfs_inode *src_nfsi = NFS_I(src_inode); + const struct nfs_inode *dst_nfsi = NFS_I(dst_inode); + + __entry->src_fileid = src_nfsi->fileid; + __entry->src_dev = src_inode->i_sb->s_dev; + __entry->src_fhandle = nfs_fhandle_hash(args->src_fh); + __entry->src_offset = args->src_offset; + __entry->dst_fileid = dst_nfsi->fileid; + __entry->dst_dev = dst_inode->i_sb->s_dev; + __entry->dst_fhandle = nfs_fhandle_hash(args->dst_fh); + __entry->dst_offset = args->dst_offset; + __entry->len = args->count; + __entry->error = error < 0 ? -error : 0; + __entry->src_stateid_seq = + be32_to_cpu(args->src_stateid.seqid); + __entry->src_stateid_hash = + nfs_stateid_hash(&args->src_stateid); + __entry->dst_stateid_seq = + be32_to_cpu(args->dst_stateid.seqid); + __entry->dst_stateid_hash = + nfs_stateid_hash(&args->dst_stateid); + ), + + TP_printk( + "error=%ld (%s) src_fileid=%02x:%02x:%llu " + "src_fhandle=0x%08x dst_fileid=%02x:%02x:%llu " + "dst_fhandle=0x%08x src_stateid=%d:0x%08x " + "dst_stateid=%d:0x%08x src_offset=%llu " + "dst_offset=%llu len=%llu", + -__entry->error, + show_nfs4_status(__entry->error), + MAJOR(__entry->src_dev), MINOR(__entry->src_dev), + (unsigned long long)__entry->src_fileid, + __entry->src_fhandle, + MAJOR(__entry->dst_dev), MINOR(__entry->dst_dev), + (unsigned long long)__entry->dst_fileid, + __entry->dst_fhandle, + __entry->src_stateid_seq, __entry->src_stateid_hash, + __entry->dst_stateid_seq, __entry->dst_stateid_hash, + __entry->src_offset, + __entry->dst_offset, + __entry->len + ) +); + +TRACE_EVENT(nfs4_copy_notify, + TP_PROTO( + const struct inode *inode, + const struct nfs42_copy_notify_args *args, + const struct nfs42_copy_notify_res *res, + int error + ), + + TP_ARGS(inode, args, res, error), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(u32, fhandle) + __field(u32, fileid) + __field(dev_t, dev) + __field(int, stateid_seq) + __field(u32, stateid_hash) + __field(int, res_stateid_seq) + __field(u32, res_stateid_hash) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + + __entry->fileid = nfsi->fileid; + __entry->dev = inode->i_sb->s_dev; + __entry->fhandle = nfs_fhandle_hash(args->cna_src_fh); + __entry->stateid_seq = + be32_to_cpu(args->cna_src_stateid.seqid); + __entry->stateid_hash = + nfs_stateid_hash(&args->cna_src_stateid); + if (error) { + __entry->error = -error; + __entry->res_stateid_seq = 0; + __entry->res_stateid_hash = 0; + } else { + __entry->error = 0; + __entry->res_stateid_seq = + be32_to_cpu(res->cnr_stateid.seqid); + __entry->res_stateid_hash = + nfs_stateid_hash(&res->cnr_stateid); + } + ), + + TP_printk( + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "stateid=%d:0x%08x res_stateid=%d:0x%08x", + -__entry->error, + show_nfs4_status(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->stateid_seq, __entry->stateid_hash, + __entry->res_stateid_seq, __entry->res_stateid_hash + ) +); + +TRACE_EVENT(nfs4_offload_cancel, + TP_PROTO( + const struct nfs42_offload_status_args *args, + int error + ), + + TP_ARGS(args, error), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(u32, fhandle) + __field(int, stateid_seq) + __field(u32, stateid_hash) + ), + + TP_fast_assign( + __entry->fhandle = nfs_fhandle_hash(args->osa_src_fh); + __entry->error = error < 0 ? -error : 0; + __entry->stateid_seq = + be32_to_cpu(args->osa_stateid.seqid); + __entry->stateid_hash = + nfs_stateid_hash(&args->osa_stateid); + ), + + TP_printk( + "error=%ld (%s) fhandle=0x%08x stateid=%d:0x%08x", + -__entry->error, + show_nfs4_status(__entry->error), + __entry->fhandle, + __entry->stateid_seq, __entry->stateid_hash + ) +); +#endif /* CONFIG_NFS_V4_2 */ #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index a8cff19c6f00..69862bf6db00 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3168,20 +3168,23 @@ static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) { - __be32 *p; + ssize_t ret; + void *ptr; + u32 tmp; - p = xdr_inline_decode(xdr, 8); - if (unlikely(!p)) + if (xdr_stream_decode_u32(xdr, &tmp) < 0) return -EIO; - hdr->status = be32_to_cpup(p++); - hdr->taglen = be32_to_cpup(p); + hdr->status = tmp; - p = xdr_inline_decode(xdr, hdr->taglen + 4); - if (unlikely(!p)) + ret = xdr_stream_decode_opaque_inline(xdr, &ptr, NFS4_OPAQUE_LIMIT); + if (ret < 0) + return -EIO; + hdr->taglen = ret; + hdr->tag = ptr; + + if (xdr_stream_decode_u32(xdr, &tmp) < 0) return -EIO; - hdr->tag = (char *)p; - p += XDR_QUADLEN(hdr->taglen); - hdr->nops = be32_to_cpup(p); + hdr->nops = tmp; if (unlikely(hdr->nops < 1)) return nfs4_stat_to_errno(hdr->status); return 0; @@ -4582,8 +4585,7 @@ static int decode_attr_mdsthreshold(struct xdr_stream *xdr, static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fattr *fattr, struct nfs_fh *fh, - struct nfs4_fs_locations *fs_loc, struct nfs4_label *label, - const struct nfs_server *server) + struct nfs4_fs_locations *fs_loc, const struct nfs_server *server) { int status; umode_t fmode = 0; @@ -4698,8 +4700,8 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, if (status < 0) goto xdr_error; - if (label) { - status = decode_attr_security_label(xdr, bitmap, label); + if (fattr->label) { + status = decode_attr_security_label(xdr, bitmap, fattr->label); if (status < 0) goto xdr_error; fattr->valid |= status; @@ -4712,7 +4714,7 @@ xdr_error: static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr, struct nfs_fh *fh, struct nfs4_fs_locations *fs_loc, - struct nfs4_label *label, const struct nfs_server *server) + const struct nfs_server *server) { unsigned int savep; uint32_t attrlen, @@ -4731,8 +4733,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat if (status < 0) goto xdr_error; - status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, fs_loc, - label, server); + status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, fs_loc, server); if (status < 0) goto xdr_error; @@ -4742,16 +4743,10 @@ xdr_error: return status; } -static int decode_getfattr_label(struct xdr_stream *xdr, struct nfs_fattr *fattr, - struct nfs4_label *label, const struct nfs_server *server) -{ - return decode_getfattr_generic(xdr, fattr, NULL, NULL, label, server); -} - static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, const struct nfs_server *server) { - return decode_getfattr_generic(xdr, fattr, NULL, NULL, NULL, server); + return decode_getfattr_generic(xdr, fattr, NULL, NULL, server); } /* @@ -5572,20 +5567,9 @@ static int decode_secinfo_no_name(struct xdr_stream *xdr, struct nfs4_secinfo_re static int decode_op_map(struct xdr_stream *xdr, struct nfs4_op_map *op_map) { - __be32 *p; - uint32_t bitmap_words; - unsigned int i; - - p = xdr_inline_decode(xdr, 4); - if (!p) - return -EIO; - bitmap_words = be32_to_cpup(p++); - if (bitmap_words > NFS4_OP_MAP_NUM_WORDS) + if (xdr_stream_decode_uint32_array(xdr, op_map->u.words, + ARRAY_SIZE(op_map->u.words)) < 0) return -EIO; - p = xdr_inline_decode(xdr, 4 * bitmap_words); - for (i = 0; i < bitmap_words; i++) - op_map->u.words[i] = be32_to_cpup(p++); - return 0; } @@ -6179,7 +6163,7 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_getfh(xdr, res->fh); if (status) goto out; - status = decode_getfattr_label(xdr, res->fattr, res->label, res->server); + status = decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -6209,7 +6193,7 @@ static int nfs4_xdr_dec_lookupp(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_getfh(xdr, res->fh); if (status) goto out; - status = decode_getfattr_label(xdr, res->fattr, res->label, res->server); + status = decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -6236,8 +6220,7 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, goto out; status = decode_getfh(xdr, res->fh); if (status == 0) - status = decode_getfattr_label(xdr, res->fattr, - res->label, res->server); + status = decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -6331,7 +6314,7 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_restorefh(xdr); if (status) goto out; - decode_getfattr_label(xdr, res->fattr, res->label, res->server); + decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -6361,7 +6344,7 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_getfh(xdr, res->fh); if (status) goto out; - decode_getfattr_label(xdr, res->fattr, res->label, res->server); + decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -6394,7 +6377,7 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_putfh(xdr); if (status) goto out; - status = decode_getfattr_label(xdr, res->fattr, res->label, res->server); + status = decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -6532,7 +6515,7 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, goto out; if (res->access_request) decode_access(xdr, &res->access_supported, &res->access_result); - decode_getfattr_label(xdr, res->f_attr, res->f_label, res->server); + decode_getfattr(xdr, res->f_attr, res->server); if (res->lg_res) decode_layoutget(xdr, rqstp, res->lg_res); out: @@ -6616,7 +6599,7 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, status = decode_setattr(xdr); if (status) goto out; - decode_getfattr_label(xdr, res->fattr, res->label, res->server); + decode_getfattr(xdr, res->fattr, res->server); out: return status; } @@ -7031,7 +7014,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, status = decode_getfattr_generic(xdr, &res->fs_locations->fattr, NULL, res->fs_locations, - NULL, res->fs_locations->server); + res->fs_locations->server); if (status) goto out; if (res->renew) @@ -7044,7 +7027,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, status = decode_getfattr_generic(xdr, &res->fs_locations->fattr, NULL, res->fs_locations, - NULL, res->fs_locations->server); + res->fs_locations->server); } out: return status; @@ -7475,7 +7458,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, return -EAGAIN; if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, - NULL, entry->label, entry->server) < 0) + NULL, entry->server) < 0) return -EAGAIN; if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) entry->ino = entry->fattr->mounted_on_fileid; diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 8a224871be74..21dac847f1e4 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -11,45 +11,9 @@ #include <linux/tracepoint.h> #include <linux/iversion.h> -TRACE_DEFINE_ENUM(DT_UNKNOWN); -TRACE_DEFINE_ENUM(DT_FIFO); -TRACE_DEFINE_ENUM(DT_CHR); -TRACE_DEFINE_ENUM(DT_DIR); -TRACE_DEFINE_ENUM(DT_BLK); -TRACE_DEFINE_ENUM(DT_REG); -TRACE_DEFINE_ENUM(DT_LNK); -TRACE_DEFINE_ENUM(DT_SOCK); -TRACE_DEFINE_ENUM(DT_WHT); - -#define nfs_show_file_type(ftype) \ - __print_symbolic(ftype, \ - { DT_UNKNOWN, "UNKNOWN" }, \ - { DT_FIFO, "FIFO" }, \ - { DT_CHR, "CHR" }, \ - { DT_DIR, "DIR" }, \ - { DT_BLK, "BLK" }, \ - { DT_REG, "REG" }, \ - { DT_LNK, "LNK" }, \ - { DT_SOCK, "SOCK" }, \ - { DT_WHT, "WHT" }) - -TRACE_DEFINE_ENUM(NFS_INO_INVALID_DATA); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_ATIME); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_ACCESS); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_ACL); -TRACE_DEFINE_ENUM(NFS_INO_REVAL_PAGECACHE); -TRACE_DEFINE_ENUM(NFS_INO_REVAL_FORCED); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_LABEL); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_CHANGE); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_CTIME); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_MTIME); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_SIZE); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_OTHER); -TRACE_DEFINE_ENUM(NFS_INO_DATA_INVAL_DEFER); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_BLOCKS); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_XATTR); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_NLINK); -TRACE_DEFINE_ENUM(NFS_INO_INVALID_MODE); +#include <trace/events/fs.h> +#include <trace/events/nfs.h> +#include <trace/events/sunrpc_base.h> #define nfs_show_cache_validity(v) \ __print_flags(v, "|", \ @@ -71,17 +35,6 @@ TRACE_DEFINE_ENUM(NFS_INO_INVALID_MODE); { NFS_INO_INVALID_NLINK, "INVALID_NLINK" }, \ { NFS_INO_INVALID_MODE, "INVALID_MODE" }) -TRACE_DEFINE_ENUM(NFS_INO_ADVISE_RDPLUS); -TRACE_DEFINE_ENUM(NFS_INO_STALE); -TRACE_DEFINE_ENUM(NFS_INO_ACL_LRU_SET); -TRACE_DEFINE_ENUM(NFS_INO_INVALIDATING); -TRACE_DEFINE_ENUM(NFS_INO_FSCACHE); -TRACE_DEFINE_ENUM(NFS_INO_FSCACHE_LOCK); -TRACE_DEFINE_ENUM(NFS_INO_LAYOUTCOMMIT); -TRACE_DEFINE_ENUM(NFS_INO_LAYOUTCOMMITTING); -TRACE_DEFINE_ENUM(NFS_INO_LAYOUTSTATS); -TRACE_DEFINE_ENUM(NFS_INO_ODIRECT); - #define nfs_show_nfsi_flags(v) \ __print_flags(v, "|", \ { BIT(NFS_INO_ADVISE_RDPLUS), "ADVISE_RDPLUS" }, \ @@ -163,12 +116,12 @@ DECLARE_EVENT_CLASS(nfs_inode_event_done, "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "type=%u (%s) version=%llu size=%lld " "cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s)", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->type, - nfs_show_file_type(__entry->type), + show_fs_dirent_type(__entry->type), (unsigned long long)__entry->version, (long long)__entry->size, __entry->cache_validity, @@ -254,12 +207,12 @@ TRACE_EVENT(nfs_access_exit, "type=%u (%s) version=%llu size=%lld " "cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s) " "mask=0x%x permitted=0x%x", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->type, - nfs_show_file_type(__entry->type), + show_fs_dirent_type(__entry->type), (unsigned long long)__entry->version, (long long)__entry->size, __entry->cache_validity, @@ -270,33 +223,55 @@ TRACE_EVENT(nfs_access_exit, ) ); -TRACE_DEFINE_ENUM(LOOKUP_FOLLOW); -TRACE_DEFINE_ENUM(LOOKUP_DIRECTORY); -TRACE_DEFINE_ENUM(LOOKUP_AUTOMOUNT); -TRACE_DEFINE_ENUM(LOOKUP_PARENT); -TRACE_DEFINE_ENUM(LOOKUP_REVAL); -TRACE_DEFINE_ENUM(LOOKUP_RCU); -TRACE_DEFINE_ENUM(LOOKUP_OPEN); -TRACE_DEFINE_ENUM(LOOKUP_CREATE); -TRACE_DEFINE_ENUM(LOOKUP_EXCL); -TRACE_DEFINE_ENUM(LOOKUP_RENAME_TARGET); -TRACE_DEFINE_ENUM(LOOKUP_EMPTY); -TRACE_DEFINE_ENUM(LOOKUP_DOWN); - -#define show_lookup_flags(flags) \ - __print_flags(flags, "|", \ - { LOOKUP_FOLLOW, "FOLLOW" }, \ - { LOOKUP_DIRECTORY, "DIRECTORY" }, \ - { LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \ - { LOOKUP_PARENT, "PARENT" }, \ - { LOOKUP_REVAL, "REVAL" }, \ - { LOOKUP_RCU, "RCU" }, \ - { LOOKUP_OPEN, "OPEN" }, \ - { LOOKUP_CREATE, "CREATE" }, \ - { LOOKUP_EXCL, "EXCL" }, \ - { LOOKUP_RENAME_TARGET, "RENAME_TARGET" }, \ - { LOOKUP_EMPTY, "EMPTY" }, \ - { LOOKUP_DOWN, "DOWN" }) +DECLARE_EVENT_CLASS(nfs_update_size_class, + TP_PROTO( + const struct inode *inode, + loff_t new_size + ), + + TP_ARGS(inode, new_size), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(u64, version) + __field(loff_t, cur_size) + __field(loff_t, new_size) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + + __entry->dev = inode->i_sb->s_dev; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->fileid = nfsi->fileid; + __entry->version = inode_peek_iversion_raw(inode); + __entry->cur_size = i_size_read(inode); + __entry->new_size = new_size; + ), + + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu cursize=%lld newsize=%lld", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, __entry->version, + __entry->cur_size, __entry->new_size + ) +); + +#define DEFINE_NFS_UPDATE_SIZE_EVENT(name) \ + DEFINE_EVENT(nfs_update_size_class, nfs_size_##name, \ + TP_PROTO( \ + const struct inode *inode, \ + loff_t new_size \ + ), \ + TP_ARGS(inode, new_size)) + +DEFINE_NFS_UPDATE_SIZE_EVENT(truncate); +DEFINE_NFS_UPDATE_SIZE_EVENT(wcc); +DEFINE_NFS_UPDATE_SIZE_EVENT(update); +DEFINE_NFS_UPDATE_SIZE_EVENT(grow); DECLARE_EVENT_CLASS(nfs_lookup_event, TP_PROTO( @@ -324,7 +299,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event, TP_printk( "flags=0x%lx (%s) name=%02x:%02x:%llu/%s", __entry->flags, - show_lookup_flags(__entry->flags), + show_fs_lookup_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -368,9 +343,9 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done, TP_printk( "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), __entry->flags, - show_lookup_flags(__entry->flags), + show_fs_lookup_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -392,46 +367,6 @@ DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_exit); DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_revalidate_enter); DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_revalidate_exit); -TRACE_DEFINE_ENUM(O_WRONLY); -TRACE_DEFINE_ENUM(O_RDWR); -TRACE_DEFINE_ENUM(O_CREAT); -TRACE_DEFINE_ENUM(O_EXCL); -TRACE_DEFINE_ENUM(O_NOCTTY); -TRACE_DEFINE_ENUM(O_TRUNC); -TRACE_DEFINE_ENUM(O_APPEND); -TRACE_DEFINE_ENUM(O_NONBLOCK); -TRACE_DEFINE_ENUM(O_DSYNC); -TRACE_DEFINE_ENUM(O_DIRECT); -TRACE_DEFINE_ENUM(O_LARGEFILE); -TRACE_DEFINE_ENUM(O_DIRECTORY); -TRACE_DEFINE_ENUM(O_NOFOLLOW); -TRACE_DEFINE_ENUM(O_NOATIME); -TRACE_DEFINE_ENUM(O_CLOEXEC); - -#define show_open_flags(flags) \ - __print_flags(flags, "|", \ - { O_WRONLY, "O_WRONLY" }, \ - { O_RDWR, "O_RDWR" }, \ - { O_CREAT, "O_CREAT" }, \ - { O_EXCL, "O_EXCL" }, \ - { O_NOCTTY, "O_NOCTTY" }, \ - { O_TRUNC, "O_TRUNC" }, \ - { O_APPEND, "O_APPEND" }, \ - { O_NONBLOCK, "O_NONBLOCK" }, \ - { O_DSYNC, "O_DSYNC" }, \ - { O_DIRECT, "O_DIRECT" }, \ - { O_LARGEFILE, "O_LARGEFILE" }, \ - { O_DIRECTORY, "O_DIRECTORY" }, \ - { O_NOFOLLOW, "O_NOFOLLOW" }, \ - { O_NOATIME, "O_NOATIME" }, \ - { O_CLOEXEC, "O_CLOEXEC" }) - -#define show_fmode_flags(mode) \ - __print_flags(mode, "|", \ - { ((__force unsigned long)FMODE_READ), "READ" }, \ - { ((__force unsigned long)FMODE_WRITE), "WRITE" }, \ - { ((__force unsigned long)FMODE_EXEC), "EXEC" }) - TRACE_EVENT(nfs_atomic_open_enter, TP_PROTO( const struct inode *dir, @@ -443,7 +378,7 @@ TRACE_EVENT(nfs_atomic_open_enter, TP_STRUCT__entry( __field(unsigned long, flags) - __field(unsigned int, fmode) + __field(unsigned long, fmode) __field(dev_t, dev) __field(u64, dir) __string(name, ctx->dentry->d_name.name) @@ -453,15 +388,15 @@ TRACE_EVENT(nfs_atomic_open_enter, __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; - __entry->fmode = (__force unsigned int)ctx->mode; + __entry->fmode = (__force unsigned long)ctx->mode; __assign_str(name, ctx->dentry->d_name.name); ), TP_printk( "flags=0x%lx (%s) fmode=%s name=%02x:%02x:%llu/%s", __entry->flags, - show_open_flags(__entry->flags), - show_fmode_flags(__entry->fmode), + show_fs_fcntl_open_flags(__entry->flags), + show_fs_fmode_flags(__entry->fmode), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -481,7 +416,7 @@ TRACE_EVENT(nfs_atomic_open_exit, TP_STRUCT__entry( __field(unsigned long, error) __field(unsigned long, flags) - __field(unsigned int, fmode) + __field(unsigned long, fmode) __field(dev_t, dev) __field(u64, dir) __string(name, ctx->dentry->d_name.name) @@ -492,17 +427,17 @@ TRACE_EVENT(nfs_atomic_open_exit, __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; - __entry->fmode = (__force unsigned int)ctx->mode; + __entry->fmode = (__force unsigned long)ctx->mode; __assign_str(name, ctx->dentry->d_name.name); ), TP_printk( "error=%ld (%s) flags=0x%lx (%s) fmode=%s " "name=%02x:%02x:%llu/%s", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), __entry->flags, - show_open_flags(__entry->flags), - show_fmode_flags(__entry->fmode), + show_fs_fcntl_open_flags(__entry->flags), + show_fs_fmode_flags(__entry->fmode), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -535,7 +470,7 @@ TRACE_EVENT(nfs_create_enter, TP_printk( "flags=0x%lx (%s) name=%02x:%02x:%llu/%s", __entry->flags, - show_open_flags(__entry->flags), + show_fs_fcntl_open_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -570,9 +505,9 @@ TRACE_EVENT(nfs_create_exit, TP_printk( "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), __entry->flags, - show_open_flags(__entry->flags), + show_fs_fcntl_open_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -640,7 +575,7 @@ DECLARE_EVENT_CLASS(nfs_directory_event_done, TP_printk( "error=%ld (%s) name=%02x:%02x:%llu/%s", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -730,7 +665,7 @@ TRACE_EVENT(nfs_link_exit, TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), __entry->fileid, MAJOR(__entry->dev), MINOR(__entry->dev), @@ -817,7 +752,7 @@ DECLARE_EVENT_CLASS(nfs_rename_event_done, TP_printk( "error=%ld (%s) old_name=%02x:%02x:%llu/%s " "new_name=%02x:%02x:%llu/%s", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->old_dir, __get_str(old_name), @@ -871,13 +806,163 @@ TRACE_EVENT(nfs_sillyrename_unlink, TP_printk( "error=%ld (%s) name=%02x:%02x:%llu/%s", - -__entry->error, nfs_show_status(__entry->error), + -__entry->error, show_nfs_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) ) ); +TRACE_EVENT(nfs_aop_readpage, + TP_PROTO( + const struct inode *inode, + struct page *page + ), + + TP_ARGS(inode, page), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(u64, version) + __field(loff_t, offset) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->version = inode_peek_iversion_raw(inode); + __entry->offset = page_index(page) << PAGE_SHIFT; + ), + + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu offset=%lld", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, __entry->version, + __entry->offset + ) +); + +TRACE_EVENT(nfs_aop_readpage_done, + TP_PROTO( + const struct inode *inode, + struct page *page, + int ret + ), + + TP_ARGS(inode, page, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(int, ret) + __field(u64, fileid) + __field(u64, version) + __field(loff_t, offset) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->version = inode_peek_iversion_raw(inode); + __entry->offset = page_index(page) << PAGE_SHIFT; + __entry->ret = ret; + ), + + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu offset=%lld ret=%d", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, __entry->version, + __entry->offset, __entry->ret + ) +); + +TRACE_EVENT(nfs_aop_readahead, + TP_PROTO( + const struct inode *inode, + struct page *page, + unsigned int nr_pages + ), + + TP_ARGS(inode, page, nr_pages), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(u64, version) + __field(loff_t, offset) + __field(unsigned int, nr_pages) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->version = inode_peek_iversion_raw(inode); + __entry->offset = page_index(page) << PAGE_SHIFT; + __entry->nr_pages = nr_pages; + ), + + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu offset=%lld nr_pages=%u", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, __entry->version, + __entry->offset, __entry->nr_pages + ) +); + +TRACE_EVENT(nfs_aop_readahead_done, + TP_PROTO( + const struct inode *inode, + unsigned int nr_pages, + int ret + ), + + TP_ARGS(inode, nr_pages, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(int, ret) + __field(u64, fileid) + __field(u64, version) + __field(loff_t, offset) + __field(unsigned int, nr_pages) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->version = inode_peek_iversion_raw(inode); + __entry->nr_pages = nr_pages; + __entry->ret = ret; + ), + + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu nr_pages=%u ret=%d", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, __entry->version, + __entry->nr_pages, __entry->ret + ) +); + TRACE_EVENT(nfs_initiate_read, TP_PROTO( const struct nfs_pgio_header *hdr @@ -1054,16 +1139,6 @@ TRACE_EVENT(nfs_pgio_error, ) ); -TRACE_DEFINE_ENUM(NFS_UNSTABLE); -TRACE_DEFINE_ENUM(NFS_DATA_SYNC); -TRACE_DEFINE_ENUM(NFS_FILE_SYNC); - -#define nfs_show_stable(stable) \ - __print_symbolic(stable, \ - { NFS_UNSTABLE, "UNSTABLE" }, \ - { NFS_DATA_SYNC, "DATA_SYNC" }, \ - { NFS_FILE_SYNC, "FILE_SYNC" }) - TRACE_EVENT(nfs_initiate_write, TP_PROTO( const struct nfs_pgio_header *hdr @@ -1077,7 +1152,7 @@ TRACE_EVENT(nfs_initiate_write, __field(u64, fileid) __field(loff_t, offset) __field(u32, count) - __field(enum nfs3_stable_how, stable) + __field(unsigned long, stable) ), TP_fast_assign( @@ -1101,7 +1176,7 @@ TRACE_EVENT(nfs_initiate_write, (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->count, - nfs_show_stable(__entry->stable) + show_nfs_stable_how(__entry->stable) ) ); @@ -1121,7 +1196,7 @@ TRACE_EVENT(nfs_writeback_done, __field(u32, arg_count) __field(u32, res_count) __field(int, status) - __field(enum nfs3_stable_how, stable) + __field(unsigned long, stable) __array(char, verifier, NFS4_VERIFIER_SIZE) ), @@ -1154,8 +1229,8 @@ TRACE_EVENT(nfs_writeback_done, __entry->fhandle, (long long)__entry->offset, __entry->arg_count, __entry->res_count, __entry->status, - nfs_show_stable(__entry->stable), - __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE) + show_nfs_stable_how(__entry->stable), + show_nfs4_verifier(__entry->verifier) ) ); @@ -1256,7 +1331,7 @@ TRACE_EVENT(nfs_commit_done, __field(u64, fileid) __field(loff_t, offset) __field(int, status) - __field(enum nfs3_stable_how, stable) + __field(unsigned long, stable) __array(char, verifier, NFS4_VERIFIER_SIZE) ), @@ -1285,8 +1360,8 @@ TRACE_EVENT(nfs_commit_done, (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, __entry->status, - nfs_show_stable(__entry->stable), - __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE) + show_nfs_stable_how(__entry->stable), + show_nfs4_verifier(__entry->verifier) ) ); @@ -1323,76 +1398,6 @@ TRACE_EVENT(nfs_fh_to_dentry, ) ); -TRACE_DEFINE_ENUM(NFS_OK); -TRACE_DEFINE_ENUM(NFSERR_PERM); -TRACE_DEFINE_ENUM(NFSERR_NOENT); -TRACE_DEFINE_ENUM(NFSERR_IO); -TRACE_DEFINE_ENUM(NFSERR_NXIO); -TRACE_DEFINE_ENUM(ECHILD); -TRACE_DEFINE_ENUM(NFSERR_EAGAIN); -TRACE_DEFINE_ENUM(NFSERR_ACCES); -TRACE_DEFINE_ENUM(NFSERR_EXIST); -TRACE_DEFINE_ENUM(NFSERR_XDEV); -TRACE_DEFINE_ENUM(NFSERR_NODEV); -TRACE_DEFINE_ENUM(NFSERR_NOTDIR); -TRACE_DEFINE_ENUM(NFSERR_ISDIR); -TRACE_DEFINE_ENUM(NFSERR_INVAL); -TRACE_DEFINE_ENUM(NFSERR_FBIG); -TRACE_DEFINE_ENUM(NFSERR_NOSPC); -TRACE_DEFINE_ENUM(NFSERR_ROFS); -TRACE_DEFINE_ENUM(NFSERR_MLINK); -TRACE_DEFINE_ENUM(NFSERR_OPNOTSUPP); -TRACE_DEFINE_ENUM(NFSERR_NAMETOOLONG); -TRACE_DEFINE_ENUM(NFSERR_NOTEMPTY); -TRACE_DEFINE_ENUM(NFSERR_DQUOT); -TRACE_DEFINE_ENUM(NFSERR_STALE); -TRACE_DEFINE_ENUM(NFSERR_REMOTE); -TRACE_DEFINE_ENUM(NFSERR_WFLUSH); -TRACE_DEFINE_ENUM(NFSERR_BADHANDLE); -TRACE_DEFINE_ENUM(NFSERR_NOT_SYNC); -TRACE_DEFINE_ENUM(NFSERR_BAD_COOKIE); -TRACE_DEFINE_ENUM(NFSERR_NOTSUPP); -TRACE_DEFINE_ENUM(NFSERR_TOOSMALL); -TRACE_DEFINE_ENUM(NFSERR_SERVERFAULT); -TRACE_DEFINE_ENUM(NFSERR_BADTYPE); -TRACE_DEFINE_ENUM(NFSERR_JUKEBOX); - -#define nfs_show_status(x) \ - __print_symbolic(x, \ - { NFS_OK, "OK" }, \ - { NFSERR_PERM, "PERM" }, \ - { NFSERR_NOENT, "NOENT" }, \ - { NFSERR_IO, "IO" }, \ - { NFSERR_NXIO, "NXIO" }, \ - { ECHILD, "CHILD" }, \ - { NFSERR_EAGAIN, "AGAIN" }, \ - { NFSERR_ACCES, "ACCES" }, \ - { NFSERR_EXIST, "EXIST" }, \ - { NFSERR_XDEV, "XDEV" }, \ - { NFSERR_NODEV, "NODEV" }, \ - { NFSERR_NOTDIR, "NOTDIR" }, \ - { NFSERR_ISDIR, "ISDIR" }, \ - { NFSERR_INVAL, "INVAL" }, \ - { NFSERR_FBIG, "FBIG" }, \ - { NFSERR_NOSPC, "NOSPC" }, \ - { NFSERR_ROFS, "ROFS" }, \ - { NFSERR_MLINK, "MLINK" }, \ - { NFSERR_OPNOTSUPP, "OPNOTSUPP" }, \ - { NFSERR_NAMETOOLONG, "NAMETOOLONG" }, \ - { NFSERR_NOTEMPTY, "NOTEMPTY" }, \ - { NFSERR_DQUOT, "DQUOT" }, \ - { NFSERR_STALE, "STALE" }, \ - { NFSERR_REMOTE, "REMOTE" }, \ - { NFSERR_WFLUSH, "WFLUSH" }, \ - { NFSERR_BADHANDLE, "BADHANDLE" }, \ - { NFSERR_NOT_SYNC, "NOTSYNC" }, \ - { NFSERR_BAD_COOKIE, "BADCOOKIE" }, \ - { NFSERR_NOTSUPP, "NOTSUPP" }, \ - { NFSERR_TOOSMALL, "TOOSMALL" }, \ - { NFSERR_SERVERFAULT, "REMOTEIO" }, \ - { NFSERR_BADTYPE, "BADTYPE" }, \ - { NFSERR_JUKEBOX, "JUKEBOX" }) - DECLARE_EVENT_CLASS(nfs_xdr_event, TP_PROTO( const struct xdr_stream *xdr, @@ -1427,12 +1432,12 @@ DECLARE_EVENT_CLASS(nfs_xdr_event, __assign_str(procedure, task->tk_msg.rpc_proc->p_name); ), - TP_printk( - "task:%u@%d xid=0x%08x %sv%d %s error=%ld (%s)", + TP_printk(SUNRPC_TRACE_TASK_SPECIFIER + " xid=0x%08x %sv%d %s error=%ld (%s)", __entry->task_id, __entry->client_id, __entry->xid, __get_str(program), __entry->version, __get_str(procedure), -__entry->error, - nfs_show_status(__entry->error) + show_nfs_status(__entry->error) ) ); #define DEFINE_NFS_XDR_EVENT(name) \ diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index cc232d1f16f2..ad7f83dc9a2d 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -271,8 +271,7 @@ nfs_page_set_headlock(struct nfs_page *req) void nfs_page_clear_headlock(struct nfs_page *req) { - smp_mb__before_atomic(); - clear_bit(PG_HEADLOCK, &req->wb_flags); + clear_bit_unlock(PG_HEADLOCK, &req->wb_flags); smp_mb__after_atomic(); if (!test_bit(PG_CONTENDED1, &req->wb_flags)) return; @@ -525,12 +524,7 @@ nfs_create_subreq(struct nfs_page *req, */ void nfs_unlock_request(struct nfs_page *req) { - if (!NFS_WBACK_BUSY(req)) { - printk(KERN_ERR "NFS: Invalid unlock attempted\n"); - BUG(); - } - smp_mb__before_atomic(); - clear_bit(PG_BUSY, &req->wb_flags); + clear_bit_unlock(PG_BUSY, &req->wb_flags); smp_mb__after_atomic(); if (!test_bit(PG_CONTENDED2, &req->wb_flags)) return; @@ -870,9 +864,6 @@ static void nfs_pgio_result(struct rpc_task *task, void *calldata) struct nfs_pgio_header *hdr = calldata; struct inode *inode = hdr->inode; - dprintk("NFS: %s: %5u, (status %d)\n", __func__, - task->tk_pid, task->tk_status); - if (hdr->rw_ops->rw_done(task, hdr, inode) != 0) return; if (task->tk_status < 0) diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index d810ae674f4e..f4d7548d67b2 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -82,10 +82,6 @@ enum pnfs_try_status { PNFS_TRY_AGAIN = 2, }; -/* error codes for internal use */ -#define NFS4ERR_RESET_TO_MDS 12001 -#define NFS4ERR_RESET_TO_PNFS 12002 - #ifdef CONFIG_NFS_V4_1 #define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4" @@ -517,7 +513,7 @@ pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, { struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; - if (!lseg || !fl_cinfo->ops->mark_request_commit) + if (!lseg || !fl_cinfo->ops || !fl_cinfo->ops->mark_request_commit) return false; fl_cinfo->ops->mark_request_commit(req, lseg, cinfo, ds_commit_idx); return true; diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index cf19914fec81..316f68f96e57 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -468,7 +468,6 @@ pnfs_bucket_alloc_ds_commits(struct list_head *list, goto out_error; data->ds_commit_index = i; list_add_tail(&data->list, list); - atomic_inc(&cinfo->mds->rpcs_out); nreq++; } mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); @@ -520,7 +519,6 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, data->ds_commit_index = -1; list_splice_init(mds_pages, &data->pages); list_add_tail(&data->list, &list); - atomic_inc(&cinfo->mds->rpcs_out); nreq++; } @@ -895,7 +893,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, } smp_wmb(); - ds->ds_clp = clp; + WRITE_ONCE(ds->ds_clp, clp); dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); out: return status; @@ -973,7 +971,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, } smp_wmb(); - ds->ds_clp = clp; + WRITE_ONCE(ds->ds_clp, clp); dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); out: return status; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index ea19dbf12301..73dcaa99fa9b 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -91,7 +91,7 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, info->dtpref = fsinfo.tsize; info->maxfilesize = 0x7FFFFFFF; info->lease_time = 0; - info->change_attr_type = NFS4_CHANGE_TYPE_IS_TIME_METADATA; + info->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED; return 0; } @@ -100,8 +100,7 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, */ static int nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label, - struct inode *inode) + struct nfs_fattr *fattr, struct inode *inode) { struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_GETATTR], @@ -154,8 +153,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, static int nfs_proc_lookup(struct inode *dir, struct dentry *dentry, - struct nfs_fh *fhandle, struct nfs_fattr *fattr, - struct nfs4_label *label) + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct nfs_diropargs arg = { .fh = NFS_FH(dir), @@ -257,7 +255,7 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); if (status == 0) - status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL); + status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); nfs_free_createdata(data); out: dprintk("NFS reply create: %d\n", status); @@ -304,7 +302,7 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); } if (status == 0) - status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL); + status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); nfs_free_createdata(data); out: dprintk("NFS reply mknod: %d\n", status); @@ -436,7 +434,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, * should fill in the data with a LOOKUP call on the wire. */ if (status == 0) - status = nfs_instantiate(dentry, fh, fattr, NULL); + status = nfs_instantiate(dentry, fh, fattr); out_free: nfs_free_fattr(fattr); @@ -465,7 +463,7 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); if (status == 0) - status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL); + status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); nfs_free_createdata(data); out: dprintk("NFS reply mkdir: %d\n", status); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 08d6cc57cbc3..d11af2a9299c 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -337,8 +337,7 @@ int nfs_readpage(struct file *file, struct page *page) struct inode *inode = page_file_mapping(page)->host; int ret; - dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", - page, PAGE_SIZE, page_index(page)); + trace_nfs_aop_readpage(inode, page); nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); /* @@ -390,9 +389,11 @@ out_wait: } out: put_nfs_open_context(desc.ctx); + trace_nfs_aop_readpage_done(inode, page, ret); return ret; out_unlock: unlock_page(page); + trace_nfs_aop_readpage_done(inode, page, ret); return ret; } @@ -403,10 +404,7 @@ int nfs_readpages(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; int ret; - dprintk("NFS: nfs_readpages (%s/%Lu %d)\n", - inode->i_sb->s_id, - (unsigned long long)NFS_FILEID(inode), - nr_pages); + trace_nfs_aop_readahead(inode, lru_to_page(pages), nr_pages); nfs_inc_stats(inode, NFSIOS_VFSREADPAGES); ret = -ESTALE; @@ -439,6 +437,7 @@ int nfs_readpages(struct file *file, struct address_space *mapping, read_complete: put_nfs_open_context(desc.ctx); out: + trace_nfs_aop_readahead_done(inode, nr_pages, ret); return ret; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e65c83494c05..3aced401735c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1004,6 +1004,7 @@ int nfs_reconfigure(struct fs_context *fc) struct nfs_fs_context *ctx = nfs_fc2context(fc); struct super_block *sb = fc->root->d_sb; struct nfs_server *nfss = sb->s_fs_info; + int ret; sync_filesystem(sb); @@ -1028,7 +1029,11 @@ int nfs_reconfigure(struct fs_context *fc) } /* compare new mount options with old ones */ - return nfs_compare_remount_data(nfss, ctx); + ret = nfs_compare_remount_data(nfss, ctx); + if (ret) + return ret; + + return nfs_probe_server(nfss, NFS_FH(d_inode(fc->root))); } EXPORT_SYMBOL_GPL(nfs_reconfigure); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index eae9bf114041..9b7619ce17a7 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -288,6 +288,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c end = page_file_offset(page) + ((loff_t)offset+count); if (i_size >= end) goto out; + trace_nfs_size_grow(inode, end); i_size_write(inode, end); NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); @@ -1038,25 +1039,11 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, struct nfs_page *req, *tmp; int ret = 0; -restart: list_for_each_entry_safe(req, tmp, src, wb_list) { kref_get(&req->wb_kref); if (!nfs_lock_request(req)) { - int status; - - /* Prevent deadlock with nfs_lock_and_join_requests */ - if (!list_empty(dst)) { - nfs_release_request(req); - continue; - } - /* Ensure we make progress to prevent livelock */ - mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); - status = nfs_wait_on_request(req); nfs_release_request(req); - mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); - if (status < 0) - break; - goto restart; + continue; } nfs_request_remove_commit_list(req, cinfo); clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); @@ -1246,7 +1233,7 @@ nfs_key_timeout_notify(struct file *filp, struct inode *inode) struct nfs_open_context *ctx = nfs_file_open_context(filp); if (nfs_ctx_key_to_expire(ctx, inode) && - !ctx->ll_cred) + !rcu_access_pointer(ctx->ll_cred)) /* Already expired! */ return -EACCES; return 0; @@ -1258,23 +1245,38 @@ nfs_key_timeout_notify(struct file *filp, struct inode *inode) bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode) { struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth; - struct rpc_cred *cred = ctx->ll_cred; + struct rpc_cred *cred, *new, *old = NULL; struct auth_cred acred = { .cred = ctx->cred, }; + bool ret = false; - if (cred && !cred->cr_ops->crmatch(&acred, cred, 0)) { - put_rpccred(cred); - ctx->ll_cred = NULL; - cred = NULL; - } - if (!cred) - cred = auth->au_ops->lookup_cred(auth, &acred, 0); - if (!cred || IS_ERR(cred)) + rcu_read_lock(); + cred = rcu_dereference(ctx->ll_cred); + if (cred && !(cred->cr_ops->crkey_timeout && + cred->cr_ops->crkey_timeout(cred))) + goto out; + rcu_read_unlock(); + + new = auth->au_ops->lookup_cred(auth, &acred, 0); + if (new == cred) { + put_rpccred(new); return true; - ctx->ll_cred = cred; - return !!(cred->cr_ops->crkey_timeout && - cred->cr_ops->crkey_timeout(cred)); + } + if (IS_ERR_OR_NULL(new)) { + new = NULL; + ret = true; + } else if (new->cr_ops->crkey_timeout && + new->cr_ops->crkey_timeout(new)) + ret = true; + + rcu_read_lock(); + old = rcu_dereference_protected(xchg(&ctx->ll_cred, + RCU_INITIALIZER(new)), 1); +out: + rcu_read_unlock(); + put_rpccred(old); + return ret; } /* @@ -1382,8 +1384,6 @@ int nfs_updatepage(struct file *file, struct page *page, status = nfs_writepage_setup(ctx, page, offset, count); if (status < 0) nfs_set_pageerror(mapping); - else - __set_page_dirty_nobuffers(page); out: dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n", status, (long long)i_size_read(inode)); @@ -1671,10 +1671,13 @@ static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo) atomic_inc(&cinfo->rpcs_out); } -static void nfs_commit_end(struct nfs_mds_commit_info *cinfo) +bool nfs_commit_end(struct nfs_mds_commit_info *cinfo) { - if (atomic_dec_and_test(&cinfo->rpcs_out)) + if (atomic_dec_and_test(&cinfo->rpcs_out)) { wake_up_var(&cinfo->rpcs_out); + return true; + } + return false; } void nfs_commitdata_release(struct nfs_commit_data *data) @@ -1774,6 +1777,7 @@ void nfs_init_commit(struct nfs_commit_data *data, data->res.fattr = &data->fattr; data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); + nfs_commit_begin(cinfo->mds); } EXPORT_SYMBOL_GPL(nfs_init_commit); @@ -1820,7 +1824,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, /* Set up the argument struct */ nfs_init_commit(data, head, NULL, cinfo); - atomic_inc(&cinfo->mds->rpcs_out); if (NFS_SERVER(inode)->nfs_client->cl_minorversion) task_flags = RPC_TASK_MOVEABLE; return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), @@ -1835,9 +1838,6 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) { struct nfs_commit_data *data = calldata; - dprintk("NFS: %5u nfs_commit_done (status %d)\n", - task->tk_pid, task->tk_status); - /* Call the NFS version-specific code */ NFS_PROTO(data->inode)->commit_done(task, data); trace_nfs_commit_done(task, data); @@ -1936,6 +1936,7 @@ static int __nfs_commit_inode(struct inode *inode, int how, int may_wait = how & FLUSH_SYNC; int ret, nscan; + how &= ~FLUSH_SYNC; nfs_init_cinfo_from_inode(&cinfo, inode); nfs_commit_begin(cinfo.mds); for (;;) { diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index be3c1aad50ea..fdf89fcf1a0c 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -602,6 +602,9 @@ nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask, struct inode *inode, struct inode *dir, const struct qstr *name, u32 cookie) { + if (WARN_ON_ONCE(!inode)) + return 0; + trace_nfsd_file_fsnotify_handle_event(inode, mask); /* Should be no marks on non-regular files */ diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c index db7ef07ae50c..2e2f1d5e9f62 100644 --- a/fs/nfsd/flexfilelayout.c +++ b/fs/nfsd/flexfilelayout.c @@ -61,7 +61,7 @@ nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, goto out_error; fl->fh.size = fhp->fh_handle.fh_size; - memcpy(fl->fh.data, &fhp->fh_handle.fh_base, fl->fh.size); + memcpy(fl->fh.data, &fhp->fh_handle.fh_raw, fl->fh.size); /* Give whole file layout segments */ seg->offset = 0; diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 606fa155c28a..46a7f9b813e5 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -35,7 +35,7 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp, /* must initialize before using! but maxsize doesn't matter */ fh_init(&fh,0); fh.fh_handle.fh_size = f->size; - memcpy((char*)&fh.fh_handle.fh_base, f->data, f->size); + memcpy(&fh.fh_handle.fh_raw, f->data, f->size); fh.fh_export = NULL; access = (mode == O_WRONLY) ? NFSD_MAY_WRITE : NFSD_MAY_READ; diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 4b43929c1f25..367551bddfc6 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -188,51 +188,51 @@ out: * XDR decode functions */ -static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p) +static bool +nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_getaclargs *argp = rqstp->rq_argp; if (!svcxdr_decode_fhandle(xdr, &argp->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->mask) < 0) - return 0; + return false; - return 1; + return true; } -static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p) +static bool +nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_setaclargs *argp = rqstp->rq_argp; if (!svcxdr_decode_fhandle(xdr, &argp->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->mask) < 0) - return 0; + return false; if (argp->mask & ~NFS_ACL_MASK) - return 0; + return false; if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_ACL) ? &argp->acl_access : NULL)) - return 0; + return false; if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_DFACL) ? &argp->acl_default : NULL)) - return 0; + return false; - return 1; + return true; } -static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p) +static bool +nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_accessargs *args = rqstp->rq_argp; if (!svcxdr_decode_fhandle(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->access) < 0) - return 0; + return false; - return 1; + return true; } /* @@ -240,9 +240,9 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p) */ /* GETACL */ -static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) +static bool +nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_getaclres *resp = rqstp->rq_resp; struct dentry *dentry = resp->fh.fh_dentry; struct inode *inode; @@ -280,9 +280,9 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) } /* ACCESS */ -static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p) +static bool +nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_accessres *resp = rqstp->rq_resp; if (!svcxdr_encode_stat(xdr, resp->status)) diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 5dfe7644a517..35b2ebda14da 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -127,38 +127,38 @@ out: * XDR decode functions */ -static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p) +static bool +nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_getaclargs *args = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->mask) < 0) - return 0; + return false; - return 1; + return true; } -static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p) +static bool +nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_setaclargs *argp = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &argp->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &argp->mask) < 0) - return 0; + return false; if (argp->mask & ~NFS_ACL_MASK) - return 0; + return false; if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_ACL) ? &argp->acl_access : NULL)) - return 0; + return false; if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_DFACL) ? &argp->acl_default : NULL)) - return 0; + return false; - return 1; + return true; } /* @@ -166,9 +166,9 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p) */ /* GETACL */ -static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) +static bool +nfs3svc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_getaclres *resp = rqstp->rq_resp; struct dentry *dentry = resp->fh.fh_dentry; struct kvec *head = rqstp->rq_res.head; @@ -178,14 +178,14 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) int w; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: inode = d_inode(dentry); if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->mask) < 0) - return 0; + return false; base = (char *)xdr->p - (char *)head->iov_base; @@ -194,7 +194,7 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); while (w > 0) { if (!*(rqstp->rq_next_page++)) - return 0; + return false; w -= PAGE_SIZE; } @@ -207,20 +207,20 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) resp->mask & NFS_DFACL, NFS_ACL_DEFAULT); if (n <= 0) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; } - return 1; + return true; } /* SETACL */ -static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p) +static bool +nfs3svc_encode_setaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_attrstat *resp = rqstp->rq_resp; return svcxdr_encode_nfsstat3(xdr, resp->status) && diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 17715a6c7a40..4418517f6f12 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -201,8 +201,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->committed = argp->stable; - nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages, - &argp->first, cnt); + nvecs = svc_fill_write_vector(rqstp, &argp->payload); if (!nvecs) { resp->status = nfserr_io; goto out; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 0a5ebc52e6a9..c3ac1b6aa3aa 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -92,7 +92,7 @@ svcxdr_decode_nfs_fh3(struct xdr_stream *xdr, struct svc_fh *fhp) return false; fh_init(fhp, NFS3_FHSIZE); fhp->fh_handle.fh_size = size; - memcpy(&fhp->fh_handle.fh_base, p, size); + memcpy(&fhp->fh_handle.fh_raw, p, size); return true; } @@ -131,7 +131,7 @@ svcxdr_encode_nfs_fh3(struct xdr_stream *xdr, const struct svc_fh *fhp) *p++ = cpu_to_be32(size); if (size) p[XDR_QUADLEN(size) - 1] = 0; - memcpy(p, &fhp->fh_handle.fh_base, size); + memcpy(p, &fhp->fh_handle.fh_raw, size); return true; } @@ -556,19 +556,17 @@ void fill_post_wcc(struct svc_fh *fhp) * XDR decode functions */ -int -nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_fhandle *args = rqstp->rq_argp; return svcxdr_decode_nfs_fh3(xdr, &args->fh); } -int -nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_sattrargs *args = rqstp->rq_argp; return svcxdr_decode_nfs_fh3(xdr, &args->fh) && @@ -576,96 +574,83 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p) svcxdr_decode_sattrguard3(xdr, args); } -int -nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_diropargs *args = rqstp->rq_argp; return svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len); } -int -nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_accessargs *args = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->access) < 0) - return 0; + return false; - return 1; + return true; } -int -nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_readargs *args = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u64(xdr, &args->offset) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return 0; + return false; - return 1; + return true; } -int -nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_writeargs *args = rqstp->rq_argp; u32 max_blocksize = svc_max_payload(rqstp); - struct kvec *head = rqstp->rq_arg.head; - struct kvec *tail = rqstp->rq_arg.tail; - size_t remaining; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u64(xdr, &args->offset) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->stable) < 0) - return 0; + return false; /* opaque data */ if (xdr_stream_decode_u32(xdr, &args->len) < 0) - return 0; + return false; /* request sanity */ if (args->count != args->len) - return 0; - remaining = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len; - remaining -= xdr_stream_pos(xdr); - if (remaining < xdr_align_size(args->len)) - return 0; + return false; if (args->count > max_blocksize) { args->count = max_blocksize; args->len = max_blocksize; } + if (!xdr_stream_subsegment(xdr, &args->payload, args->count)) + return false; - args->first.iov_base = xdr->p; - args->first.iov_len = head->iov_len - xdr_stream_pos(xdr); - - return 1; + return true; } -int -nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_createargs *args = rqstp->rq_argp; if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->createmode) < 0) - return 0; + return false; switch (args->createmode) { case NFS3_CREATE_UNCHECKED: case NFS3_CREATE_GUARDED: @@ -673,18 +658,17 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p) case NFS3_CREATE_EXCLUSIVE: args->verf = xdr_inline_decode(xdr, NFS3_CREATEVERFSIZE); if (!args->verf) - return 0; + return false; break; default: - return 0; + return false; } - return 1; + return true; } -int -nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_createargs *args = rqstp->rq_argp; return svcxdr_decode_diropargs3(xdr, &args->fh, @@ -692,44 +676,42 @@ nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p) svcxdr_decode_sattr3(rqstp, xdr, &args->attrs); } -int -nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_symlinkargs *args = rqstp->rq_argp; struct kvec *head = rqstp->rq_arg.head; struct kvec *tail = rqstp->rq_arg.tail; size_t remaining; if (!svcxdr_decode_diropargs3(xdr, &args->ffh, &args->fname, &args->flen)) - return 0; + return false; if (!svcxdr_decode_sattr3(rqstp, xdr, &args->attrs)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->tlen) < 0) - return 0; + return false; /* request sanity */ remaining = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len; remaining -= xdr_stream_pos(xdr); if (remaining < xdr_align_size(args->tlen)) - return 0; + return false; args->first.iov_base = xdr->p; args->first.iov_len = head->iov_len - xdr_stream_pos(xdr); - return 1; + return true; } -int -nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_mknodargs *args = rqstp->rq_argp; if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->ftype) < 0) - return 0; + return false; switch (args->ftype) { case NF3CHR: case NF3BLK: @@ -743,16 +725,15 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p) /* Valid XDR but illegal file types */ break; default: - return 0; + return false; } - return 1; + return true; } -int -nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_renameargs *args = rqstp->rq_argp; return svcxdr_decode_diropargs3(xdr, &args->ffh, @@ -761,10 +742,9 @@ nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p) &args->tname, &args->tlen); } -int -nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_linkargs *args = rqstp->rq_argp; return svcxdr_decode_nfs_fh3(xdr, &args->ffh) && @@ -772,62 +752,59 @@ nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p) &args->tname, &args->tlen); } -int -nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_readdirargs *args = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u64(xdr, &args->cookie) < 0) - return 0; + return false; args->verf = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE); if (!args->verf) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return 0; + return false; - return 1; + return true; } -int -nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_readdirargs *args = rqstp->rq_argp; u32 dircount; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u64(xdr, &args->cookie) < 0) - return 0; + return false; args->verf = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE); if (!args->verf) - return 0; + return false; /* dircount is ignored */ if (xdr_stream_decode_u32(xdr, &dircount) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return 0; + return false; - return 1; + return true; } -int -nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd3_commitargs *args = rqstp->rq_argp; if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u64(xdr, &args->offset) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return 0; + return false; - return 1; + return true; } /* @@ -835,30 +812,28 @@ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p) */ /* GETATTR */ -int -nfs3svc_encode_getattrres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_attrstat *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: lease_get_mtime(d_inode(resp->fh.fh_dentry), &resp->stat.mtime); if (!svcxdr_encode_fattr3(rqstp, xdr, &resp->fh, &resp->stat)) - return 0; + return false; break; } - return 1; + return true; } /* SETATTR, REMOVE, RMDIR */ -int -nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_attrstat *resp = rqstp->rq_resp; return svcxdr_encode_nfsstat3(xdr, resp->status) && @@ -866,174 +841,168 @@ nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p) } /* LOOKUP */ -int nfs3svc_encode_lookupres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_diropres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_nfs_fh3(xdr, &resp->fh)) - return 0; + return false; if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh)) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh)) - return 0; + return false; } - return 1; + return true; } /* ACCESS */ -int -nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_accessres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->access) < 0) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; } - return 1; + return true; } /* READLINK */ -int -nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_readlinkres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->len) < 0) - return 0; + return false; xdr_write_pages(xdr, resp->pages, 0, resp->len); if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; } - return 1; + return true; } /* READ */ -int -nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_readres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->count) < 0) - return 0; + return false; if (xdr_stream_encode_bool(xdr, resp->eof) < 0) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->count) < 0) - return 0; + return false; xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base, resp->count); if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; } - return 1; + return true; } /* WRITE */ -int -nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_writeres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->count) < 0) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->committed) < 0) - return 0; + return false; if (!svcxdr_encode_writeverf3(xdr, resp->verf)) - return 0; + return false; break; default: if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) - return 0; + return false; } - return 1; + return true; } /* CREATE, MKDIR, SYMLINK, MKNOD */ -int -nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_diropres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_fh3(xdr, &resp->fh)) - return 0; + return false; if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->dirfh)) - return 0; + return false; break; default: if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->dirfh)) - return 0; + return false; } - return 1; + return true; } /* RENAME */ -int -nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_renameres *resp = rqstp->rq_resp; return svcxdr_encode_nfsstat3(xdr, resp->status) && @@ -1042,10 +1011,9 @@ nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p) } /* LINK */ -int -nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_linkres *resp = rqstp->rq_resp; return svcxdr_encode_nfsstat3(xdr, resp->status) && @@ -1054,34 +1022,33 @@ nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p) } /* READDIR */ -int -nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_readdirres *resp = rqstp->rq_resp; struct xdr_buf *dirlist = &resp->dirlist; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; if (!svcxdr_encode_cookieverf3(xdr, resp->verf)) - return 0; + return false; xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len); /* no more entries */ if (xdr_stream_encode_item_absent(xdr) < 0) - return 0; + return false; if (xdr_stream_encode_bool(xdr, resp->common.err == nfserr_eof) < 0) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 0; + return false; } - return 1; + return true; } static __be32 @@ -1308,27 +1275,26 @@ svcxdr_encode_fsstat3resok(struct xdr_stream *xdr, } /* FSSTAT */ -int -nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_fsstatres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return 0; + return false; if (!svcxdr_encode_fsstat3resok(xdr, resp)) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return 0; + return false; } - return 1; + return true; } static bool @@ -1355,27 +1321,26 @@ svcxdr_encode_fsinfo3resok(struct xdr_stream *xdr, } /* FSINFO */ -int -nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_fsinfores *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return 0; + return false; if (!svcxdr_encode_fsinfo3resok(xdr, resp)) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return 0; + return false; } - return 1; + return true; } static bool @@ -1398,51 +1363,49 @@ svcxdr_encode_pathconf3resok(struct xdr_stream *xdr, } /* PATHCONF */ -int -nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_pathconfres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return 0; + return false; if (!svcxdr_encode_pathconf3resok(xdr, resp)) - return 0; + return false; break; default: if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return 0; + return false; } - return 1; + return true; } /* COMMIT */ -int -nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd3_commitres *resp = rqstp->rq_resp; if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) - return 0; + return false; if (!svcxdr_encode_writeverf3(xdr, resp->verf)) - return 0; + return false; break; default: if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) - return 0; + return false; } - return 1; + return true; } /* diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 0f8b10f363e7..11f8715d92d6 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -121,7 +121,7 @@ static void encode_nfs_fh4(struct xdr_stream *xdr, const struct knfsd_fh *fh) BUG_ON(length > NFS4_FHSIZE); p = xdr_reserve_space(xdr, 4 + length); - xdr_encode_opaque(p, &fh->fh_base, length); + xdr_encode_opaque(p, &fh->fh_raw, length); } /* diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 486c5dba4b65..a36261f89bdf 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -519,7 +519,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fh_put(&cstate->current_fh); cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen; - memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval, + memcpy(&cstate->current_fh.fh_handle.fh_raw, putfh->pf_fhval, putfh->pf_fhlen); ret = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS); #ifdef CONFIG_NFSD_V4_2_INTER_SSC @@ -1033,8 +1033,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, write->wr_how_written = write->wr_stable_how; - nvecs = svc_fill_write_vector(rqstp, write->wr_payload.pages, - write->wr_payload.head, write->wr_buflen); + nvecs = svc_fill_write_vector(rqstp, &write->wr_payload); WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf, @@ -1178,7 +1177,7 @@ extern void nfs_sb_deactive(struct super_block *sb); static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr, struct nfsd4_ssc_umount_item **retwork, struct vfsmount **ss_mnt) { - struct nfsd4_ssc_umount_item *ni = 0; + struct nfsd4_ssc_umount_item *ni = NULL; struct nfsd4_ssc_umount_item *work = NULL; struct nfsd4_ssc_umount_item *tmp; DEFINE_WAIT(wait); @@ -1383,7 +1382,7 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, s_fh = &cstate->save_fh; copy->c_fh.size = s_fh->fh_handle.fh_size; - memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_base, copy->c_fh.size); + memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_raw, copy->c_fh.size); copy->stateid.seqid = cpu_to_be32(s_stid->si_generation); memcpy(copy->stateid.other, (void *)&s_stid->si_opaque, sizeof(stateid_opaque_t)); @@ -2462,11 +2461,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) __be32 status; resp->xdr = &rqstp->rq_res_stream; + resp->statusp = resp->xdr->p; /* reserve space for: NFS status code */ xdr_reserve_space(resp->xdr, XDR_UNIT); - resp->tagp = resp->xdr->p; /* reserve space for: taglen, tag, and opcnt */ xdr_reserve_space(resp->xdr, XDR_UNIT * 2 + args->taglen); resp->taglen = args->taglen; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3f4027a5de88..bfad94c70b84 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1010,7 +1010,7 @@ static int delegation_blocked(struct knfsd_fh *fh) } spin_unlock(&blocked_delegations_lock); } - hash = jhash(&fh->fh_base, fh->fh_size, 0); + hash = jhash(&fh->fh_raw, fh->fh_size, 0); if (test_bit(hash&255, bd->set[0]) && test_bit((hash>>8)&255, bd->set[0]) && test_bit((hash>>16)&255, bd->set[0])) @@ -1029,7 +1029,7 @@ static void block_delegations(struct knfsd_fh *fh) u32 hash; struct bloom_pair *bd = &blocked_delegations; - hash = jhash(&fh->fh_base, fh->fh_size, 0); + hash = jhash(&fh->fh_raw, fh->fh_size, 0); spin_lock(&blocked_delegations_lock); __set_bit(hash&255, bd->set[bd->new]); @@ -5541,7 +5541,7 @@ static void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn) static void nfsd4_ssc_expire_umount(struct nfsd_net *nn) { bool do_wakeup = false; - struct nfsd4_ssc_umount_item *ni = 0; + struct nfsd4_ssc_umount_item *ni = NULL; struct nfsd4_ssc_umount_item *tmp; spin_lock(&nn->nfsd_ssc_lock); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index cf030ebe2827..b2a1d969a172 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2322,7 +2322,7 @@ nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op) return true; } -static int +static bool nfsd4_decode_compound(struct nfsd4_compoundargs *argp) { struct nfsd4_op *op; @@ -2335,25 +2335,25 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) int i; if (xdr_stream_decode_u32(argp->xdr, &argp->taglen) < 0) - return 0; + return false; max_reply += XDR_UNIT; argp->tag = NULL; if (unlikely(argp->taglen)) { if (argp->taglen > NFSD4_MAX_TAGLEN) - return 0; + return false; p = xdr_inline_decode(argp->xdr, argp->taglen); if (!p) - return 0; + return false; argp->tag = svcxdr_savemem(argp, p, argp->taglen); if (!argp->tag) - return 0; + return false; max_reply += xdr_align_size(argp->taglen); } if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0) - return 0; + return false; if (xdr_stream_decode_u32(argp->xdr, &argp->opcnt) < 0) - return 0; + return false; /* * NFS4ERR_RESOURCE is a more helpful error than GARBAGE_ARGS @@ -2361,14 +2361,14 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) * nfsd4_proc can handle this is an NFS-level error. */ if (argp->opcnt > NFSD_MAX_OPS_PER_COMPOUND) - return 1; + return true; if (argp->opcnt > ARRAY_SIZE(argp->iops)) { argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); if (!argp->ops) { argp->ops = argp->iops; dprintk("nfsd: couldn't allocate room for COMPOUND\n"); - return 0; + return false; } } @@ -2380,7 +2380,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) op->replay = NULL; if (xdr_stream_decode_u32(argp->xdr, &op->opnum) < 0) - return 0; + return false; if (nfsd4_opnum_in_range(argp, op)) { op->status = nfsd4_dec_ops[op->opnum](argp, &op->u); if (op->status != nfs_ok) @@ -2427,7 +2427,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack) clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags); - return 1; + return true; } static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, @@ -3110,7 +3110,7 @@ out_acl: p = xdr_reserve_space(xdr, fhp->fh_handle.fh_size + 4); if (!p) goto out_resource; - p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_raw, fhp->fh_handle.fh_size); } if (bmval0 & FATTR4_WORD0_FILEID) { @@ -3670,7 +3670,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh p = xdr_reserve_space(xdr, len + 4); if (!p) return nfserr_resource; - p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len); + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_raw, len); return 0; } @@ -5414,40 +5414,46 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp) } } -int -nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd4_compoundargs *args = rqstp->rq_argp; /* svcxdr_tmp_alloc */ args->to_free = NULL; - args->xdr = &rqstp->rq_arg_stream; + args->xdr = xdr; args->ops = args->iops; args->rqstp = rqstp; return nfsd4_decode_compound(args); } -int -nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd4_compoundres *resp = rqstp->rq_resp; - struct xdr_buf *buf = resp->xdr->buf; + struct xdr_buf *buf = xdr->buf; + __be32 *p; WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + buf->tail[0].iov_len); - *p = resp->cstate.status; + /* + * Send buffer space for the following items is reserved + * at the top of nfsd4_proc_compound(). + */ + p = resp->statusp; + + *p++ = resp->cstate.status; - rqstp->rq_next_page = resp->xdr->page_ptr + 1; + rqstp->rq_next_page = xdr->page_ptr + 1; - p = resp->tagp; *p++ = htonl(resp->taglen); memcpy(p, resp->tag, resp->taglen); p += XDR_QUADLEN(resp->taglen); *p++ = htonl(resp->opcnt); nfsd4_sequence_done(resp); - return 1; + return true; } diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 96cdf77925f3..6e0b6f3148dc 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -241,8 +241,8 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) list_move_tail(&rp->c_lru, &b->lru_head); } -static long -prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn) +static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn, + unsigned int max) { struct svc_cacherep *rp, *tmp; long freed = 0; @@ -258,11 +258,17 @@ prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn) time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) break; nfsd_reply_cache_free_locked(b, rp, nn); - freed++; + if (max && freed++ > max) + break; } return freed; } +static long nfsd_prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn) +{ + return prune_bucket(b, nn, 3); +} + /* * Walk the LRU list and prune off entries that are older than RC_EXPIRE. * Also prune the oldest ones when the total exceeds the max number of entries. @@ -279,7 +285,7 @@ prune_cache_entries(struct nfsd_net *nn) if (list_empty(&b->lru_head)) continue; spin_lock(&b->cache_lock); - freed += prune_bucket(b, nn); + freed += prune_bucket(b, nn, 0); spin_unlock(&b->cache_lock); } return freed; @@ -453,8 +459,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp) atomic_inc(&nn->num_drc_entries); nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp)); - /* go ahead and prune the cache */ - prune_bucket(b, nn); + nfsd_prune_bucket(b, nn); out_unlock: spin_unlock(&b->cache_lock); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 070e5dd03e26..af8531c3854a 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -395,12 +395,12 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) auth_domain_put(dom); if (len) return len; - + mesg = buf; len = SIMPLE_TRANSACTION_LIMIT; - qword_addhex(&mesg, &len, (char*)&fh.fh_base, fh.fh_size); + qword_addhex(&mesg, &len, fh.fh_raw, fh.fh_size); mesg[-1] = '\n'; - return mesg - buf; + return mesg - buf; } /* diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 9664303afdaf..498e5a489826 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -78,8 +78,10 @@ extern const struct seq_operations nfs_exports_op; */ struct nfsd_voidargs { }; struct nfsd_voidres { }; -int nfssvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p); -int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p); +bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, + struct xdr_stream *xdr); +bool nfssvc_encode_voidres(struct svc_rqst *rqstp, + struct xdr_stream *xdr); /* * Function prototypes. diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index c475d2271f9c..f3779fa72c89 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -154,11 +154,12 @@ static inline __be32 check_pseudo_root(struct svc_rqst *rqstp, static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) { struct knfsd_fh *fh = &fhp->fh_handle; - struct fid *fid = NULL, sfid; + struct fid *fid = NULL; struct svc_export *exp; struct dentry *dentry; int fileid_type; int data_left = fh->fh_size/4; + int len; __be32 error; error = nfserr_stale; @@ -167,48 +168,35 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) if (rqstp->rq_vers == 4 && fh->fh_size == 0) return nfserr_nofilehandle; - if (fh->fh_version == 1) { - int len; - - if (--data_left < 0) - return error; - if (fh->fh_auth_type != 0) - return error; - len = key_len(fh->fh_fsid_type) / 4; - if (len == 0) - return error; - if (fh->fh_fsid_type == FSID_MAJOR_MINOR) { - /* deprecated, convert to type 3 */ - len = key_len(FSID_ENCODE_DEV)/4; - fh->fh_fsid_type = FSID_ENCODE_DEV; - /* - * struct knfsd_fh uses host-endian fields, which are - * sometimes used to hold net-endian values. This - * confuses sparse, so we must use __force here to - * keep it from complaining. - */ - fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]), - ntohl((__force __be32)fh->fh_fsid[1]))); - fh->fh_fsid[1] = fh->fh_fsid[2]; - } - data_left -= len; - if (data_left < 0) - return error; - exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid); - fid = (struct fid *)(fh->fh_fsid + len); - } else { - __u32 tfh[2]; - dev_t xdev; - ino_t xino; - - if (fh->fh_size != NFS_FHSIZE) - return error; - /* assume old filehandle format */ - xdev = old_decode_dev(fh->ofh_xdev); - xino = u32_to_ino_t(fh->ofh_xino); - mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL); - exp = rqst_exp_find(rqstp, FSID_DEV, tfh); + if (fh->fh_version != 1) + return error; + + if (--data_left < 0) + return error; + if (fh->fh_auth_type != 0) + return error; + len = key_len(fh->fh_fsid_type) / 4; + if (len == 0) + return error; + if (fh->fh_fsid_type == FSID_MAJOR_MINOR) { + /* deprecated, convert to type 3 */ + len = key_len(FSID_ENCODE_DEV)/4; + fh->fh_fsid_type = FSID_ENCODE_DEV; + /* + * struct knfsd_fh uses host-endian fields, which are + * sometimes used to hold net-endian values. This + * confuses sparse, so we must use __force here to + * keep it from complaining. + */ + fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]), + ntohl((__force __be32)fh->fh_fsid[1]))); + fh->fh_fsid[1] = fh->fh_fsid[2]; } + data_left -= len; + if (data_left < 0) + return error; + exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid); + fid = (struct fid *)(fh->fh_fsid + len); error = nfserr_stale; if (IS_ERR(exp)) { @@ -253,18 +241,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) if (rqstp->rq_vers > 2) error = nfserr_badhandle; - if (fh->fh_version != 1) { - sfid.i32.ino = fh->ofh_ino; - sfid.i32.gen = fh->ofh_generation; - sfid.i32.parent_ino = fh->ofh_dirino; - fid = &sfid; - data_left = 3; - if (fh->ofh_dirino == 0) - fileid_type = FILEID_INO32_GEN; - else - fileid_type = FILEID_INO32_GEN_PARENT; - } else - fileid_type = fh->fh_fileid_type; + fileid_type = fh->fh_fileid_type; if (fileid_type == FILEID_ROOT) dentry = dget(exp->ex_path.dentry); @@ -452,20 +429,6 @@ static void _fh_update(struct svc_fh *fhp, struct svc_export *exp, } } -/* - * for composing old style file handles - */ -static inline void _fh_update_old(struct dentry *dentry, - struct svc_export *exp, - struct knfsd_fh *fh) -{ - fh->ofh_ino = ino_t_to_u32(d_inode(dentry)->i_ino); - fh->ofh_generation = d_inode(dentry)->i_generation; - if (d_is_dir(dentry) || - (exp->ex_flags & NFSEXP_NOSUBTREECHECK)) - fh->ofh_dirino = 0; -} - static bool is_root_export(struct svc_export *exp) { return exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root; @@ -562,9 +525,6 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, /* ref_fh is a reference file handle. * if it is non-null and for the same filesystem, then we should compose * a filehandle which is of the same version, where possible. - * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca - * Then create a 32byte filehandle using nfs_fhbase_old - * */ struct inode * inode = d_inode(dentry); @@ -600,35 +560,21 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, fhp->fh_dentry = dget(dentry); /* our internal copy */ fhp->fh_export = exp_get(exp); - if (fhp->fh_handle.fh_version == 0xca) { - /* old style filehandle please */ - memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE); - fhp->fh_handle.fh_size = NFS_FHSIZE; - fhp->fh_handle.ofh_dcookie = 0xfeebbaca; - fhp->fh_handle.ofh_dev = old_encode_dev(ex_dev); - fhp->fh_handle.ofh_xdev = fhp->fh_handle.ofh_dev; - fhp->fh_handle.ofh_xino = - ino_t_to_u32(d_inode(exp->ex_path.dentry)->i_ino); - fhp->fh_handle.ofh_dirino = ino_t_to_u32(parent_ino(dentry)); - if (inode) - _fh_update_old(dentry, exp, &fhp->fh_handle); - } else { - fhp->fh_handle.fh_size = - key_len(fhp->fh_handle.fh_fsid_type) + 4; - fhp->fh_handle.fh_auth_type = 0; - - mk_fsid(fhp->fh_handle.fh_fsid_type, - fhp->fh_handle.fh_fsid, - ex_dev, - d_inode(exp->ex_path.dentry)->i_ino, - exp->ex_fsid, exp->ex_uuid); - - if (inode) - _fh_update(fhp, exp, dentry); - if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { - fh_put(fhp); - return nfserr_opnotsupp; - } + fhp->fh_handle.fh_size = + key_len(fhp->fh_handle.fh_fsid_type) + 4; + fhp->fh_handle.fh_auth_type = 0; + + mk_fsid(fhp->fh_handle.fh_fsid_type, + fhp->fh_handle.fh_fsid, + ex_dev, + d_inode(exp->ex_path.dentry)->i_ino, + exp->ex_fsid, exp->ex_uuid); + + if (inode) + _fh_update(fhp, exp, dentry); + if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { + fh_put(fhp); + return nfserr_opnotsupp; } return 0; @@ -649,16 +595,12 @@ fh_update(struct svc_fh *fhp) dentry = fhp->fh_dentry; if (d_really_is_negative(dentry)) goto out_negative; - if (fhp->fh_handle.fh_version != 1) { - _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle); - } else { - if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT) - return 0; + if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT) + return 0; - _fh_update(fhp, fhp->fh_export, dentry); - if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) - return nfserr_opnotsupp; - } + _fh_update(fhp, fhp->fh_export, dentry); + if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) + return nfserr_opnotsupp; return 0; out_bad: printk(KERN_ERR "fh_update: fh not verified!\n"); @@ -698,16 +640,11 @@ fh_put(struct svc_fh *fhp) char * SVCFH_fmt(struct svc_fh *fhp) { struct knfsd_fh *fh = &fhp->fh_handle; + static char buf[2+1+1+64*3+1]; - static char buf[80]; - sprintf(buf, "%d: %08x %08x %08x %08x %08x %08x", - fh->fh_size, - fh->fh_base.fh_pad[0], - fh->fh_base.fh_pad[1], - fh->fh_base.fh_pad[2], - fh->fh_base.fh_pad[3], - fh->fh_base.fh_pad[4], - fh->fh_base.fh_pad[5]); + if (fh->fh_size < 0 || fh->fh_size> 64) + return "bad-fh"; + sprintf(buf, "%d: %*ph", fh->fh_size, fh->fh_size, fh->fh_raw); return buf; } diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 6106697adc04..d11e4b6870d6 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -10,9 +10,56 @@ #include <linux/crc32.h> #include <linux/sunrpc/svc.h> -#include <uapi/linux/nfsd/nfsfh.h> #include <linux/iversion.h> #include <linux/exportfs.h> +#include <linux/nfs4.h> + +/* + * The file handle starts with a sequence of four-byte words. + * The first word contains a version number (1) and three descriptor bytes + * that tell how the remaining 3 variable length fields should be handled. + * These three bytes are auth_type, fsid_type and fileid_type. + * + * All four-byte values are in host-byte-order. + * + * The auth_type field is deprecated and must be set to 0. + * + * The fsid_type identifies how the filesystem (or export point) is + * encoded. + * Current values: + * 0 - 4 byte device id (ms-2-bytes major, ls-2-bytes minor), 4byte inode number + * NOTE: we cannot use the kdev_t device id value, because kdev_t.h + * says we mustn't. We must break it up and reassemble. + * 1 - 4 byte user specified identifier + * 2 - 4 byte major, 4 byte minor, 4 byte inode number - DEPRECATED + * 3 - 4 byte device id, encoded for user-space, 4 byte inode number + * 4 - 4 byte inode number and 4 byte uuid + * 5 - 8 byte uuid + * 6 - 16 byte uuid + * 7 - 8 byte inode number and 16 byte uuid + * + * The fileid_type identifies how the file within the filesystem is encoded. + * The values for this field are filesystem specific, exccept that + * filesystems must not use the values '0' or '0xff'. 'See enum fid_type' + * in include/linux/exportfs.h for currently registered values. + */ + +struct knfsd_fh { + unsigned int fh_size; /* + * Points to the current size while + * building a new file handle. + */ + union { + char fh_raw[NFS4_FHSIZE]; + struct { + u8 fh_version; /* == 1 */ + u8 fh_auth_type; /* deprecated */ + u8 fh_fsid_type; + u8 fh_fileid_type; + u32 fh_fsid[]; /* flexible-array member */ + }; + }; +}; static inline __u32 ino_t_to_u32(ino_t ino) { @@ -188,7 +235,7 @@ static inline void fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src) { dst->fh_size = src->fh_size; - memcpy(&dst->fh_base, &src->fh_base, src->fh_size); + memcpy(&dst->fh_raw, &src->fh_raw, src->fh_size); } static __inline__ struct svc_fh * @@ -203,7 +250,7 @@ static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) { if (fh1->fh_size != fh2->fh_size) return false; - if (memcmp(fh1->fh_base.fh_pad, fh2->fh_base.fh_pad, fh1->fh_size) != 0) + if (memcmp(fh1->fh_raw, fh2->fh_raw, fh1->fh_size) != 0) return false; return true; } @@ -227,7 +274,7 @@ static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) */ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) { - return ~crc32_le(0xFFFFFFFF, (unsigned char *)&fh->fh_base, fh->fh_size); + return ~crc32_le(0xFFFFFFFF, fh->fh_raw, fh->fh_size); } #else static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 90fcd6178823..eea5b59b6a6c 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -234,8 +234,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) SVCFH_fmt(&argp->fh), argp->len, argp->offset); - nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages, - &argp->first, cnt); + nvecs = svc_fill_write_vector(rqstp, &argp->payload); if (!nvecs) { resp->status = nfserr_io; goto out; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index ccb59e91011b..80431921e5d7 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -1004,9 +1004,6 @@ out: int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) { const struct svc_procedure *proc = rqstp->rq_procinfo; - struct kvec *argv = &rqstp->rq_arg.head[0]; - struct kvec *resv = &rqstp->rq_res.head[0]; - __be32 *p; /* * Give the xdr decoder a chance to change this if it wants @@ -1015,7 +1012,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) rqstp->rq_cachetype = proc->pc_cachetype; svcxdr_init_decode(rqstp); - if (!proc->pc_decode(rqstp, argv->iov_base)) + if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream)) goto out_decode_err; switch (nfsd_cache_lookup(rqstp)) { @@ -1031,14 +1028,13 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) * Need to grab the location to store the status, as * NFSv4 does some encoding while processing */ - p = resv->iov_base + resv->iov_len; svcxdr_init_encode(rqstp); *statp = proc->pc_func(rqstp); if (*statp == rpc_drop_reply || test_bit(RQ_DROPME, &rqstp->rq_flags)) goto out_update_drop; - if (!proc->pc_encode(rqstp, p)) + if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream)) goto out_encode_err; nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1); @@ -1065,29 +1061,29 @@ out_encode_err: /** * nfssvc_decode_voidarg - Decode void arguments * @rqstp: Server RPC transaction context - * @p: buffer containing arguments to decode + * @xdr: XDR stream positioned at arguments to decode * * Return values: - * %0: Arguments were not valid - * %1: Decoding was successful + * %false: Arguments were not valid + * %true: Decoding was successful */ -int nfssvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p) +bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - return 1; + return true; } /** * nfssvc_encode_voidres - Encode void results * @rqstp: Server RPC transaction context - * @p: buffer in which to encode results + * @xdr: XDR stream into which to encode results * * Return values: - * %0: Local error while encoding - * %1: Encoding was successful + * %false: Local error while encoding + * %true: Encoding was successful */ -int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p) +bool nfssvc_encode_voidres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - return 1; + return true; } int nfsd_pool_stats_open(struct inode *inode, struct file *file) diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index a06c05fe3b42..aba8520b4b8b 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -64,7 +64,7 @@ svcxdr_decode_fhandle(struct xdr_stream *xdr, struct svc_fh *fhp) if (!p) return false; fh_init(fhp, NFS_FHSIZE); - memcpy(&fhp->fh_handle.fh_base, p, NFS_FHSIZE); + memcpy(&fhp->fh_handle.fh_raw, p, NFS_FHSIZE); fhp->fh_handle.fh_size = NFS_FHSIZE; return true; @@ -78,7 +78,7 @@ svcxdr_encode_fhandle(struct xdr_stream *xdr, const struct svc_fh *fhp) p = xdr_reserve_space(xdr, NFS_FHSIZE); if (!p) return false; - memcpy(p, &fhp->fh_handle.fh_base, NFS_FHSIZE); + memcpy(p, &fhp->fh_handle.fh_raw, NFS_FHSIZE); return true; } @@ -272,94 +272,81 @@ svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, * XDR decode functions */ -int -nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_fhandle *args = rqstp->rq_argp; return svcxdr_decode_fhandle(xdr, &args->fh); } -int -nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_sattrargs *args = rqstp->rq_argp; return svcxdr_decode_fhandle(xdr, &args->fh) && svcxdr_decode_sattr(rqstp, xdr, &args->attrs); } -int -nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_diropargs *args = rqstp->rq_argp; return svcxdr_decode_diropargs(xdr, &args->fh, &args->name, &args->len); } -int -nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_readargs *args = rqstp->rq_argp; u32 totalcount; if (!svcxdr_decode_fhandle(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->offset) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return 0; + return false; /* totalcount is ignored */ if (xdr_stream_decode_u32(xdr, &totalcount) < 0) - return 0; + return false; - return 1; + return true; } -int -nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_writeargs *args = rqstp->rq_argp; - struct kvec *head = rqstp->rq_arg.head; - struct kvec *tail = rqstp->rq_arg.tail; u32 beginoffset, totalcount; - size_t remaining; if (!svcxdr_decode_fhandle(xdr, &args->fh)) - return 0; + return false; /* beginoffset is ignored */ if (xdr_stream_decode_u32(xdr, &beginoffset) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->offset) < 0) - return 0; + return false; /* totalcount is ignored */ if (xdr_stream_decode_u32(xdr, &totalcount) < 0) - return 0; + return false; /* opaque data */ if (xdr_stream_decode_u32(xdr, &args->len) < 0) - return 0; + return false; if (args->len > NFSSVC_MAXBLKSIZE_V2) - return 0; - remaining = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len; - remaining -= xdr_stream_pos(xdr); - if (remaining < xdr_align_size(args->len)) - return 0; - args->first.iov_base = xdr->p; - args->first.iov_len = head->iov_len - xdr_stream_pos(xdr); + return false; + if (!xdr_stream_subsegment(xdr, &args->payload, args->len)) + return false; - return 1; + return true; } -int -nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_createargs *args = rqstp->rq_argp; return svcxdr_decode_diropargs(xdr, &args->fh, @@ -367,10 +354,9 @@ nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p) svcxdr_decode_sattr(rqstp, xdr, &args->attrs); } -int -nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_renameargs *args = rqstp->rq_argp; return svcxdr_decode_diropargs(xdr, &args->ffh, @@ -379,10 +365,9 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p) &args->tname, &args->tlen); } -int -nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_linkargs *args = rqstp->rq_argp; return svcxdr_decode_fhandle(xdr, &args->ffh) && @@ -390,178 +375,170 @@ nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p) &args->tname, &args->tlen); } -int -nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_symlinkargs *args = rqstp->rq_argp; struct kvec *head = rqstp->rq_arg.head; if (!svcxdr_decode_diropargs(xdr, &args->ffh, &args->fname, &args->flen)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->tlen) < 0) - return 0; + return false; if (args->tlen == 0) - return 0; + return false; args->first.iov_len = head->iov_len - xdr_stream_pos(xdr); args->first.iov_base = xdr_inline_decode(xdr, args->tlen); if (!args->first.iov_base) - return 0; + return false; return svcxdr_decode_sattr(rqstp, xdr, &args->attrs); } -int -nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct nfsd_readdirargs *args = rqstp->rq_argp; if (!svcxdr_decode_fhandle(xdr, &args->fh)) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->cookie) < 0) - return 0; + return false; if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return 0; + return false; - return 1; + return true; } /* * XDR encode functions */ -int -nfssvc_encode_statres(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_stat *resp = rqstp->rq_resp; return svcxdr_encode_stat(xdr, resp->status); } -int -nfssvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_attrstat *resp = rqstp->rq_resp; if (!svcxdr_encode_stat(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) - return 0; + return false; break; } - return 1; + return true; } -int -nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_diropres *resp = rqstp->rq_resp; if (!svcxdr_encode_stat(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_fhandle(xdr, &resp->fh)) - return 0; + return false; if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) - return 0; + return false; break; } - return 1; + return true; } -int -nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_readlinkres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; if (!svcxdr_encode_stat(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (xdr_stream_encode_u32(xdr, resp->len) < 0) - return 0; + return false; xdr_write_pages(xdr, &resp->page, 0, resp->len); if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0) - return 0; + return false; break; } - return 1; + return true; } -int -nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_readres *resp = rqstp->rq_resp; struct kvec *head = rqstp->rq_res.head; if (!svcxdr_encode_stat(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) - return 0; + return false; if (xdr_stream_encode_u32(xdr, resp->count) < 0) - return 0; + return false; xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base, resp->count); if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0) - return 0; + return false; break; } - return 1; + return true; } -int -nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_readdirres *resp = rqstp->rq_resp; struct xdr_buf *dirlist = &resp->dirlist; if (!svcxdr_encode_stat(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len); /* no more entries */ if (xdr_stream_encode_item_absent(xdr) < 0) - return 0; + return false; if (xdr_stream_encode_bool(xdr, resp->common.err == nfserr_eof) < 0) - return 0; + return false; break; } - return 1; + return true; } -int -nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct xdr_stream *xdr = &rqstp->rq_res_stream; struct nfsd_statfsres *resp = rqstp->rq_resp; struct kstatfs *stat = &resp->stats; + __be32 *p; if (!svcxdr_encode_stat(xdr, resp->status)) - return 0; + return false; switch (resp->status) { case nfs_ok: p = xdr_reserve_space(xdr, XDR_UNIT * 5); if (!p) - return 0; + return false; *p++ = cpu_to_be32(NFSSVC_MAXBLKSIZE_V2); *p++ = cpu_to_be32(stat->f_bsize); *p++ = cpu_to_be32(stat->f_blocks); @@ -570,7 +547,7 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p) break; } - return 1; + return true; } /** diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 538520957a81..f1e0d3c51bc2 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -9,6 +9,7 @@ #define _NFSD_TRACE_H #include <linux/tracepoint.h> + #include "export.h" #include "nfsfh.h" diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 738d564ca4ce..c99857689e2c 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -244,6 +244,7 @@ out_nfserr: * returned. Otherwise the covered directory is returned. * NOTE: this mountpoint crossing is not supported properly by all * clients and is explicitly disallowed for NFSv3 + * NeilBrown <neilb@cse.unsw.edu.au> */ __be32 nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, @@ -729,9 +730,6 @@ __nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, path.dentry = fhp->fh_dentry; inode = d_inode(path.dentry); - /* Disallow write access to files with the append-only bit set - * or any access when mandatory locking enabled - */ err = nfserr_perm; if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE)) goto out; @@ -1410,7 +1408,8 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, if (nfsd_create_is_exclusive(createmode)) { /* solaris7 gets confused (bugid 4218508) if these have - * the high bit set, so just clear the high bits. If this is + * the high bit set, as do xfs filesystems without the + * "bigtime" feature. So just clear the high bits. If this is * ever changed to use different attrs for storing the * verifier, then do_open_lookup() will also need to be fixed * accordingly. diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index f45b4bc93f52..528fb299430e 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -33,7 +33,7 @@ struct nfsd_writeargs { svc_fh fh; __u32 offset; int len; - struct kvec first; + struct xdr_buf payload; }; struct nfsd_createargs { @@ -141,23 +141,24 @@ union nfsd_xdrstore { #define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore) -int nfssvc_decode_fhandleargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_readargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_createargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *); -int nfssvc_encode_statres(struct svc_rqst *, __be32 *); -int nfssvc_encode_attrstatres(struct svc_rqst *, __be32 *); -int nfssvc_encode_diropres(struct svc_rqst *, __be32 *); -int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *); -int nfssvc_encode_readres(struct svc_rqst *, __be32 *); -int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *); -int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *); +bool nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); + +bool nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr); void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset); int nfssvc_encode_entry(void *data, const char *name, int namlen, diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 933008382bbe..03fe4e21306c 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -40,7 +40,7 @@ struct nfsd3_writeargs { __u32 count; int stable; __u32 len; - struct kvec first; + struct xdr_buf payload; }; struct nfsd3_createargs { @@ -265,36 +265,37 @@ union nfsd3_xdrstore { #define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore) -int nfs3svc_decode_fhandleargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *); -int nfs3svc_encode_getattrres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *); -int nfs3svc_encode_lookupres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_readres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_createres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *); -int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *); +bool nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); + +bool nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr); void nfs3svc_release_fhandle(struct svc_rqst *); void nfs3svc_release_fhandle2(struct svc_rqst *); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 3e4052e3bd50..846ab6df9d48 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -702,10 +702,11 @@ struct nfsd4_compoundres { struct xdr_stream *xdr; struct svc_rqst * rqstp; + __be32 *statusp; u32 taglen; char * tag; u32 opcnt; - __be32 * tagp; /* tag, opcount encode location */ + struct nfsd4_compound_state cstate; }; @@ -756,8 +757,8 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp); -int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *); -int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *); +bool nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr); __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op); diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index adf3bb0a8048..6ce8617b562d 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * alloc.c - NILFS dat/inode allocator + * NILFS dat/inode allocator * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index 0303c3968cee..b667e869ac07 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * alloc.h - persistent object (dat entry/disk inode) allocator/deallocator + * Persistent object (dat entry/disk inode) allocator/deallocator * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 5900879d5693..798a2c1b38c6 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * bmap.c - NILFS block mapping. + * NILFS block mapping. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index 2c63858e81c9..608168a5cb88 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * bmap.h - NILFS block mapping. + * NILFS block mapping. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 4391fd3abd8f..66bdaa2cf496 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * btnode.c - NILFS B-tree node cache + * NILFS B-tree node cache * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h index 0f88dbc9bcb3..11663650add7 100644 --- a/fs/nilfs2/btnode.h +++ b/fs/nilfs2/btnode.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * btnode.h - NILFS B-tree node cache + * NILFS B-tree node cache * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index ab9ec073330f..3594eabe1419 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * btree.c - NILFS B-tree. + * NILFS B-tree. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h index d1421b646ce4..92868e1a48ca 100644 --- a/fs/nilfs2/btree.h +++ b/fs/nilfs2/btree.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * btree.h - NILFS B-tree. + * NILFS B-tree. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index ce144776b4ef..9ebefb3acb0e 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * cpfile.c - NILFS checkpoint file. + * NILFS checkpoint file. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h index 6336222df24a..edabb2dc5756 100644 --- a/fs/nilfs2/cpfile.h +++ b/fs/nilfs2/cpfile.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * cpfile.h - NILFS checkpoint file. + * NILFS checkpoint file. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 8bccdf1158fc..dc51d3b7a7bf 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * dat.c - NILFS disk address translation. + * NILFS disk address translation. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h index b17ee34580ae..468c82d26183 100644 --- a/fs/nilfs2/dat.h +++ b/fs/nilfs2/dat.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * dat.h - NILFS disk address translation. + * NILFS disk address translation. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 81394e22d0a0..f8f4c2ff52f4 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * dir.c - NILFS directory entry operations + * NILFS directory entry operations * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index f353101955e3..a35f2795b242 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * direct.c - NILFS direct block pointer. + * NILFS direct block pointer. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/direct.h b/fs/nilfs2/direct.h index ec9a23c77994..b7ca896269af 100644 --- a/fs/nilfs2/direct.h +++ b/fs/nilfs2/direct.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * direct.h - NILFS direct block pointer. + * NILFS direct block pointer. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 7cf765258fda..a265d391ffe9 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * file.c - NILFS regular file handling primitives including fsync(). + * NILFS regular file handling primitives including fsync(). * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 448320496856..a8f5315f01e3 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * gcinode.c - dummy inodes to buffer blocks for garbage collection + * Dummy inodes to buffer blocks for garbage collection * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index 02727ed3a7c6..a8a4bc8490b4 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * ifile.c - NILFS inode file + * NILFS inode file * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h index a1e1e5711a05..35c5273f4821 100644 --- a/fs/nilfs2/ifile.h +++ b/fs/nilfs2/ifile.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * ifile.h - NILFS inode file + * NILFS inode file * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 2e8eb263cf0f..e3d807d5b83a 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * inode.c - NILFS inode operations. + * NILFS inode operations. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 1d0583cfd970..fec194a666f4 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * ioctl.c - NILFS ioctl operations. + * NILFS ioctl operations. * * Copyright (C) 2007, 2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 97769fe4d588..4b3d33cf0041 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * mdt.c - meta data file for NILFS + * Meta data file for NILFS * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index e77aea4bb921..8f86080a436d 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * mdt.h - NILFS meta data file prototype and definitions + * NILFS meta data file prototype and definitions * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 91eebeb0c48b..23899e0ae850 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * namei.c - NILFS pathname lookup operations. + * NILFS pathname lookup operations. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 60b21b6eeac0..a7b81755c350 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * nilfs.h - NILFS local header file. + * NILFS local header file. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 171fb5cd427f..bc3e2cd4117f 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * page.c - buffer/page management specific to NILFS + * Buffer/page management specific to NILFS * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h index 62b9bb469e92..569263b23c0c 100644 --- a/fs/nilfs2/page.h +++ b/fs/nilfs2/page.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * page.h - buffer/page management specific to NILFS + * Buffer/page management specific to NILFS * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 2217f904a7cf..9e2ed76c0f25 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * recovery.c - NILFS recovery logic + * NILFS recovery logic * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 56872e93823d..43287b0d3e9b 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * segbuf.c - NILFS segment buffer + * NILFS segment buffer * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h index 9bea1bd59041..e20091ededba 100644 --- a/fs/nilfs2/segbuf.h +++ b/fs/nilfs2/segbuf.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * segbuf.h - NILFS Segment buffer prototypes and definitions + * NILFS Segment buffer prototypes and definitions * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 686c8ee7b29c..85a853334771 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * segment.c - NILFS segment constructor. + * NILFS segment constructor. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index f5cf5308f3fc..1060f72ebf5a 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * segment.h - NILFS Segment constructor prototypes and definitions + * NILFS Segment constructor prototypes and definitions * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 63722475e17e..e385cca2004a 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * sufile.c - NILFS segment usage file. + * NILFS segment usage file. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index c4e2c7a7add1..8e8a1a5a0402 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * sufile.h - NILFS segment usage file. + * NILFS segment usage file. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 3134c0e42fd4..63e5fa74016c 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * super.c - NILFS module and super block management. + * NILFS module and super block management. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 62f8a7ac19c8..81f35c5b5a40 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * sysfs.c - sysfs support implementation. + * Sysfs support implementation. * * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation. * Copyright (C) 2014 HGST, Inc., a Western Digital Company. @@ -95,7 +95,7 @@ static ssize_t nilfs_snapshot_inodes_count_show(struct nilfs_snapshot_attr *attr, struct nilfs_root *root, char *buf) { - return snprintf(buf, PAGE_SIZE, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long)atomic64_read(&root->inodes_count)); } @@ -103,7 +103,7 @@ static ssize_t nilfs_snapshot_blocks_count_show(struct nilfs_snapshot_attr *attr, struct nilfs_root *root, char *buf) { - return snprintf(buf, PAGE_SIZE, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long)atomic64_read(&root->blocks_count)); } @@ -116,7 +116,7 @@ static ssize_t nilfs_snapshot_README_show(struct nilfs_snapshot_attr *attr, struct nilfs_root *root, char *buf) { - return snprintf(buf, PAGE_SIZE, snapshot_readme_str); + return sysfs_emit(buf, snapshot_readme_str); } NILFS_SNAPSHOT_RO_ATTR(inodes_count); @@ -217,7 +217,7 @@ static ssize_t nilfs_mounted_snapshots_README_show(struct nilfs_mounted_snapshots_attr *attr, struct the_nilfs *nilfs, char *buf) { - return snprintf(buf, PAGE_SIZE, mounted_snapshots_readme_str); + return sysfs_emit(buf, mounted_snapshots_readme_str); } NILFS_MOUNTED_SNAPSHOTS_RO_ATTR(README); @@ -255,7 +255,7 @@ nilfs_checkpoints_checkpoints_number_show(struct nilfs_checkpoints_attr *attr, ncheckpoints = cpstat.cs_ncps; - return snprintf(buf, PAGE_SIZE, "%llu\n", ncheckpoints); + return sysfs_emit(buf, "%llu\n", ncheckpoints); } static ssize_t @@ -278,7 +278,7 @@ nilfs_checkpoints_snapshots_number_show(struct nilfs_checkpoints_attr *attr, nsnapshots = cpstat.cs_nsss; - return snprintf(buf, PAGE_SIZE, "%llu\n", nsnapshots); + return sysfs_emit(buf, "%llu\n", nsnapshots); } static ssize_t @@ -292,7 +292,7 @@ nilfs_checkpoints_last_seg_checkpoint_show(struct nilfs_checkpoints_attr *attr, last_cno = nilfs->ns_last_cno; spin_unlock(&nilfs->ns_last_segment_lock); - return snprintf(buf, PAGE_SIZE, "%llu\n", last_cno); + return sysfs_emit(buf, "%llu\n", last_cno); } static ssize_t @@ -306,7 +306,7 @@ nilfs_checkpoints_next_checkpoint_show(struct nilfs_checkpoints_attr *attr, cno = nilfs->ns_cno; up_read(&nilfs->ns_segctor_sem); - return snprintf(buf, PAGE_SIZE, "%llu\n", cno); + return sysfs_emit(buf, "%llu\n", cno); } static const char checkpoints_readme_str[] = @@ -322,7 +322,7 @@ static ssize_t nilfs_checkpoints_README_show(struct nilfs_checkpoints_attr *attr, struct the_nilfs *nilfs, char *buf) { - return snprintf(buf, PAGE_SIZE, checkpoints_readme_str); + return sysfs_emit(buf, checkpoints_readme_str); } NILFS_CHECKPOINTS_RO_ATTR(checkpoints_number); @@ -353,7 +353,7 @@ nilfs_segments_segments_number_show(struct nilfs_segments_attr *attr, struct the_nilfs *nilfs, char *buf) { - return snprintf(buf, PAGE_SIZE, "%lu\n", nilfs->ns_nsegments); + return sysfs_emit(buf, "%lu\n", nilfs->ns_nsegments); } static ssize_t @@ -361,7 +361,7 @@ nilfs_segments_blocks_per_segment_show(struct nilfs_segments_attr *attr, struct the_nilfs *nilfs, char *buf) { - return snprintf(buf, PAGE_SIZE, "%lu\n", nilfs->ns_blocks_per_segment); + return sysfs_emit(buf, "%lu\n", nilfs->ns_blocks_per_segment); } static ssize_t @@ -375,7 +375,7 @@ nilfs_segments_clean_segments_show(struct nilfs_segments_attr *attr, ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile); up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); - return snprintf(buf, PAGE_SIZE, "%lu\n", ncleansegs); + return sysfs_emit(buf, "%lu\n", ncleansegs); } static ssize_t @@ -395,7 +395,7 @@ nilfs_segments_dirty_segments_show(struct nilfs_segments_attr *attr, return err; } - return snprintf(buf, PAGE_SIZE, "%llu\n", sustat.ss_ndirtysegs); + return sysfs_emit(buf, "%llu\n", sustat.ss_ndirtysegs); } static const char segments_readme_str[] = @@ -411,7 +411,7 @@ nilfs_segments_README_show(struct nilfs_segments_attr *attr, struct the_nilfs *nilfs, char *buf) { - return snprintf(buf, PAGE_SIZE, segments_readme_str); + return sysfs_emit(buf, segments_readme_str); } NILFS_SEGMENTS_RO_ATTR(segments_number); @@ -448,7 +448,7 @@ nilfs_segctor_last_pseg_block_show(struct nilfs_segctor_attr *attr, last_pseg = nilfs->ns_last_pseg; spin_unlock(&nilfs->ns_last_segment_lock); - return snprintf(buf, PAGE_SIZE, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long)last_pseg); } @@ -463,7 +463,7 @@ nilfs_segctor_last_seg_sequence_show(struct nilfs_segctor_attr *attr, last_seq = nilfs->ns_last_seq; spin_unlock(&nilfs->ns_last_segment_lock); - return snprintf(buf, PAGE_SIZE, "%llu\n", last_seq); + return sysfs_emit(buf, "%llu\n", last_seq); } static ssize_t @@ -477,7 +477,7 @@ nilfs_segctor_last_seg_checkpoint_show(struct nilfs_segctor_attr *attr, last_cno = nilfs->ns_last_cno; spin_unlock(&nilfs->ns_last_segment_lock); - return snprintf(buf, PAGE_SIZE, "%llu\n", last_cno); + return sysfs_emit(buf, "%llu\n", last_cno); } static ssize_t @@ -491,7 +491,7 @@ nilfs_segctor_current_seg_sequence_show(struct nilfs_segctor_attr *attr, seg_seq = nilfs->ns_seg_seq; up_read(&nilfs->ns_segctor_sem); - return snprintf(buf, PAGE_SIZE, "%llu\n", seg_seq); + return sysfs_emit(buf, "%llu\n", seg_seq); } static ssize_t @@ -505,7 +505,7 @@ nilfs_segctor_current_last_full_seg_show(struct nilfs_segctor_attr *attr, segnum = nilfs->ns_segnum; up_read(&nilfs->ns_segctor_sem); - return snprintf(buf, PAGE_SIZE, "%llu\n", segnum); + return sysfs_emit(buf, "%llu\n", segnum); } static ssize_t @@ -519,7 +519,7 @@ nilfs_segctor_next_full_seg_show(struct nilfs_segctor_attr *attr, nextnum = nilfs->ns_nextnum; up_read(&nilfs->ns_segctor_sem); - return snprintf(buf, PAGE_SIZE, "%llu\n", nextnum); + return sysfs_emit(buf, "%llu\n", nextnum); } static ssize_t @@ -533,7 +533,7 @@ nilfs_segctor_next_pseg_offset_show(struct nilfs_segctor_attr *attr, pseg_offset = nilfs->ns_pseg_offset; up_read(&nilfs->ns_segctor_sem); - return snprintf(buf, PAGE_SIZE, "%lu\n", pseg_offset); + return sysfs_emit(buf, "%lu\n", pseg_offset); } static ssize_t @@ -547,7 +547,7 @@ nilfs_segctor_next_checkpoint_show(struct nilfs_segctor_attr *attr, cno = nilfs->ns_cno; up_read(&nilfs->ns_segctor_sem); - return snprintf(buf, PAGE_SIZE, "%llu\n", cno); + return sysfs_emit(buf, "%llu\n", cno); } static ssize_t @@ -575,7 +575,7 @@ nilfs_segctor_last_seg_write_time_secs_show(struct nilfs_segctor_attr *attr, ctime = nilfs->ns_ctime; up_read(&nilfs->ns_segctor_sem); - return snprintf(buf, PAGE_SIZE, "%llu\n", ctime); + return sysfs_emit(buf, "%llu\n", ctime); } static ssize_t @@ -603,7 +603,7 @@ nilfs_segctor_last_nongc_write_time_secs_show(struct nilfs_segctor_attr *attr, nongc_ctime = nilfs->ns_nongc_ctime; up_read(&nilfs->ns_segctor_sem); - return snprintf(buf, PAGE_SIZE, "%llu\n", nongc_ctime); + return sysfs_emit(buf, "%llu\n", nongc_ctime); } static ssize_t @@ -617,7 +617,7 @@ nilfs_segctor_dirty_data_blocks_count_show(struct nilfs_segctor_attr *attr, ndirtyblks = atomic_read(&nilfs->ns_ndirtyblks); up_read(&nilfs->ns_segctor_sem); - return snprintf(buf, PAGE_SIZE, "%u\n", ndirtyblks); + return sysfs_emit(buf, "%u\n", ndirtyblks); } static const char segctor_readme_str[] = @@ -654,7 +654,7 @@ static ssize_t nilfs_segctor_README_show(struct nilfs_segctor_attr *attr, struct the_nilfs *nilfs, char *buf) { - return snprintf(buf, PAGE_SIZE, segctor_readme_str); + return sysfs_emit(buf, segctor_readme_str); } NILFS_SEGCTOR_RO_ATTR(last_pseg_block); @@ -723,7 +723,7 @@ nilfs_superblock_sb_write_time_secs_show(struct nilfs_superblock_attr *attr, sbwtime = nilfs->ns_sbwtime; up_read(&nilfs->ns_sem); - return snprintf(buf, PAGE_SIZE, "%llu\n", sbwtime); + return sysfs_emit(buf, "%llu\n", sbwtime); } static ssize_t @@ -737,7 +737,7 @@ nilfs_superblock_sb_write_count_show(struct nilfs_superblock_attr *attr, sbwcount = nilfs->ns_sbwcount; up_read(&nilfs->ns_sem); - return snprintf(buf, PAGE_SIZE, "%u\n", sbwcount); + return sysfs_emit(buf, "%u\n", sbwcount); } static ssize_t @@ -751,7 +751,7 @@ nilfs_superblock_sb_update_frequency_show(struct nilfs_superblock_attr *attr, sb_update_freq = nilfs->ns_sb_update_freq; up_read(&nilfs->ns_sem); - return snprintf(buf, PAGE_SIZE, "%u\n", sb_update_freq); + return sysfs_emit(buf, "%u\n", sb_update_freq); } static ssize_t @@ -799,7 +799,7 @@ static ssize_t nilfs_superblock_README_show(struct nilfs_superblock_attr *attr, struct the_nilfs *nilfs, char *buf) { - return snprintf(buf, PAGE_SIZE, sb_readme_str); + return sysfs_emit(buf, sb_readme_str); } NILFS_SUPERBLOCK_RO_ATTR(sb_write_time); @@ -834,7 +834,7 @@ ssize_t nilfs_dev_revision_show(struct nilfs_dev_attr *attr, u32 major = le32_to_cpu(sbp[0]->s_rev_level); u16 minor = le16_to_cpu(sbp[0]->s_minor_rev_level); - return snprintf(buf, PAGE_SIZE, "%d.%d\n", major, minor); + return sysfs_emit(buf, "%d.%d\n", major, minor); } static @@ -842,7 +842,7 @@ ssize_t nilfs_dev_blocksize_show(struct nilfs_dev_attr *attr, struct the_nilfs *nilfs, char *buf) { - return snprintf(buf, PAGE_SIZE, "%u\n", nilfs->ns_blocksize); + return sysfs_emit(buf, "%u\n", nilfs->ns_blocksize); } static @@ -853,7 +853,7 @@ ssize_t nilfs_dev_device_size_show(struct nilfs_dev_attr *attr, struct nilfs_super_block **sbp = nilfs->ns_sbp; u64 dev_size = le64_to_cpu(sbp[0]->s_dev_size); - return snprintf(buf, PAGE_SIZE, "%llu\n", dev_size); + return sysfs_emit(buf, "%llu\n", dev_size); } static @@ -864,7 +864,7 @@ ssize_t nilfs_dev_free_blocks_show(struct nilfs_dev_attr *attr, sector_t free_blocks = 0; nilfs_count_free_blocks(nilfs, &free_blocks); - return snprintf(buf, PAGE_SIZE, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long)free_blocks); } @@ -875,7 +875,7 @@ ssize_t nilfs_dev_uuid_show(struct nilfs_dev_attr *attr, { struct nilfs_super_block **sbp = nilfs->ns_sbp; - return snprintf(buf, PAGE_SIZE, "%pUb\n", sbp[0]->s_uuid); + return sysfs_emit(buf, "%pUb\n", sbp[0]->s_uuid); } static @@ -903,7 +903,7 @@ static ssize_t nilfs_dev_README_show(struct nilfs_dev_attr *attr, struct the_nilfs *nilfs, char *buf) { - return snprintf(buf, PAGE_SIZE, dev_readme_str); + return sysfs_emit(buf, dev_readme_str); } NILFS_DEV_RO_ATTR(revision); @@ -1047,7 +1047,7 @@ void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs) static ssize_t nilfs_feature_revision_show(struct kobject *kobj, struct attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d.%d\n", + return sysfs_emit(buf, "%d.%d\n", NILFS_CURRENT_REV, NILFS_MINOR_REV); } @@ -1060,7 +1060,7 @@ static ssize_t nilfs_feature_README_show(struct kobject *kobj, struct attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, features_readme_str); + return sysfs_emit(buf, features_readme_str); } NILFS_FEATURE_RO_ATTR(revision); diff --git a/fs/nilfs2/sysfs.h b/fs/nilfs2/sysfs.h index d001eb862dae..78a87a016928 100644 --- a/fs/nilfs2/sysfs.h +++ b/fs/nilfs2/sysfs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * sysfs.h - sysfs support declarations. + * Sysfs support declarations. * * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation. * Copyright (C) 2014 HGST, Inc., a Western Digital Company. diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 1bfcb5d3ea48..dd48a8f74d57 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * the_nilfs.c - the_nilfs shared structure. + * the_nilfs shared structure. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 987c8ab02aee..47c7dfbb7ea5 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * the_nilfs.h - the_nilfs shared structure. + * the_nilfs shared structure. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 057abd2cf887..b6091775aa6e 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -111,6 +111,16 @@ static bool fanotify_name_event_equal(struct fanotify_name_event *fne1, return fanotify_info_equal(info1, info2); } +static bool fanotify_error_event_equal(struct fanotify_error_event *fee1, + struct fanotify_error_event *fee2) +{ + /* Error events against the same file system are always merged. */ + if (!fanotify_fsid_equal(&fee1->fsid, &fee2->fsid)) + return false; + + return true; +} + static bool fanotify_should_merge(struct fanotify_event *old, struct fanotify_event *new) { @@ -141,6 +151,9 @@ static bool fanotify_should_merge(struct fanotify_event *old, case FANOTIFY_EVENT_TYPE_FID_NAME: return fanotify_name_event_equal(FANOTIFY_NE(old), FANOTIFY_NE(new)); + case FANOTIFY_EVENT_TYPE_FS_ERROR: + return fanotify_error_event_equal(FANOTIFY_EE(old), + FANOTIFY_EE(new)); default: WARN_ON_ONCE(1); } @@ -176,6 +189,10 @@ static int fanotify_merge(struct fsnotify_group *group, break; if (fanotify_should_merge(old, new)) { old->mask |= new->mask; + + if (fanotify_is_error_event(old->mask)) + FANOTIFY_EE(old)->err_count++; + return 1; } } @@ -343,13 +360,23 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, static int fanotify_encode_fh_len(struct inode *inode) { int dwords = 0; + int fh_len; if (!inode) return 0; exportfs_encode_inode_fh(inode, NULL, &dwords, NULL); + fh_len = dwords << 2; + + /* + * struct fanotify_error_event might be preallocated and is + * limited to MAX_HANDLE_SZ. This should never happen, but + * safeguard by forcing an invalid file handle. + */ + if (WARN_ON_ONCE(fh_len > MAX_HANDLE_SZ)) + return 0; - return dwords << 2; + return fh_len; } /* @@ -370,8 +397,14 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, fh->type = FILEID_ROOT; fh->len = 0; fh->flags = 0; + + /* + * Invalid FHs are used by FAN_FS_ERROR for errors not + * linked to any inode. The f_handle won't be reported + * back to userspace. + */ if (!inode) - return 0; + goto out; /* * !gpf means preallocated variable size fh, but fh_len could @@ -403,8 +436,13 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, fh->type = type; fh->len = fh_len; - /* Mix fh into event merge key */ - *hash ^= fanotify_hash_fh(fh); +out: + /* + * Mix fh into event merge key. Hash might be NULL in case of + * unhashed FID events (i.e. FAN_FS_ERROR). + */ + if (hash) + *hash ^= fanotify_hash_fh(fh); return FANOTIFY_FH_HDR_LEN + fh_len; @@ -452,7 +490,7 @@ static struct inode *fanotify_dfid_inode(u32 event_mask, const void *data, if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) return dir; - if (S_ISDIR(inode->i_mode)) + if (inode && S_ISDIR(inode->i_mode)) return inode; return dir; @@ -563,6 +601,44 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *id, return &fne->fae; } +static struct fanotify_event *fanotify_alloc_error_event( + struct fsnotify_group *group, + __kernel_fsid_t *fsid, + const void *data, int data_type, + unsigned int *hash) +{ + struct fs_error_report *report = + fsnotify_data_error_report(data, data_type); + struct inode *inode; + struct fanotify_error_event *fee; + int fh_len; + + if (WARN_ON_ONCE(!report)) + return NULL; + + fee = mempool_alloc(&group->fanotify_data.error_events_pool, GFP_NOFS); + if (!fee) + return NULL; + + fee->fae.type = FANOTIFY_EVENT_TYPE_FS_ERROR; + fee->error = report->error; + fee->err_count = 1; + fee->fsid = *fsid; + + inode = report->inode; + fh_len = fanotify_encode_fh_len(inode); + + /* Bad fh_len. Fallback to using an invalid fh. Should never happen. */ + if (!fh_len && inode) + inode = NULL; + + fanotify_encode_fh(&fee->object_fh, inode, fh_len, NULL, 0); + + *hash ^= fanotify_hash_fsid(fsid); + + return &fee->fae; +} + static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, u32 mask, const void *data, int data_type, struct inode *dir, @@ -630,6 +706,9 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, if (fanotify_is_perm_event(mask)) { event = fanotify_alloc_perm_event(path, gfp); + } else if (fanotify_is_error_event(mask)) { + event = fanotify_alloc_error_event(group, fsid, data, + data_type, &hash); } else if (name_event && (file_name || child)) { event = fanotify_alloc_name_event(id, fsid, file_name, child, &hash, gfp); @@ -702,6 +781,9 @@ static void fanotify_insert_event(struct fsnotify_group *group, assert_spin_locked(&group->notification_lock); + if (!fanotify_is_hashed_event(event->mask)) + return; + pr_debug("%s: group=%p event=%p bucket=%u\n", __func__, group, event, bucket); @@ -738,8 +820,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC); BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM); + BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR); - BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 19); + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 20); mask = fanotify_group_event_mask(group, iter_info, mask, data, data_type, dir); @@ -778,9 +861,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, } fsn_event = &event->fse; - ret = fsnotify_add_event(group, fsn_event, fanotify_merge, - fanotify_is_hashed_event(mask) ? - fanotify_insert_event : NULL); + ret = fsnotify_insert_event(group, fsn_event, fanotify_merge, + fanotify_insert_event); if (ret) { /* Permission events shouldn't be merged */ BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS); @@ -805,6 +887,9 @@ static void fanotify_free_group_priv(struct fsnotify_group *group) if (group->fanotify_data.ucounts) dec_ucount(group->fanotify_data.ucounts, UCOUNT_FANOTIFY_GROUPS); + + if (mempool_initialized(&group->fanotify_data.error_events_pool)) + mempool_exit(&group->fanotify_data.error_events_pool); } static void fanotify_free_path_event(struct fanotify_event *event) @@ -833,7 +918,16 @@ static void fanotify_free_name_event(struct fanotify_event *event) kfree(FANOTIFY_NE(event)); } -static void fanotify_free_event(struct fsnotify_event *fsn_event) +static void fanotify_free_error_event(struct fsnotify_group *group, + struct fanotify_event *event) +{ + struct fanotify_error_event *fee = FANOTIFY_EE(event); + + mempool_free(fee, &group->fanotify_data.error_events_pool); +} + +static void fanotify_free_event(struct fsnotify_group *group, + struct fsnotify_event *fsn_event) { struct fanotify_event *event; @@ -855,6 +949,9 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) case FANOTIFY_EVENT_TYPE_OVERFLOW: kfree(event); break; + case FANOTIFY_EVENT_TYPE_FS_ERROR: + fanotify_free_error_event(group, event); + break; default: WARN_ON_ONCE(1); } diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 4a5e555dc3d2..d25f500bf7e7 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -141,6 +141,7 @@ enum fanotify_event_type { FANOTIFY_EVENT_TYPE_PATH, FANOTIFY_EVENT_TYPE_PATH_PERM, FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */ + FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */ __FANOTIFY_EVENT_TYPE_NUM }; @@ -170,12 +171,18 @@ static inline void fanotify_init_event(struct fanotify_event *event, event->pid = NULL; } +#define FANOTIFY_INLINE_FH(name, size) \ +struct { \ + struct fanotify_fh (name); \ + /* Space for object_fh.buf[] - access with fanotify_fh_buf() */ \ + unsigned char _inline_fh_buf[(size)]; \ +} + struct fanotify_fid_event { struct fanotify_event fae; __kernel_fsid_t fsid; - struct fanotify_fh object_fh; - /* Reserve space in object_fh.buf[] - access with fanotify_fh_buf() */ - unsigned char _inline_fh_buf[FANOTIFY_INLINE_FH_LEN]; + + FANOTIFY_INLINE_FH(object_fh, FANOTIFY_INLINE_FH_LEN); }; static inline struct fanotify_fid_event * @@ -196,12 +203,30 @@ FANOTIFY_NE(struct fanotify_event *event) return container_of(event, struct fanotify_name_event, fae); } +struct fanotify_error_event { + struct fanotify_event fae; + s32 error; /* Error reported by the Filesystem. */ + u32 err_count; /* Suppressed errors count */ + + __kernel_fsid_t fsid; /* FSID this error refers to. */ + + FANOTIFY_INLINE_FH(object_fh, MAX_HANDLE_SZ); +}; + +static inline struct fanotify_error_event * +FANOTIFY_EE(struct fanotify_event *event) +{ + return container_of(event, struct fanotify_error_event, fae); +} + static inline __kernel_fsid_t *fanotify_event_fsid(struct fanotify_event *event) { if (event->type == FANOTIFY_EVENT_TYPE_FID) return &FANOTIFY_FE(event)->fsid; else if (event->type == FANOTIFY_EVENT_TYPE_FID_NAME) return &FANOTIFY_NE(event)->fsid; + else if (event->type == FANOTIFY_EVENT_TYPE_FS_ERROR) + return &FANOTIFY_EE(event)->fsid; else return NULL; } @@ -213,6 +238,8 @@ static inline struct fanotify_fh *fanotify_event_object_fh( return &FANOTIFY_FE(event)->object_fh; else if (event->type == FANOTIFY_EVENT_TYPE_FID_NAME) return fanotify_info_file_fh(&FANOTIFY_NE(event)->info); + else if (event->type == FANOTIFY_EVENT_TYPE_FS_ERROR) + return &FANOTIFY_EE(event)->object_fh; else return NULL; } @@ -244,6 +271,19 @@ static inline int fanotify_event_dir_fh_len(struct fanotify_event *event) return info ? fanotify_info_dir_fh_len(info) : 0; } +static inline bool fanotify_event_has_object_fh(struct fanotify_event *event) +{ + /* For error events, even zeroed fh are reported. */ + if (event->type == FANOTIFY_EVENT_TYPE_FS_ERROR) + return true; + return fanotify_event_object_fh_len(event) > 0; +} + +static inline bool fanotify_event_has_dir_fh(struct fanotify_event *event) +{ + return fanotify_event_dir_fh_len(event) > 0; +} + struct fanotify_path_event { struct fanotify_event fae; struct path path; @@ -287,6 +327,11 @@ static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse) return container_of(fse, struct fanotify_event, fse); } +static inline bool fanotify_is_error_event(u32 mask) +{ + return mask & FAN_FS_ERROR; +} + static inline bool fanotify_event_has_path(struct fanotify_event *event) { return event->type == FANOTIFY_EVENT_TYPE_PATH || @@ -315,7 +360,8 @@ static inline struct path *fanotify_event_path(struct fanotify_event *event) */ static inline bool fanotify_is_hashed_event(u32 mask) { - return !fanotify_is_perm_event(mask) && !(mask & FS_Q_OVERFLOW); + return !(fanotify_is_perm_event(mask) || + fsnotify_is_overflow_event(mask)); } static inline unsigned int fanotify_event_hash_bucket( diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 6facdf476255..559bc1e9926d 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -30,6 +30,7 @@ #define FANOTIFY_DEFAULT_MAX_EVENTS 16384 #define FANOTIFY_OLD_DEFAULT_MAX_MARKS 8192 #define FANOTIFY_DEFAULT_MAX_GROUPS 128 +#define FANOTIFY_DEFAULT_FEE_POOL_SIZE 32 /* * Legacy fanotify marks limits (8192) is per group and we introduced a tunable @@ -114,6 +115,8 @@ struct kmem_cache *fanotify_perm_event_cachep __read_mostly; (sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle)) #define FANOTIFY_PIDFD_INFO_HDR_LEN \ sizeof(struct fanotify_event_info_pidfd) +#define FANOTIFY_ERROR_INFO_LEN \ + (sizeof(struct fanotify_event_info_error)) static int fanotify_fid_info_len(int fh_len, int name_len) { @@ -126,17 +129,26 @@ static int fanotify_fid_info_len(int fh_len, int name_len) FANOTIFY_EVENT_ALIGN); } -static int fanotify_event_info_len(unsigned int info_mode, - struct fanotify_event *event) +static size_t fanotify_event_len(unsigned int info_mode, + struct fanotify_event *event) { - struct fanotify_info *info = fanotify_event_info(event); - int dir_fh_len = fanotify_event_dir_fh_len(event); - int fh_len = fanotify_event_object_fh_len(event); - int info_len = 0; + size_t event_len = FAN_EVENT_METADATA_LEN; + struct fanotify_info *info; + int dir_fh_len; + int fh_len; int dot_len = 0; - if (dir_fh_len) { - info_len += fanotify_fid_info_len(dir_fh_len, info->name_len); + if (!info_mode) + return event_len; + + if (fanotify_is_error_event(event->mask)) + event_len += FANOTIFY_ERROR_INFO_LEN; + + info = fanotify_event_info(event); + + if (fanotify_event_has_dir_fh(event)) { + dir_fh_len = fanotify_event_dir_fh_len(event); + event_len += fanotify_fid_info_len(dir_fh_len, info->name_len); } else if ((info_mode & FAN_REPORT_NAME) && (event->mask & FAN_ONDIR)) { /* @@ -147,12 +159,14 @@ static int fanotify_event_info_len(unsigned int info_mode, } if (info_mode & FAN_REPORT_PIDFD) - info_len += FANOTIFY_PIDFD_INFO_HDR_LEN; + event_len += FANOTIFY_PIDFD_INFO_HDR_LEN; - if (fh_len) - info_len += fanotify_fid_info_len(fh_len, dot_len); + if (fanotify_event_has_object_fh(event)) { + fh_len = fanotify_event_object_fh_len(event); + event_len += fanotify_fid_info_len(fh_len, dot_len); + } - return info_len; + return event_len; } /* @@ -181,7 +195,7 @@ static void fanotify_unhash_event(struct fsnotify_group *group, static struct fanotify_event *get_one_event(struct fsnotify_group *group, size_t count) { - size_t event_size = FAN_EVENT_METADATA_LEN; + size_t event_size; struct fanotify_event *event = NULL; struct fsnotify_event *fsn_event; unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES); @@ -194,8 +208,7 @@ static struct fanotify_event *get_one_event(struct fsnotify_group *group, goto out; event = FANOTIFY_E(fsn_event); - if (info_mode) - event_size += fanotify_event_info_len(info_mode, event); + event_size = fanotify_event_len(info_mode, event); if (event_size > count) { event = ERR_PTR(-EINVAL); @@ -316,6 +329,28 @@ static int process_access_response(struct fsnotify_group *group, return -ENOENT; } +static size_t copy_error_info_to_user(struct fanotify_event *event, + char __user *buf, int count) +{ + struct fanotify_event_info_error info; + struct fanotify_error_event *fee = FANOTIFY_EE(event); + + info.hdr.info_type = FAN_EVENT_INFO_TYPE_ERROR; + info.hdr.pad = 0; + info.hdr.len = FANOTIFY_ERROR_INFO_LEN; + + if (WARN_ON(count < info.hdr.len)) + return -EFAULT; + + info.error = fee->error; + info.error_count = fee->err_count; + + if (copy_to_user(buf, &info, sizeof(info))) + return -EFAULT; + + return info.hdr.len; +} + static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, int info_type, const char *name, size_t name_len, @@ -331,9 +366,6 @@ static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, pr_debug("%s: fh_len=%zu name_len=%zu, info_len=%zu, count=%zu\n", __func__, fh_len, name_len, info_len, count); - if (!fh_len) - return 0; - if (WARN_ON_ONCE(len < sizeof(info) || len > count)) return -EFAULT; @@ -368,6 +400,11 @@ static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, handle.handle_type = fh->type; handle.handle_bytes = fh_len; + + /* Mangle handle_type for bad file_handle */ + if (!fh_len) + handle.handle_type = FILEID_INVALID; + if (copy_to_user(buf, &handle, sizeof(handle))) return -EFAULT; @@ -444,7 +481,7 @@ static int copy_info_records_to_user(struct fanotify_event *event, /* * Event info records order is as follows: dir fid + name, child fid. */ - if (fanotify_event_dir_fh_len(event)) { + if (fanotify_event_has_dir_fh(event)) { info_type = info->name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME : FAN_EVENT_INFO_TYPE_DFID; ret = copy_fid_info_to_user(fanotify_event_fsid(event), @@ -460,7 +497,7 @@ static int copy_info_records_to_user(struct fanotify_event *event, total_bytes += ret; } - if (fanotify_event_object_fh_len(event)) { + if (fanotify_event_has_object_fh(event)) { const char *dot = NULL; int dot_len = 0; @@ -520,6 +557,15 @@ static int copy_info_records_to_user(struct fanotify_event *event, total_bytes += ret; } + if (fanotify_is_error_event(event->mask)) { + ret = copy_error_info_to_user(event, buf, count); + if (ret < 0) + return ret; + buf += ret; + count -= ret; + total_bytes += ret; + } + return total_bytes; } @@ -537,8 +583,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, pr_debug("%s: group=%p event=%p\n", __func__, group, event); - metadata.event_len = FAN_EVENT_METADATA_LEN + - fanotify_event_info_len(info_mode, event); + metadata.event_len = fanotify_event_len(info_mode, event); metadata.metadata_len = FAN_EVENT_METADATA_LEN; metadata.vers = FANOTIFY_METADATA_VERSION; metadata.reserved = 0; @@ -1049,6 +1094,15 @@ out_dec_ucounts: return ERR_PTR(ret); } +static int fanotify_group_init_error_pool(struct fsnotify_group *group) +{ + if (mempool_initialized(&group->fanotify_data.error_events_pool)) + return 0; + + return mempool_init_kmalloc_pool(&group->fanotify_data.error_events_pool, + FANOTIFY_DEFAULT_FEE_POOL_SIZE, + sizeof(struct fanotify_error_event)); +} static int fanotify_add_mark(struct fsnotify_group *group, fsnotify_connp_t *connp, unsigned int type, @@ -1057,6 +1111,7 @@ static int fanotify_add_mark(struct fsnotify_group *group, { struct fsnotify_mark *fsn_mark; __u32 added; + int ret = 0; mutex_lock(&group->mark_mutex); fsn_mark = fsnotify_find_mark(connp, group); @@ -1067,13 +1122,26 @@ static int fanotify_add_mark(struct fsnotify_group *group, return PTR_ERR(fsn_mark); } } + + /* + * Error events are pre-allocated per group, only if strictly + * needed (i.e. FAN_FS_ERROR was requested). + */ + if (!(flags & FAN_MARK_IGNORED_MASK) && (mask & FAN_FS_ERROR)) { + ret = fanotify_group_init_error_pool(group); + if (ret) + goto out; + } + added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); if (added & ~fsnotify_conn_mask(fsn_mark->connector)) fsnotify_recalc_mask(fsn_mark->connector); + +out: mutex_unlock(&group->mark_mutex); fsnotify_put_mark(fsn_mark); - return 0; + return ret; } static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, @@ -1295,16 +1363,15 @@ out_destroy_group: return fd; } -/* Check if filesystem can encode a unique fid */ -static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid) +static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid) { __kernel_fsid_t root_fsid; int err; /* - * Make sure path is not in filesystem with zero fsid (e.g. tmpfs). + * Make sure dentry is not of a filesystem with zero fsid (e.g. fuse). */ - err = vfs_get_fsid(path->dentry, fsid); + err = vfs_get_fsid(dentry, fsid); if (err) return err; @@ -1312,10 +1379,10 @@ static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid) return -ENODEV; /* - * Make sure path is not inside a filesystem subvolume (e.g. btrfs) + * Make sure dentry is not of a filesystem subvolume (e.g. btrfs) * which uses a different fsid than sb root. */ - err = vfs_get_fsid(path->dentry->d_sb->s_root, &root_fsid); + err = vfs_get_fsid(dentry->d_sb->s_root, &root_fsid); if (err) return err; @@ -1323,6 +1390,12 @@ static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid) root_fsid.val[1] != fsid->val[1]) return -EXDEV; + return 0; +} + +/* Check if filesystem can encode a unique fid */ +static int fanotify_test_fid(struct dentry *dentry) +{ /* * We need to make sure that the file system supports at least * encoding a file handle so user can use name_to_handle_at() to @@ -1330,8 +1403,8 @@ static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid) * objects. However, name_to_handle_at() requires that the * filesystem also supports decoding file handles. */ - if (!path->dentry->d_sb->s_export_op || - !path->dentry->d_sb->s_export_op->fh_to_dentry) + if (!dentry->d_sb->s_export_op || + !dentry->d_sb->s_export_op->fh_to_dentry) return -EOPNOTSUPP; return 0; @@ -1447,15 +1520,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, group->priority == FS_PRIO_0) goto fput_and_out; + if (mask & FAN_FS_ERROR && + mark_type != FAN_MARK_FILESYSTEM) + goto fput_and_out; + /* - * Events with data type inode do not carry enough information to report - * event->fd, so we do not allow setting a mask for inode events unless - * group supports reporting fid. - * inode events are not supported on a mount mark, because they do not - * carry enough information (i.e. path) to be filtered by mount point. + * Events that do not carry enough information to report + * event->fd require a group that supports reporting fid. Those + * events are not supported on a mount mark, because they do not + * carry enough information (i.e. path) to be filtered by mount + * point. */ fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); - if (mask & FANOTIFY_INODE_EVENTS && + if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) && (!fid_mode || mark_type == FAN_MARK_MOUNT)) goto fput_and_out; @@ -1482,7 +1559,11 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, } if (fid_mode) { - ret = fanotify_test_fid(&path, &__fsid); + ret = fanotify_test_fsid(path.dentry, &__fsid); + if (ret) + goto path_put_and_out; + + ret = fanotify_test_fid(path.dentry); if (ret) goto path_put_and_out; diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 963e6ce75b96..4034ca566f95 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -252,6 +252,9 @@ static int fsnotify_handle_inode_event(struct fsnotify_group *group, if (WARN_ON_ONCE(!ops->handle_inode_event)) return 0; + if (WARN_ON_ONCE(!inode && !dir)) + return 0; + if ((inode_mark->mask & FS_EXCL_UNLINK) && path && d_unlinked(path->dentry)) return 0; @@ -455,16 +458,16 @@ static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info) * @file_name is relative to * @file_name: optional file name associated with event * @inode: optional inode associated with event - - * either @dir or @inode must be non-NULL. - * if both are non-NULL event may be reported to both. + * If @dir and @inode are both non-NULL, event may be + * reported to both. * @cookie: inotify rename cookie */ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *file_name, struct inode *inode, u32 cookie) { const struct path *path = fsnotify_data_path(data, data_type); + struct super_block *sb = fsnotify_data_sb(data, data_type); struct fsnotify_iter_info iter_info = {}; - struct super_block *sb; struct mount *mnt = NULL; struct inode *parent = NULL; int ret = 0; @@ -483,7 +486,6 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, */ parent = dir; } - sb = inode->i_sb; /* * Optimization: srcu_read_lock() has a memory barrier which can diff --git a/fs/notify/group.c b/fs/notify/group.c index fb89c351295d..6a297efc4788 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -88,7 +88,7 @@ void fsnotify_destroy_group(struct fsnotify_group *group) * that deliberately ignores overflow events. */ if (group->overflow_event) - group->ops->free_event(group->overflow_event); + group->ops->free_event(group, group->overflow_event); fsnotify_put_group(group); } diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index d1a64daa0171..d92d7b0adc9a 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -116,7 +116,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask, if (len) strcpy(event->name, name->name); - ret = fsnotify_add_event(group, fsn_event, inotify_merge, NULL); + ret = fsnotify_add_event(group, fsn_event, inotify_merge); if (ret) { /* Our event wasn't used in the end. Free it. */ fsnotify_destroy_event(group, fsn_event); @@ -177,7 +177,8 @@ static void inotify_free_group_priv(struct fsnotify_group *group) dec_inotify_instances(group->inotify_data.ucounts); } -static void inotify_free_event(struct fsnotify_event *fsn_event) +static void inotify_free_event(struct fsnotify_group *group, + struct fsnotify_event *fsn_event) { kfree(INOTIFY_E(fsn_event)); } diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 62051247f6d2..29fca3284bb5 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -94,10 +94,10 @@ static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg) __u32 mask; /* - * Everything should accept their own ignored and should receive events - * when the inode is unmounted. All directories care about children. + * Everything should receive events when the inode is unmounted. + * All directories care about children. */ - mask = (FS_IN_IGNORED | FS_UNMOUNT); + mask = (FS_UNMOUNT); if (S_ISDIR(inode->i_mode)) mask |= FS_EVENT_ON_CHILD; diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 32f45543b9c6..9022ae650cf8 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -64,7 +64,7 @@ void fsnotify_destroy_event(struct fsnotify_group *group, WARN_ON(!list_empty(&event->list)); spin_unlock(&group->notification_lock); } - group->ops->free_event(event); + group->ops->free_event(group, event); } /* @@ -78,12 +78,12 @@ void fsnotify_destroy_event(struct fsnotify_group *group, * 2 if the event was not queued - either the queue of events has overflown * or the group is shutting down. */ -int fsnotify_add_event(struct fsnotify_group *group, - struct fsnotify_event *event, - int (*merge)(struct fsnotify_group *, - struct fsnotify_event *), - void (*insert)(struct fsnotify_group *, - struct fsnotify_event *)) +int fsnotify_insert_event(struct fsnotify_group *group, + struct fsnotify_event *event, + int (*merge)(struct fsnotify_group *, + struct fsnotify_event *), + void (*insert)(struct fsnotify_group *, + struct fsnotify_event *)) { int ret = 0; struct list_head *list = &group->notification_list; diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 5d9ae17bd443..bb247bc349e4 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5940,6 +5940,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { + ocfs2_commit_trans(osb, handle); mlog_errno(status); goto bail; } @@ -5964,6 +5965,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, data_alloc_bh, start_blk, num_clusters); if (status < 0) { + ocfs2_commit_trans(osb, handle); mlog_errno(status); goto bail; } @@ -6921,13 +6923,12 @@ static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end, } /* - * Zero the area past i_size but still within an allocated - * cluster. This avoids exposing nonzero data on subsequent file - * extends. + * Zero partial cluster for a hole punch or truncate. This avoids exposing + * nonzero data on subsequent file extends. * * We need to call this before i_size is updated on the inode because * otherwise block_write_full_page() will skip writeout of pages past - * i_size. The new_i_size parameter is passed for this reason. + * i_size. */ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, u64 range_start, u64 range_end) @@ -6945,6 +6946,15 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, if (!ocfs2_sparse_alloc(OCFS2_SB(sb))) return 0; + /* + * Avoid zeroing pages fully beyond current i_size. It is pointless as + * underlying blocks of those pages should be already zeroed out and + * page writeback will skip them anyway. + */ + range_end = min_t(u64, range_end, i_size_read(inode)); + if (range_start >= range_end) + return 0; + pages = kcalloc(ocfs2_pages_per_cluster(sb), sizeof(struct page *), GFP_NOFS); if (pages == NULL) { @@ -6953,9 +6963,6 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, goto out; } - if (range_start == range_end) - goto out; - ret = ocfs2_extent_map_get_blocks(inode, range_start >> sb->s_blocksize_bits, &phys, NULL, &ext_flags); diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 0e7aad1b11cc..5cd5f7511dac 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2698,7 +2698,6 @@ static int dlm_send_begin_reco_message(struct dlm_ctxt *dlm, u8 dead_node) continue; } retry: - ret = -EINVAL; mlog(0, "attempting to send begin reco msg to %d\n", nodenum); ret = o2net_send_message(DLM_BEGIN_RECO_MSG, dlm->key, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 54d7843c0211..fc5f780fa235 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -476,10 +476,11 @@ int ocfs2_truncate_file(struct inode *inode, * greater than page size, so we have to truncate them * anyway. */ - unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); - truncate_inode_pages(inode->i_mapping, new_i_size); if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { + unmap_mapping_range(inode->i_mapping, + new_i_size + PAGE_SIZE - 1, 0, 1); + truncate_inode_pages(inode->i_mapping, new_i_size); status = ocfs2_truncate_inline(inode, di_bh, new_i_size, i_size_read(inode), 1); if (status) @@ -498,6 +499,9 @@ int ocfs2_truncate_file(struct inode *inode, goto bail_unlock_sem; } + unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); + truncate_inode_pages(inode->i_mapping, new_i_size); + status = ocfs2_commit_truncate(osb, inode, di_bh); if (status < 0) { mlog_errno(status); diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index bc8f32fab964..6c2411c2afcf 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -125,7 +125,6 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags, struct inode *inode = NULL; struct super_block *sb = osb->sb; struct ocfs2_find_inode_args args; - journal_t *journal = OCFS2_SB(sb)->journal->j_journal; trace_ocfs2_iget_begin((unsigned long long)blkno, flags, sysfile_type); @@ -172,10 +171,11 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags, * part of the transaction - the inode could have been reclaimed and * now it is reread from disk. */ - if (journal) { + if (osb->journal) { transaction_t *transaction; tid_t tid; struct ocfs2_inode_info *oi = OCFS2_I(inode); + journal_t *journal = osb->journal->j_journal; read_lock(&journal->j_state_lock); if (journal->j_running_transaction) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 4f15750aac5d..dbf9b9e97d74 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -810,19 +810,34 @@ void ocfs2_set_journal_params(struct ocfs2_super *osb) write_unlock(&journal->j_state_lock); } -int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) +int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty) { int status = -1; struct inode *inode = NULL; /* the journal inode */ journal_t *j_journal = NULL; + struct ocfs2_journal *journal = NULL; struct ocfs2_dinode *di = NULL; struct buffer_head *bh = NULL; - struct ocfs2_super *osb; int inode_lock = 0; - BUG_ON(!journal); + /* initialize our journal structure */ + journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL); + if (!journal) { + mlog(ML_ERROR, "unable to alloc journal\n"); + status = -ENOMEM; + goto done; + } + osb->journal = journal; + journal->j_osb = osb; - osb = journal->j_osb; + atomic_set(&journal->j_num_trans, 0); + init_rwsem(&journal->j_trans_barrier); + init_waitqueue_head(&journal->j_checkpointed); + spin_lock_init(&journal->j_lock); + journal->j_trans_id = 1UL; + INIT_LIST_HEAD(&journal->j_la_cleanups); + INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery); + journal->j_state = OCFS2_JOURNAL_FREE; /* already have the inode for our journal */ inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, @@ -1028,9 +1043,10 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) journal->j_state = OCFS2_JOURNAL_FREE; -// up_write(&journal->j_trans_barrier); done: iput(inode); + kfree(journal); + osb->journal = NULL; } static void ocfs2_clear_journal_error(struct super_block *sb, @@ -1497,10 +1513,7 @@ bail: if (quota_enabled) kfree(rm_quota); - /* no one is callint kthread_stop() for us so the kthread() api - * requires that we call do_exit(). And it isn't exported, but - * complete_and_exit() seems to be a minimal wrapper around it. */ - complete_and_exit(NULL, status); + return status; } void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index d158acb8b38a..8dcb2f2cadbc 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -167,8 +167,7 @@ int ocfs2_compute_replay_slots(struct ocfs2_super *osb); * ocfs2_start_checkpoint - Kick the commit thread to do a checkpoint. */ void ocfs2_set_journal_params(struct ocfs2_super *osb); -int ocfs2_journal_init(struct ocfs2_journal *journal, - int *dirty); +int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty); void ocfs2_journal_shutdown(struct ocfs2_super *osb); int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 5c914ce9b3ac..1286b88b6fa1 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1894,8 +1894,6 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) /* This will disable recovery and flush any recovery work. */ ocfs2_recovery_exit(osb); - ocfs2_journal_shutdown(osb); - ocfs2_sync_blockdev(sb); ocfs2_purge_refcount_trees(osb); @@ -1918,6 +1916,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) ocfs2_release_system_inodes(osb); + ocfs2_journal_shutdown(osb); + /* * If we're dismounting due to mount error, mount.ocfs2 will clean * up heartbeat. If we're a local mount, there is no heartbeat. @@ -2016,7 +2016,6 @@ static int ocfs2_initialize_super(struct super_block *sb, int i, cbits, bbits; struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; struct inode *inode = NULL; - struct ocfs2_journal *journal; struct ocfs2_super *osb; u64 total_blocks; @@ -2197,33 +2196,6 @@ static int ocfs2_initialize_super(struct super_block *sb, get_random_bytes(&osb->s_next_generation, sizeof(u32)); - /* FIXME - * This should be done in ocfs2_journal_init(), but unknown - * ordering issues will cause the filesystem to crash. - * If anyone wants to figure out what part of the code - * refers to osb->journal before ocfs2_journal_init() is run, - * be my guest. - */ - /* initialize our journal structure */ - - journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL); - if (!journal) { - mlog(ML_ERROR, "unable to alloc journal\n"); - status = -ENOMEM; - goto bail; - } - osb->journal = journal; - journal->j_osb = osb; - - atomic_set(&journal->j_num_trans, 0); - init_rwsem(&journal->j_trans_barrier); - init_waitqueue_head(&journal->j_checkpointed); - spin_lock_init(&journal->j_lock); - journal->j_trans_id = (unsigned long) 1; - INIT_LIST_HEAD(&journal->j_la_cleanups); - INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery); - journal->j_state = OCFS2_JOURNAL_FREE; - INIT_WORK(&osb->dquot_drop_work, ocfs2_drop_dquot_refs); init_llist_head(&osb->dquot_drop_list); @@ -2404,7 +2376,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) * ourselves. */ /* Init our journal object. */ - status = ocfs2_journal_init(osb->journal, &dirty); + status = ocfs2_journal_init(osb, &dirty); if (status < 0) { mlog(ML_ERROR, "Could not initialize journal!\n"); goto finally; @@ -2513,12 +2485,6 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb) kfree(osb->osb_orphan_wipes); kfree(osb->slot_recovery_generations); - /* FIXME - * This belongs in journal shutdown, but because we have to - * allocate osb->journal at the start of ocfs2_initialize_osb(), - * we free it here. - */ - kfree(osb->journal); kfree(osb->local_alloc_copy); kfree(osb->uuid_str); kfree(osb->vol_label); diff --git a/fs/open.c b/fs/open.c index a7f6cab81267..f732fb94600c 100644 --- a/fs/open.c +++ b/fs/open.c @@ -856,8 +856,20 @@ static int do_dentry_open(struct file *f, * of THPs into the page cache will fail. */ smp_mb(); - if (filemap_nr_thps(inode->i_mapping)) - truncate_pagecache(inode, 0); + if (filemap_nr_thps(inode->i_mapping)) { + struct address_space *mapping = inode->i_mapping; + + filemap_invalidate_lock(inode->i_mapping); + /* + * unmap_mapping_range just need to be called once + * here, because the private pages is not need to be + * unmapped mapping (e.g. data segment of dynamic + * shared libraries here). + */ + unmap_mapping_range(mapping, 0, 0, 0); + truncate_inode_pages(mapping, 0); + filemap_invalidate_unlock(inode->i_mapping); + } } return 0; diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c index fe484cf93e5c..8bbe9486e3a6 100644 --- a/fs/orangefs/dcache.c +++ b/fs/orangefs/dcache.c @@ -26,8 +26,10 @@ static int orangefs_revalidate_lookup(struct dentry *dentry) gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: attempting lookup.\n", __func__); new_op = op_alloc(ORANGEFS_VFS_OP_LOOKUP); - if (!new_op) + if (!new_op) { + ret = -ENOMEM; goto out_put_parent; + } new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW; new_op->upcall.req.lookup.parent_refn = parent->refn; diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 8bb0a53a658b..d90d8addbfc2 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -476,7 +476,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst, const char *devname, void *data) { - int ret = -EINVAL; + int ret; struct super_block *sb = ERR_PTR(-EINVAL); struct orangefs_kernel_op_s *new_op; struct dentry *d = ERR_PTR(-EINVAL); @@ -527,7 +527,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst, sb->s_fs_info = kzalloc(sizeof(struct orangefs_sb_info_s), GFP_KERNEL); if (!ORANGEFS_SB(sb)) { d = ERR_PTR(-ENOMEM); - goto free_op; + goto free_sb_and_op; } ret = orangefs_fill_sb(sb, diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 4e7d5bfa2949..b193d08a3dc3 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -140,12 +140,14 @@ static int ovl_copy_fileattr(struct inode *inode, struct path *old, int err; err = ovl_real_fileattr_get(old, &oldfa); - if (err) - return err; - - err = ovl_real_fileattr_get(new, &newfa); - if (err) + if (err) { + /* Ntfs-3g returns -EINVAL for "no fileattr support" */ + if (err == -ENOTTY || err == -EINVAL) + return 0; + pr_warn("failed to retrieve lower fileattr (%pd2, err=%i)\n", + old, err); return err; + } /* * We cannot set immutable and append-only flags on upper inode, @@ -159,6 +161,17 @@ static int ovl_copy_fileattr(struct inode *inode, struct path *old, return err; } + /* Don't bother copying flags if none are set */ + if (!(oldfa.flags & OVL_COPY_FS_FLAGS_MASK)) + return 0; + + err = ovl_real_fileattr_get(new, &newfa); + if (err) { + pr_warn("failed to retrieve upper fileattr (%pd2, err=%i)\n", + new, err); + return err; + } + BUILD_BUG_ON(OVL_COPY_FS_FLAGS_MASK & ~FS_COMMON_FL); newfa.flags &= ~OVL_COPY_FS_FLAGS_MASK; newfa.flags |= (oldfa.flags & OVL_COPY_FS_FLAGS_MASK); diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 93c7c267de93..f18490813170 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -137,8 +137,7 @@ kill_whiteout: goto out; } -static int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, - umode_t mode) +int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, umode_t mode) { int err; struct dentry *d, *dentry = *newdentry; diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index ac461a499882..fa125feed0ff 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -17,6 +17,7 @@ struct ovl_aio_req { struct kiocb iocb; + refcount_t ref; struct kiocb *orig_iocb; struct fd fd; }; @@ -252,6 +253,14 @@ static rwf_t ovl_iocb_to_rwf(int ifl) return flags; } +static inline void ovl_aio_put(struct ovl_aio_req *aio_req) +{ + if (refcount_dec_and_test(&aio_req->ref)) { + fdput(aio_req->fd); + kmem_cache_free(ovl_aio_request_cachep, aio_req); + } +} + static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) { struct kiocb *iocb = &aio_req->iocb; @@ -268,8 +277,7 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) } orig_iocb->ki_pos = iocb->ki_pos; - fdput(aio_req->fd); - kmem_cache_free(ovl_aio_request_cachep, aio_req); + ovl_aio_put(aio_req); } static void ovl_aio_rw_complete(struct kiocb *iocb, long res) @@ -319,7 +327,9 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) aio_req->orig_iocb = iocb; kiocb_clone(&aio_req->iocb, iocb, real.file); aio_req->iocb.ki_complete = ovl_aio_rw_complete; + refcount_set(&aio_req->ref, 2); ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter); + ovl_aio_put(aio_req); if (ret != -EIOCBQUEUED) ovl_aio_cleanup_handler(aio_req); } @@ -390,7 +400,9 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) kiocb_clone(&aio_req->iocb, iocb, real.file); aio_req->iocb.ki_flags = ifl; aio_req->iocb.ki_complete = ovl_aio_rw_complete; + refcount_set(&aio_req->ref, 2); ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter); + ovl_aio_put(aio_req); if (ret != -EIOCBQUEUED) ovl_aio_cleanup_handler(aio_req); } diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 832b17589733..1f36158c7dbe 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -610,7 +610,10 @@ int ovl_real_fileattr_get(struct path *realpath, struct fileattr *fa) if (err) return err; - return vfs_fileattr_get(realpath->dentry, fa); + err = vfs_fileattr_get(realpath->dentry, fa); + if (err == -ENOIOCTLCMD) + err = -ENOTTY; + return err; } int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 3894f3347955..2cd5741c873b 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -570,6 +570,7 @@ struct ovl_cattr { #define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) }) +int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, umode_t mode); struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry, struct ovl_cattr *attr); int ovl_cleanup(struct inode *dir, struct dentry *dentry); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 178daa5e82c9..265181c110ae 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -787,10 +787,14 @@ retry: goto retry; } - work = ovl_create_real(dir, work, OVL_CATTR(attr.ia_mode)); - err = PTR_ERR(work); - if (IS_ERR(work)) - goto out_err; + err = ovl_mkdir_real(dir, &work, attr.ia_mode); + if (err) + goto out_dput; + + /* Weird filesystem returning with hashed negative (kernfs)? */ + err = -EINVAL; + if (d_really_is_negative(work)) + goto out_dput; /* * Try to remove POSIX ACL xattrs from workdir. We are good if: diff --git a/fs/posix_acl.c b/fs/posix_acl.c index f5c25f580dd9..9323a854a60a 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -134,8 +134,7 @@ struct posix_acl *get_acl(struct inode *inode, int type) * to just call ->get_acl to fetch the ACL ourself. (This is going to * be an unlikely race.) */ - if (cmpxchg(p, ACL_NOT_CACHED, sentinel) != ACL_NOT_CACHED) - /* fall through */ ; + cmpxchg(p, ACL_NOT_CACHED, sentinel); /* * Normally, the ACL returned by ->get_acl will be cached. diff --git a/fs/proc/array.c b/fs/proc/array.c index 77cf4187adec..ff869a66b34e 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -408,9 +408,9 @@ static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) cpumask_pr_args(&task->cpus_mask)); } -static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm) +static inline void task_core_dumping(struct seq_file *m, struct task_struct *task) { - seq_put_decimal_ull(m, "CoreDumping:\t", !!mm->core_state); + seq_put_decimal_ull(m, "CoreDumping:\t", !!task->signal->core_state); seq_putc(m, '\n'); } @@ -436,7 +436,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, if (mm) { task_mem(m, mm); - task_core_dumping(m, mm); + task_core_dumping(m, task); task_thp_status(m, mm); mmput(mm); } diff --git a/fs/proc/base.c b/fs/proc/base.c index 1f394095eb88..13eda8de2998 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1982,19 +1982,21 @@ static int pid_revalidate(struct dentry *dentry, unsigned int flags) { struct inode *inode; struct task_struct *task; + int ret = 0; - if (flags & LOOKUP_RCU) - return -ECHILD; - - inode = d_inode(dentry); - task = get_proc_task(inode); + rcu_read_lock(); + inode = d_inode_rcu(dentry); + if (!inode) + goto out; + task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) { pid_update_inode(task, inode); - put_task_struct(task); - return 1; + ret = 1; } - return 0; +out: + rcu_read_unlock(); + return ret; } static inline bool proc_inode_is_dead(struct inode *inode) @@ -3802,7 +3804,10 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) task = next_tid(task), ctx->pos++) { char name[10 + 1]; unsigned int len; + tid = task_pid_nr_ns(task, ns); + if (!tid) + continue; /* The task has just exited. */ len = snprintf(name, sizeof(name), "%u", tid); if (!proc_fill_cache(file, ctx, name, len, proc_task_instantiate, task, NULL)) { diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index cf25be3e0321..ad667dbc96f5 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -397,7 +397,6 @@ struct mem_size_stats { u64 pss_shmem; u64 pss_locked; u64 swap_pss; - bool check_shmem_swap; }; static void smaps_page_accumulate(struct mem_size_stats *mss, @@ -478,9 +477,11 @@ static int smaps_pte_hole(unsigned long addr, unsigned long end, __always_unused int depth, struct mm_walk *walk) { struct mem_size_stats *mss = walk->private; + struct vm_area_struct *vma = walk->vma; - mss->swap += shmem_partial_swap_usage( - walk->vma->vm_file->f_mapping, addr, end); + mss->swap += shmem_partial_swap_usage(walk->vma->vm_file->f_mapping, + linear_page_index(vma, addr), + linear_page_index(vma, end)); return 0; } @@ -488,6 +489,16 @@ static int smaps_pte_hole(unsigned long addr, unsigned long end, #define smaps_pte_hole NULL #endif /* CONFIG_SHMEM */ +static void smaps_pte_hole_lookup(unsigned long addr, struct mm_walk *walk) +{ +#ifdef CONFIG_SHMEM + if (walk->ops->pte_hole) { + /* depth is not used */ + smaps_pte_hole(addr, addr + PAGE_SIZE, 0, walk); + } +#endif +} + static void smaps_pte_entry(pte_t *pte, unsigned long addr, struct mm_walk *walk) { @@ -516,12 +527,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, } } else if (is_pfn_swap_entry(swpent)) page = pfn_swap_entry_to_page(swpent); - } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap - && pte_none(*pte))) { - page = xa_load(&vma->vm_file->f_mapping->i_pages, - linear_page_index(vma, addr)); - if (xa_is_value(page)) - mss->swap += PAGE_SIZE; + } else { + smaps_pte_hole_lookup(addr, walk); return; } @@ -735,8 +742,6 @@ static void smap_gather_stats(struct vm_area_struct *vma, return; #ifdef CONFIG_SHMEM - /* In case of smaps_rollup, reset the value from previous vma */ - mss->check_shmem_swap = false; if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) { /* * For shared or readonly shmem mappings we know that all @@ -754,7 +759,6 @@ static void smap_gather_stats(struct vm_area_struct *vma, !(vma->vm_flags & VM_WRITE))) { mss->swap += shmem_swapped; } else { - mss->check_shmem_swap = true; ops = &smaps_shmem_walk_ops; } } diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index cdbbf819d2d6..30a3b66f475a 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -62,46 +62,75 @@ core_param(novmcoredd, vmcoredd_disabled, bool, 0); /* Device Dump Size */ static size_t vmcoredd_orig_sz; -/* - * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error - * The called function has to take care of module refcounting. - */ -static int (*oldmem_pfn_is_ram)(unsigned long pfn); - -int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn)) +static DECLARE_RWSEM(vmcore_cb_rwsem); +/* List of registered vmcore callbacks. */ +static LIST_HEAD(vmcore_cb_list); +/* Whether we had a surprise unregistration of a callback. */ +static bool vmcore_cb_unstable; +/* Whether the vmcore has been opened once. */ +static bool vmcore_opened; + +void register_vmcore_cb(struct vmcore_cb *cb) { - if (oldmem_pfn_is_ram) - return -EBUSY; - oldmem_pfn_is_ram = fn; - return 0; + down_write(&vmcore_cb_rwsem); + INIT_LIST_HEAD(&cb->next); + list_add_tail(&cb->next, &vmcore_cb_list); + /* + * Registering a vmcore callback after the vmcore was opened is + * very unusual (e.g., manual driver loading). + */ + if (vmcore_opened) + pr_warn_once("Unexpected vmcore callback registration\n"); + up_write(&vmcore_cb_rwsem); } -EXPORT_SYMBOL_GPL(register_oldmem_pfn_is_ram); +EXPORT_SYMBOL_GPL(register_vmcore_cb); -void unregister_oldmem_pfn_is_ram(void) +void unregister_vmcore_cb(struct vmcore_cb *cb) { - oldmem_pfn_is_ram = NULL; - wmb(); + down_write(&vmcore_cb_rwsem); + list_del(&cb->next); + /* + * Unregistering a vmcore callback after the vmcore was opened is + * very unusual (e.g., forced driver removal), but we cannot stop + * unregistering. + */ + if (vmcore_opened) { + pr_warn_once("Unexpected vmcore callback unregistration\n"); + vmcore_cb_unstable = true; + } + up_write(&vmcore_cb_rwsem); } -EXPORT_SYMBOL_GPL(unregister_oldmem_pfn_is_ram); +EXPORT_SYMBOL_GPL(unregister_vmcore_cb); -static int pfn_is_ram(unsigned long pfn) +static bool pfn_is_ram(unsigned long pfn) { - int (*fn)(unsigned long pfn); - /* pfn is ram unless fn() checks pagetype */ - int ret = 1; + struct vmcore_cb *cb; + bool ret = true; - /* - * Ask hypervisor if the pfn is really ram. - * A ballooned page contains no data and reading from such a page - * will cause high load in the hypervisor. - */ - fn = oldmem_pfn_is_ram; - if (fn) - ret = fn(pfn); + lockdep_assert_held_read(&vmcore_cb_rwsem); + if (unlikely(vmcore_cb_unstable)) + return false; + + list_for_each_entry(cb, &vmcore_cb_list, next) { + if (unlikely(!cb->pfn_is_ram)) + continue; + ret = cb->pfn_is_ram(cb, pfn); + if (!ret) + break; + } return ret; } +static int open_vmcore(struct inode *inode, struct file *file) +{ + down_read(&vmcore_cb_rwsem); + vmcore_opened = true; + up_read(&vmcore_cb_rwsem); + + return 0; +} + /* Reads a page from the oldmem device from given offset. */ ssize_t read_from_oldmem(char *buf, size_t count, u64 *ppos, int userbuf, @@ -117,6 +146,7 @@ ssize_t read_from_oldmem(char *buf, size_t count, offset = (unsigned long)(*ppos % PAGE_SIZE); pfn = (unsigned long)(*ppos / PAGE_SIZE); + down_read(&vmcore_cb_rwsem); do { if (count > (PAGE_SIZE - offset)) nr_bytes = PAGE_SIZE - offset; @@ -124,7 +154,7 @@ ssize_t read_from_oldmem(char *buf, size_t count, nr_bytes = count; /* If pfn is not ram, return zeros for sparse dump files */ - if (pfn_is_ram(pfn) == 0) + if (!pfn_is_ram(pfn)) memset(buf, 0, nr_bytes); else { if (encrypted) @@ -136,8 +166,10 @@ ssize_t read_from_oldmem(char *buf, size_t count, tmp = copy_oldmem_page(pfn, buf, nr_bytes, offset, userbuf); - if (tmp < 0) + if (tmp < 0) { + up_read(&vmcore_cb_rwsem); return tmp; + } } *ppos += nr_bytes; count -= nr_bytes; @@ -147,6 +179,7 @@ ssize_t read_from_oldmem(char *buf, size_t count, offset = 0; } while (count); + up_read(&vmcore_cb_rwsem); return read; } @@ -537,14 +570,19 @@ static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma, unsigned long from, unsigned long pfn, unsigned long size, pgprot_t prot) { + int ret; + /* * Check if oldmem_pfn_is_ram was registered to avoid * looping over all pages without a reason. */ - if (oldmem_pfn_is_ram) - return remap_oldmem_pfn_checked(vma, from, pfn, size, prot); + down_read(&vmcore_cb_rwsem); + if (!list_empty(&vmcore_cb_list) || vmcore_cb_unstable) + ret = remap_oldmem_pfn_checked(vma, from, pfn, size, prot); else - return remap_oldmem_pfn_range(vma, from, pfn, size, prot); + ret = remap_oldmem_pfn_range(vma, from, pfn, size, prot); + up_read(&vmcore_cb_rwsem); + return ret; } static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) @@ -668,6 +706,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) #endif static const struct proc_ops vmcore_proc_ops = { + .proc_open = open_vmcore, .proc_read = read_vmcore, .proc_lseek = default_llseek, .proc_mmap = mmap_vmcore, diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c index d3e995e1046f..5f2405994280 100644 --- a/fs/quota/quota_tree.c +++ b/fs/quota/quota_tree.c @@ -414,6 +414,7 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, quota_error(dquot->dq_sb, "Quota structure has offset to " "other block (%u) than it should (%u)", blk, (uint)(dquot->dq_off >> info->dqi_blocksize_bits)); + ret = -EIO; goto out_buf; } ret = read_blk(info, blk, buf); @@ -479,6 +480,13 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, goto out_buf; } newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); + if (newblk < QT_TREEOFF || newblk >= info->dqi_blocks) { + quota_error(dquot->dq_sb, "Getting block too big (%u >= %u)", + newblk, info->dqi_blocks); + ret = -EUCLEAN; + goto out_buf; + } + if (depth == info->dqi_qtree_depth - 1) { ret = free_dqentry(info, dquot, newblk); newblk = 0; @@ -578,6 +586,13 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info, blk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); if (!blk) /* No reference? */ goto out_buf; + if (blk < QT_TREEOFF || blk >= info->dqi_blocks) { + quota_error(dquot->dq_sb, "Getting block too big (%u >= %u)", + blk, info->dqi_blocks); + ret = -EUCLEAN; + goto out_buf; + } + if (depth < info->dqi_qtree_depth - 1) ret = find_tree_dqentry(info, dquot, blk, depth+1); else diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index e2302342a67f..bc66d0173e33 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -204,17 +204,20 @@ static int ramfs_parse_param(struct fs_context *fc, struct fs_parameter *param) int opt; opt = fs_parse(fc, ramfs_fs_parameters, param, &result); - if (opt < 0) { + if (opt == -ENOPARAM) { + opt = vfs_parse_fs_param_source(fc, param); + if (opt != -ENOPARAM) + return opt; /* * We might like to report bad mount options here; * but traditionally ramfs has ignored all mount options, * and as it is used as a !CONFIG_SHMEM simple substitute * for tmpfs, better continue to ignore other mount options. */ - if (opt == -ENOPARAM) - opt = 0; - return opt; + return 0; } + if (opt < 0) + return opt; switch (opt) { case Opt_mode: diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 076f9ab94306..82e09901462e 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1435,7 +1435,6 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) unsigned long safe_mask = 0; unsigned int commit_max_age = (unsigned int)-1; struct reiserfs_journal *journal = SB_JOURNAL(s); - char *new_opts; int err; char *qf_names[REISERFS_MAXQUOTAS]; unsigned int qfmt = 0; @@ -1443,10 +1442,6 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) int i; #endif - new_opts = kstrdup(arg, GFP_KERNEL); - if (arg && !new_opts) - return -ENOMEM; - sync_filesystem(s); reiserfs_write_lock(s); @@ -1597,7 +1592,6 @@ out_ok_unlocked: out_err_unlock: reiserfs_write_unlock(s); out_err: - kfree(new_opts); return err; } diff --git a/fs/seq_file.c b/fs/seq_file.c index 4a2cda04d3e2..f8e1f4ee87ff 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -383,22 +383,6 @@ void seq_escape_mem(struct seq_file *m, const char *src, size_t len, } EXPORT_SYMBOL(seq_escape_mem); -/** - * seq_escape - print string into buffer, escaping some characters - * @m: target buffer - * @s: string - * @esc: set of characters that need escaping - * - * Puts string into buffer, replacing each occurrence of character from - * @esc with usual octal escape. - * Use seq_has_overflowed() to check for errors. - */ -void seq_escape(struct seq_file *m, const char *s, const char *esc) -{ - seq_escape_str(m, s, ESCAPE_OCTAL, esc); -} -EXPORT_SYMBOL(seq_escape); - void seq_vprintf(struct seq_file *m, const char *f, va_list args) { int len; diff --git a/fs/smbfs_common/smb2pdu.h b/fs/smbfs_common/smb2pdu.h new file mode 100644 index 000000000000..7ccadcbe684b --- /dev/null +++ b/fs/smbfs_common/smb2pdu.h @@ -0,0 +1,989 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ +#ifndef _COMMON_SMB2PDU_H +#define _COMMON_SMB2PDU_H + +/* + * Note that, due to trying to use names similar to the protocol specifications, + * there are many mixed case field names in the structures below. Although + * this does not match typical Linux kernel style, it is necessary to be + * able to match against the protocol specfication. + * + * SMB2 commands + * Some commands have minimal (wct=0,bcc=0), or uninteresting, responses + * (ie no useful data other than the SMB error code itself) and are marked such. + * Knowing this helps avoid response buffer allocations and copy in some cases. + */ + +/* List of commands in host endian */ +#define SMB2_NEGOTIATE_HE 0x0000 +#define SMB2_SESSION_SETUP_HE 0x0001 +#define SMB2_LOGOFF_HE 0x0002 /* trivial request/resp */ +#define SMB2_TREE_CONNECT_HE 0x0003 +#define SMB2_TREE_DISCONNECT_HE 0x0004 /* trivial req/resp */ +#define SMB2_CREATE_HE 0x0005 +#define SMB2_CLOSE_HE 0x0006 +#define SMB2_FLUSH_HE 0x0007 /* trivial resp */ +#define SMB2_READ_HE 0x0008 +#define SMB2_WRITE_HE 0x0009 +#define SMB2_LOCK_HE 0x000A +#define SMB2_IOCTL_HE 0x000B +#define SMB2_CANCEL_HE 0x000C +#define SMB2_ECHO_HE 0x000D +#define SMB2_QUERY_DIRECTORY_HE 0x000E +#define SMB2_CHANGE_NOTIFY_HE 0x000F +#define SMB2_QUERY_INFO_HE 0x0010 +#define SMB2_SET_INFO_HE 0x0011 +#define SMB2_OPLOCK_BREAK_HE 0x0012 + +/* The same list in little endian */ +#define SMB2_NEGOTIATE cpu_to_le16(SMB2_NEGOTIATE_HE) +#define SMB2_SESSION_SETUP cpu_to_le16(SMB2_SESSION_SETUP_HE) +#define SMB2_LOGOFF cpu_to_le16(SMB2_LOGOFF_HE) +#define SMB2_TREE_CONNECT cpu_to_le16(SMB2_TREE_CONNECT_HE) +#define SMB2_TREE_DISCONNECT cpu_to_le16(SMB2_TREE_DISCONNECT_HE) +#define SMB2_CREATE cpu_to_le16(SMB2_CREATE_HE) +#define SMB2_CLOSE cpu_to_le16(SMB2_CLOSE_HE) +#define SMB2_FLUSH cpu_to_le16(SMB2_FLUSH_HE) +#define SMB2_READ cpu_to_le16(SMB2_READ_HE) +#define SMB2_WRITE cpu_to_le16(SMB2_WRITE_HE) +#define SMB2_LOCK cpu_to_le16(SMB2_LOCK_HE) +#define SMB2_IOCTL cpu_to_le16(SMB2_IOCTL_HE) +#define SMB2_CANCEL cpu_to_le16(SMB2_CANCEL_HE) +#define SMB2_ECHO cpu_to_le16(SMB2_ECHO_HE) +#define SMB2_QUERY_DIRECTORY cpu_to_le16(SMB2_QUERY_DIRECTORY_HE) +#define SMB2_CHANGE_NOTIFY cpu_to_le16(SMB2_CHANGE_NOTIFY_HE) +#define SMB2_QUERY_INFO cpu_to_le16(SMB2_QUERY_INFO_HE) +#define SMB2_SET_INFO cpu_to_le16(SMB2_SET_INFO_HE) +#define SMB2_OPLOCK_BREAK cpu_to_le16(SMB2_OPLOCK_BREAK_HE) + +#define SMB2_INTERNAL_CMD cpu_to_le16(0xFFFF) + +#define NUMBER_OF_SMB2_COMMANDS 0x0013 + +/* + * SMB2 Header Definition + * + * "MBZ" : Must be Zero + * "BB" : BugBug, Something to check/review/analyze later + * "PDU" : "Protocol Data Unit" (ie a network "frame") + * + */ + +#define __SMB2_HEADER_STRUCTURE_SIZE 64 +#define SMB2_HEADER_STRUCTURE_SIZE \ + cpu_to_le16(__SMB2_HEADER_STRUCTURE_SIZE) + +#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe) +#define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd) +#define SMB2_COMPRESSION_TRANSFORM_ID cpu_to_le32(0x424d53fc) + +/* + * SMB2 flag definitions + */ +#define SMB2_FLAGS_SERVER_TO_REDIR cpu_to_le32(0x00000001) +#define SMB2_FLAGS_ASYNC_COMMAND cpu_to_le32(0x00000002) +#define SMB2_FLAGS_RELATED_OPERATIONS cpu_to_le32(0x00000004) +#define SMB2_FLAGS_SIGNED cpu_to_le32(0x00000008) +#define SMB2_FLAGS_PRIORITY_MASK cpu_to_le32(0x00000070) /* SMB3.1.1 */ +#define SMB2_FLAGS_DFS_OPERATIONS cpu_to_le32(0x10000000) +#define SMB2_FLAGS_REPLAY_OPERATION cpu_to_le32(0x20000000) /* SMB3 & up */ + +/* See MS-SMB2 section 2.2.1 */ +struct smb2_hdr { + __le32 ProtocolId; /* 0xFE 'S' 'M' 'B' */ + __le16 StructureSize; /* 64 */ + __le16 CreditCharge; /* MBZ */ + __le32 Status; /* Error from server */ + __le16 Command; + __le16 CreditRequest; /* CreditResponse */ + __le32 Flags; + __le32 NextCommand; + __le64 MessageId; + union { + struct { + __le32 ProcessId; + __le32 TreeId; + } __packed SyncId; + __le64 AsyncId; + } __packed Id; + __le64 SessionId; + __u8 Signature[16]; +} __packed; + +struct smb2_pdu { + struct smb2_hdr hdr; + __le16 StructureSize2; /* size of wct area (varies, request specific) */ +} __packed; + +#define SMB3_AES_CCM_NONCE 11 +#define SMB3_AES_GCM_NONCE 12 + +/* Transform flags (for 3.0 dialect this flag indicates CCM */ +#define TRANSFORM_FLAG_ENCRYPTED 0x0001 +struct smb2_transform_hdr { + __le32 ProtocolId; /* 0xFD 'S' 'M' 'B' */ + __u8 Signature[16]; + __u8 Nonce[16]; + __le32 OriginalMessageSize; + __u16 Reserved1; + __le16 Flags; /* EncryptionAlgorithm for 3.0, enc enabled for 3.1.1 */ + __le64 SessionId; +} __packed; + + +/* See MS-SMB2 2.2.42 */ +struct smb2_compression_transform_hdr_unchained { + __le32 ProtocolId; /* 0xFC 'S' 'M' 'B' */ + __le32 OriginalCompressedSegmentSize; + __le16 CompressionAlgorithm; + __le16 Flags; + __le16 Length; /* if chained it is length, else offset */ +} __packed; + +/* See MS-SMB2 2.2.42.1 */ +#define SMB2_COMPRESSION_FLAG_NONE 0x0000 +#define SMB2_COMPRESSION_FLAG_CHAINED 0x0001 + +struct compression_payload_header { + __le16 CompressionAlgorithm; + __le16 Flags; + __le32 Length; /* length of compressed playload including field below if present */ + /* __le32 OriginalPayloadSize; */ /* optional, present when LZNT1, LZ77, LZ77+Huffman */ +} __packed; + +/* See MS-SMB2 2.2.42.2 */ +struct smb2_compression_transform_hdr_chained { + __le32 ProtocolId; /* 0xFC 'S' 'M' 'B' */ + __le32 OriginalCompressedSegmentSize; + /* struct compression_payload_header[] */ +} __packed; + +/* See MS-SMB2 2.2.42.2.2 */ +struct compression_pattern_payload_v1 { + __le16 Pattern; + __le16 Reserved1; + __le16 Reserved2; + __le32 Repetitions; +} __packed; + +/* See MS-SMB2 section 2.2.9.2 */ +/* Context Types */ +#define SMB2_RESERVED_TREE_CONNECT_CONTEXT_ID 0x0000 +#define SMB2_REMOTED_IDENTITY_TREE_CONNECT_CONTEXT_ID cpu_to_le16(0x0001) + +struct tree_connect_contexts { + __le16 ContextType; + __le16 DataLength; + __le32 Reserved; + __u8 Data[]; +} __packed; + +/* Remoted identity tree connect context structures - see MS-SMB2 2.2.9.2.1 */ +struct smb3_blob_data { + __le16 BlobSize; + __u8 BlobData[]; +} __packed; + +/* Valid values for Attr */ +#define SE_GROUP_MANDATORY 0x00000001 +#define SE_GROUP_ENABLED_BY_DEFAULT 0x00000002 +#define SE_GROUP_ENABLED 0x00000004 +#define SE_GROUP_OWNER 0x00000008 +#define SE_GROUP_USE_FOR_DENY_ONLY 0x00000010 +#define SE_GROUP_INTEGRITY 0x00000020 +#define SE_GROUP_INTEGRITY_ENABLED 0x00000040 +#define SE_GROUP_RESOURCE 0x20000000 +#define SE_GROUP_LOGON_ID 0xC0000000 + +/* struct sid_attr_data is SidData array in BlobData format then le32 Attr */ + +struct sid_array_data { + __le16 SidAttrCount; + /* SidAttrList - array of sid_attr_data structs */ +} __packed; + +struct luid_attr_data { + +} __packed; + +/* + * struct privilege_data is the same as BLOB_DATA - see MS-SMB2 2.2.9.2.1.5 + * but with size of LUID_ATTR_DATA struct and BlobData set to LUID_ATTR DATA + */ + +struct privilege_array_data { + __le16 PrivilegeCount; + /* array of privilege_data structs */ +} __packed; + +struct remoted_identity_tcon_context { + __le16 TicketType; /* must be 0x0001 */ + __le16 TicketSize; /* total size of this struct */ + __le16 User; /* offset to SID_ATTR_DATA struct with user info */ + __le16 UserName; /* offset to null terminated Unicode username string */ + __le16 Domain; /* offset to null terminated Unicode domain name */ + __le16 Groups; /* offset to SID_ARRAY_DATA struct with group info */ + __le16 RestrictedGroups; /* similar to above */ + __le16 Privileges; /* offset to PRIVILEGE_ARRAY_DATA struct */ + __le16 PrimaryGroup; /* offset to SID_ARRAY_DATA struct */ + __le16 Owner; /* offset to BLOB_DATA struct */ + __le16 DefaultDacl; /* offset to BLOB_DATA struct */ + __le16 DeviceGroups; /* offset to SID_ARRAY_DATA struct */ + __le16 UserClaims; /* offset to BLOB_DATA struct */ + __le16 DeviceClaims; /* offset to BLOB_DATA struct */ + __u8 TicketInfo[]; /* variable length buf - remoted identity data */ +} __packed; + +struct smb2_tree_connect_req_extension { + __le32 TreeConnectContextOffset; + __le16 TreeConnectContextCount; + __u8 Reserved[10]; + __u8 PathName[]; /* variable sized array */ + /* followed by array of TreeConnectContexts */ +} __packed; + +/* Flags/Reserved for SMB3.1.1 */ +#define SMB2_TREE_CONNECT_FLAG_CLUSTER_RECONNECT cpu_to_le16(0x0001) +#define SMB2_TREE_CONNECT_FLAG_REDIRECT_TO_OWNER cpu_to_le16(0x0002) +#define SMB2_TREE_CONNECT_FLAG_EXTENSION_PRESENT cpu_to_le16(0x0004) + +struct smb2_tree_connect_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 9 */ + __le16 Flags; /* Flags in SMB3.1.1 */ + __le16 PathOffset; + __le16 PathLength; + __u8 Buffer[1]; /* variable length */ +} __packed; + +/* Possible ShareType values */ +#define SMB2_SHARE_TYPE_DISK 0x01 +#define SMB2_SHARE_TYPE_PIPE 0x02 +#define SMB2_SHARE_TYPE_PRINT 0x03 + +/* + * Possible ShareFlags - exactly one and only one of the first 4 caching flags + * must be set (any of the remaining, SHI1005, flags may be set individually + * or in combination. + */ +#define SMB2_SHAREFLAG_MANUAL_CACHING 0x00000000 +#define SMB2_SHAREFLAG_AUTO_CACHING 0x00000010 +#define SMB2_SHAREFLAG_VDO_CACHING 0x00000020 +#define SMB2_SHAREFLAG_NO_CACHING 0x00000030 +#define SHI1005_FLAGS_DFS 0x00000001 +#define SHI1005_FLAGS_DFS_ROOT 0x00000002 +#define SHI1005_FLAGS_RESTRICT_EXCLUSIVE_OPENS 0x00000100 +#define SHI1005_FLAGS_FORCE_SHARED_DELETE 0x00000200 +#define SHI1005_FLAGS_ALLOW_NAMESPACE_CACHING 0x00000400 +#define SHI1005_FLAGS_ACCESS_BASED_DIRECTORY_ENUM 0x00000800 +#define SHI1005_FLAGS_FORCE_LEVELII_OPLOCK 0x00001000 +#define SHI1005_FLAGS_ENABLE_HASH_V1 0x00002000 +#define SHI1005_FLAGS_ENABLE_HASH_V2 0x00004000 +#define SHI1005_FLAGS_ENCRYPT_DATA 0x00008000 +#define SMB2_SHAREFLAG_IDENTITY_REMOTING 0x00040000 /* 3.1.1 */ +#define SMB2_SHAREFLAG_COMPRESS_DATA 0x00100000 /* 3.1.1 */ +#define SHI1005_FLAGS_ALL 0x0014FF33 + +/* Possible share capabilities */ +#define SMB2_SHARE_CAP_DFS cpu_to_le32(0x00000008) /* all dialects */ +#define SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY cpu_to_le32(0x00000010) /* 3.0 */ +#define SMB2_SHARE_CAP_SCALEOUT cpu_to_le32(0x00000020) /* 3.0 */ +#define SMB2_SHARE_CAP_CLUSTER cpu_to_le32(0x00000040) /* 3.0 */ +#define SMB2_SHARE_CAP_ASYMMETRIC cpu_to_le32(0x00000080) /* 3.02 */ +#define SMB2_SHARE_CAP_REDIRECT_TO_OWNER cpu_to_le32(0x00000100) /* 3.1.1 */ + +struct smb2_tree_connect_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 16 */ + __u8 ShareType; /* see below */ + __u8 Reserved; + __le32 ShareFlags; /* see below */ + __le32 Capabilities; /* see below */ + __le32 MaximalAccess; +} __packed; + +struct smb2_tree_disconnect_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 4 */ + __le16 Reserved; +} __packed; + +struct smb2_tree_disconnect_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 4 */ + __le16 Reserved; +} __packed; + + +/* + * SMB2_NEGOTIATE_PROTOCOL See MS-SMB2 section 2.2.3 + */ +/* SecurityMode flags */ +#define SMB2_NEGOTIATE_SIGNING_ENABLED 0x0001 +#define SMB2_NEGOTIATE_SIGNING_ENABLED_LE cpu_to_le16(0x0001) +#define SMB2_NEGOTIATE_SIGNING_REQUIRED 0x0002 +#define SMB2_NEGOTIATE_SIGNING_REQUIRED_LE cpu_to_le16(0x0002) +#define SMB2_SEC_MODE_FLAGS_ALL 0x0003 + +/* Capabilities flags */ +#define SMB2_GLOBAL_CAP_DFS 0x00000001 +#define SMB2_GLOBAL_CAP_LEASING 0x00000002 /* Resp only New to SMB2.1 */ +#define SMB2_GLOBAL_CAP_LARGE_MTU 0X00000004 /* Resp only New to SMB2.1 */ +#define SMB2_GLOBAL_CAP_MULTI_CHANNEL 0x00000008 /* New to SMB3 */ +#define SMB2_GLOBAL_CAP_PERSISTENT_HANDLES 0x00000010 /* New to SMB3 */ +#define SMB2_GLOBAL_CAP_DIRECTORY_LEASING 0x00000020 /* New to SMB3 */ +#define SMB2_GLOBAL_CAP_ENCRYPTION 0x00000040 /* New to SMB3 */ +/* Internal types */ +#define SMB2_NT_FIND 0x00100000 +#define SMB2_LARGE_FILES 0x00200000 + +#define SMB2_CLIENT_GUID_SIZE 16 +#define SMB2_CREATE_GUID_SIZE 16 + +/* Dialects */ +#define SMB10_PROT_ID 0x0000 /* local only, not sent on wire w/CIFS negprot */ +#define SMB20_PROT_ID 0x0202 +#define SMB21_PROT_ID 0x0210 +#define SMB2X_PROT_ID 0x02FF +#define SMB30_PROT_ID 0x0300 +#define SMB302_PROT_ID 0x0302 +#define SMB311_PROT_ID 0x0311 +#define BAD_PROT_ID 0xFFFF + +#define SMB311_SALT_SIZE 32 +/* Hash Algorithm Types */ +#define SMB2_PREAUTH_INTEGRITY_SHA512 cpu_to_le16(0x0001) +#define SMB2_PREAUTH_HASH_SIZE 64 + +/* Negotiate Contexts - ContextTypes. See MS-SMB2 section 2.2.3.1 for details */ +#define SMB2_PREAUTH_INTEGRITY_CAPABILITIES cpu_to_le16(1) +#define SMB2_ENCRYPTION_CAPABILITIES cpu_to_le16(2) +#define SMB2_COMPRESSION_CAPABILITIES cpu_to_le16(3) +#define SMB2_NETNAME_NEGOTIATE_CONTEXT_ID cpu_to_le16(5) +#define SMB2_TRANSPORT_CAPABILITIES cpu_to_le16(6) +#define SMB2_RDMA_TRANSFORM_CAPABILITIES cpu_to_le16(7) +#define SMB2_SIGNING_CAPABILITIES cpu_to_le16(8) +#define SMB2_POSIX_EXTENSIONS_AVAILABLE cpu_to_le16(0x100) + +struct smb2_neg_context { + __le16 ContextType; + __le16 DataLength; + __le32 Reserved; + /* Followed by array of data. NOTE: some servers require padding to 8 byte boundary */ +} __packed; + +/* + * SaltLength that the server send can be zero, so the only three required + * fields (all __le16) end up six bytes total, so the minimum context data len + * in the response is six bytes which accounts for + * + * HashAlgorithmCount, SaltLength, and 1 HashAlgorithm. + */ +#define MIN_PREAUTH_CTXT_DATA_LEN 6 + +struct smb2_preauth_neg_context { + __le16 ContextType; /* 1 */ + __le16 DataLength; + __le32 Reserved; + __le16 HashAlgorithmCount; /* 1 */ + __le16 SaltLength; + __le16 HashAlgorithms; /* HashAlgorithms[0] since only one defined */ + __u8 Salt[SMB311_SALT_SIZE]; +} __packed; + +/* Encryption Algorithms Ciphers */ +#define SMB2_ENCRYPTION_AES128_CCM cpu_to_le16(0x0001) +#define SMB2_ENCRYPTION_AES128_GCM cpu_to_le16(0x0002) +#define SMB2_ENCRYPTION_AES256_CCM cpu_to_le16(0x0003) +#define SMB2_ENCRYPTION_AES256_GCM cpu_to_le16(0x0004) + +/* Min encrypt context data is one cipher so 2 bytes + 2 byte count field */ +#define MIN_ENCRYPT_CTXT_DATA_LEN 4 +struct smb2_encryption_neg_context { + __le16 ContextType; /* 2 */ + __le16 DataLength; + __le32 Reserved; + /* CipherCount usally 2, but can be 3 when AES256-GCM enabled */ + __le16 CipherCount; /* AES128-GCM and AES128-CCM by default */ + __le16 Ciphers[]; +} __packed; + +/* See MS-SMB2 2.2.3.1.3 */ +#define SMB3_COMPRESS_NONE cpu_to_le16(0x0000) +#define SMB3_COMPRESS_LZNT1 cpu_to_le16(0x0001) +#define SMB3_COMPRESS_LZ77 cpu_to_le16(0x0002) +#define SMB3_COMPRESS_LZ77_HUFF cpu_to_le16(0x0003) +/* Pattern scanning algorithm See MS-SMB2 3.1.4.4.1 */ +#define SMB3_COMPRESS_PATTERN cpu_to_le16(0x0004) /* Pattern_V1 */ + +/* Compression Flags */ +#define SMB2_COMPRESSION_CAPABILITIES_FLAG_NONE cpu_to_le32(0x00000000) +#define SMB2_COMPRESSION_CAPABILITIES_FLAG_CHAINED cpu_to_le32(0x00000001) + +struct smb2_compression_capabilities_context { + __le16 ContextType; /* 3 */ + __le16 DataLength; + __le32 Reserved; + __le16 CompressionAlgorithmCount; + __le16 Padding; + __le32 Flags; + __le16 CompressionAlgorithms[3]; + __u16 Pad; /* Some servers require pad to DataLen multiple of 8 */ + /* Check if pad needed */ +} __packed; + +/* + * For smb2_netname_negotiate_context_id See MS-SMB2 2.2.3.1.4. + * Its struct simply contains NetName, an array of Unicode characters + */ +struct smb2_netname_neg_context { + __le16 ContextType; /* 5 */ + __le16 DataLength; + __le32 Reserved; + __le16 NetName[]; /* hostname of target converted to UCS-2 */ +} __packed; + +/* + * For smb2_transport_capabilities context see MS-SMB2 2.2.3.1.5 + * and 2.2.4.1.5 + */ + +/* Flags */ +#define SMB2_ACCEPT_TRANSFORM_LEVEL_SECURITY 0x00000001 + +struct smb2_transport_capabilities_context { + __le16 ContextType; /* 6 */ + __le16 DataLength; + __u32 Reserved; + __le32 Flags; + __u32 Pad; +} __packed; + +/* + * For rdma transform capabilities context see MS-SMB2 2.2.3.1.6 + * and 2.2.4.1.6 + */ + +/* RDMA Transform IDs */ +#define SMB2_RDMA_TRANSFORM_NONE 0x0000 +#define SMB2_RDMA_TRANSFORM_ENCRYPTION 0x0001 +#define SMB2_RDMA_TRANSFORM_SIGNING 0x0002 + +struct smb2_rdma_transform_capabilities_context { + __le16 ContextType; /* 7 */ + __le16 DataLength; + __u32 Reserved; + __le16 TransformCount; + __u16 Reserved1; + __u32 Reserved2; + __le16 RDMATransformIds[]; +} __packed; + +/* + * For signing capabilities context see MS-SMB2 2.2.3.1.7 + * and 2.2.4.1.7 + */ + +/* Signing algorithms */ +#define SIGNING_ALG_HMAC_SHA256 0 +#define SIGNING_ALG_HMAC_SHA256_LE cpu_to_le16(0) +#define SIGNING_ALG_AES_CMAC 1 +#define SIGNING_ALG_AES_CMAC_LE cpu_to_le16(1) +#define SIGNING_ALG_AES_GMAC 2 +#define SIGNING_ALG_AES_GMAC_LE cpu_to_le16(2) + +struct smb2_signing_capabilities { + __le16 ContextType; /* 8 */ + __le16 DataLength; + __le32 Reserved; + __le16 SigningAlgorithmCount; + __le16 SigningAlgorithms[]; + /* Followed by padding to 8 byte boundary (required by some servers) */ +} __packed; + +#define POSIX_CTXT_DATA_LEN 16 +struct smb2_posix_neg_context { + __le16 ContextType; /* 0x100 */ + __le16 DataLength; + __le32 Reserved; + __u8 Name[16]; /* POSIX ctxt GUID 93AD25509CB411E7B42383DE968BCD7C */ +} __packed; + +struct smb2_negotiate_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 36 */ + __le16 DialectCount; + __le16 SecurityMode; + __le16 Reserved; /* MBZ */ + __le32 Capabilities; + __u8 ClientGUID[SMB2_CLIENT_GUID_SIZE]; + /* In SMB3.02 and earlier next three were MBZ le64 ClientStartTime */ + __le32 NegotiateContextOffset; /* SMB3.1.1 only. MBZ earlier */ + __le16 NegotiateContextCount; /* SMB3.1.1 only. MBZ earlier */ + __le16 Reserved2; + __le16 Dialects[]; +} __packed; + +struct smb2_negotiate_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 65 */ + __le16 SecurityMode; + __le16 DialectRevision; + __le16 NegotiateContextCount; /* Prior to SMB3.1.1 was Reserved & MBZ */ + __u8 ServerGUID[16]; + __le32 Capabilities; + __le32 MaxTransactSize; + __le32 MaxReadSize; + __le32 MaxWriteSize; + __le64 SystemTime; /* MBZ */ + __le64 ServerStartTime; + __le16 SecurityBufferOffset; + __le16 SecurityBufferLength; + __le32 NegotiateContextOffset; /* Pre:SMB3.1.1 was reserved/ignored */ + __u8 Buffer[1]; /* variable length GSS security buffer */ +} __packed; + + +/* + * SMB2_SESSION_SETUP See MS-SMB2 section 2.2.5 + */ +/* Flags */ +#define SMB2_SESSION_REQ_FLAG_BINDING 0x01 +#define SMB2_SESSION_REQ_FLAG_ENCRYPT_DATA 0x04 + +struct smb2_sess_setup_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 25 */ + __u8 Flags; + __u8 SecurityMode; + __le32 Capabilities; + __le32 Channel; + __le16 SecurityBufferOffset; + __le16 SecurityBufferLength; + __le64 PreviousSessionId; + __u8 Buffer[1]; /* variable length GSS security buffer */ +} __packed; + +/* Currently defined SessionFlags */ +#define SMB2_SESSION_FLAG_IS_GUEST 0x0001 +#define SMB2_SESSION_FLAG_IS_GUEST_LE cpu_to_le16(0x0001) +#define SMB2_SESSION_FLAG_IS_NULL 0x0002 +#define SMB2_SESSION_FLAG_IS_NULL_LE cpu_to_le16(0x0002) +#define SMB2_SESSION_FLAG_ENCRYPT_DATA 0x0004 +#define SMB2_SESSION_FLAG_ENCRYPT_DATA_LE cpu_to_le16(0x0004) + +struct smb2_sess_setup_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 9 */ + __le16 SessionFlags; + __le16 SecurityBufferOffset; + __le16 SecurityBufferLength; + __u8 Buffer[1]; /* variable length GSS security buffer */ +} __packed; + + +/* + * SMB2_LOGOFF See MS-SMB2 section 2.2.7 + */ +struct smb2_logoff_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 4 */ + __le16 Reserved; +} __packed; + +struct smb2_logoff_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 4 */ + __le16 Reserved; +} __packed; + + +/* + * SMB2_CLOSE See MS-SMB2 section 2.2.15 + */ +/* Currently defined values for close flags */ +#define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB cpu_to_le16(0x0001) +struct smb2_close_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 24 */ + __le16 Flags; + __le32 Reserved; + __le64 PersistentFileId; /* opaque endianness */ + __le64 VolatileFileId; /* opaque endianness */ +} __packed; + +/* + * Maximum size of a SMB2_CLOSE response is 64 (smb2 header) + 60 (data) + */ +#define MAX_SMB2_CLOSE_RESPONSE_SIZE 124 + +struct smb2_close_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* 60 */ + __le16 Flags; + __le32 Reserved; + __le64 CreationTime; + __le64 LastAccessTime; + __le64 LastWriteTime; + __le64 ChangeTime; + __le64 AllocationSize; /* Beginning of FILE_STANDARD_INFO equivalent */ + __le64 EndOfFile; + __le32 Attributes; +} __packed; + + +/* + * SMB2_READ See MS-SMB2 section 2.2.19 + */ +/* For read request Flags field below, following flag is defined for SMB3.02 */ +#define SMB2_READFLAG_READ_UNBUFFERED 0x01 +#define SMB2_READFLAG_REQUEST_COMPRESSED 0x02 /* See MS-SMB2 2.2.19 */ + +/* Channel field for read and write: exactly one of following flags can be set*/ +#define SMB2_CHANNEL_NONE cpu_to_le32(0x00000000) +#define SMB2_CHANNEL_RDMA_V1 cpu_to_le32(0x00000001) +#define SMB2_CHANNEL_RDMA_V1_INVALIDATE cpu_to_le32(0x00000002) +#define SMB2_CHANNEL_RDMA_TRANSFORM cpu_to_le32(0x00000003) + +/* SMB2 read request without RFC1001 length at the beginning */ +struct smb2_read_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 49 */ + __u8 Padding; /* offset from start of SMB2 header to place read */ + __u8 Flags; /* MBZ unless SMB3.02 or later */ + __le32 Length; + __le64 Offset; + __le64 PersistentFileId; + __le64 VolatileFileId; + __le32 MinimumCount; + __le32 Channel; /* MBZ except for SMB3 or later */ + __le32 RemainingBytes; + __le16 ReadChannelInfoOffset; + __le16 ReadChannelInfoLength; + __u8 Buffer[1]; +} __packed; + +/* Read flags */ +#define SMB2_READFLAG_RESPONSE_NONE cpu_to_le32(0x00000000) +#define SMB2_READFLAG_RESPONSE_RDMA_TRANSFORM cpu_to_le32(0x00000001) + +struct smb2_read_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 17 */ + __u8 DataOffset; + __u8 Reserved; + __le32 DataLength; + __le32 DataRemaining; + __le32 Flags; + __u8 Buffer[1]; +} __packed; + + +/* + * SMB2_WRITE See MS-SMB2 section 2.2.21 + */ +/* For write request Flags field below the following flags are defined: */ +#define SMB2_WRITEFLAG_WRITE_THROUGH 0x00000001 /* SMB2.1 or later */ +#define SMB2_WRITEFLAG_WRITE_UNBUFFERED 0x00000002 /* SMB3.02 or later */ + +struct smb2_write_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 49 */ + __le16 DataOffset; /* offset from start of SMB2 header to write data */ + __le32 Length; + __le64 Offset; + __le64 PersistentFileId; /* opaque endianness */ + __le64 VolatileFileId; /* opaque endianness */ + __le32 Channel; /* MBZ unless SMB3.02 or later */ + __le32 RemainingBytes; + __le16 WriteChannelInfoOffset; + __le16 WriteChannelInfoLength; + __le32 Flags; + __u8 Buffer[1]; +} __packed; + +struct smb2_write_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 17 */ + __u8 DataOffset; + __u8 Reserved; + __le32 DataLength; + __le32 DataRemaining; + __u32 Reserved2; + __u8 Buffer[1]; +} __packed; + + +/* + * SMB2_FLUSH See MS-SMB2 section 2.2.17 + */ +struct smb2_flush_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 24 */ + __le16 Reserved1; + __le32 Reserved2; + __le64 PersistentFileId; + __le64 VolatileFileId; +} __packed; + +struct smb2_flush_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; + __le16 Reserved; +} __packed; + + +/* + * SMB2_NOTIFY See MS-SMB2 section 2.2.35 + */ +/* notify flags */ +#define SMB2_WATCH_TREE 0x0001 + +/* notify completion filter flags. See MS-FSCC 2.6 and MS-SMB2 2.2.35 */ +#define FILE_NOTIFY_CHANGE_FILE_NAME 0x00000001 +#define FILE_NOTIFY_CHANGE_DIR_NAME 0x00000002 +#define FILE_NOTIFY_CHANGE_ATTRIBUTES 0x00000004 +#define FILE_NOTIFY_CHANGE_SIZE 0x00000008 +#define FILE_NOTIFY_CHANGE_LAST_WRITE 0x00000010 +#define FILE_NOTIFY_CHANGE_LAST_ACCESS 0x00000020 +#define FILE_NOTIFY_CHANGE_CREATION 0x00000040 +#define FILE_NOTIFY_CHANGE_EA 0x00000080 +#define FILE_NOTIFY_CHANGE_SECURITY 0x00000100 +#define FILE_NOTIFY_CHANGE_STREAM_NAME 0x00000200 +#define FILE_NOTIFY_CHANGE_STREAM_SIZE 0x00000400 +#define FILE_NOTIFY_CHANGE_STREAM_WRITE 0x00000800 + +/* SMB2 Notify Action Flags */ +#define FILE_ACTION_ADDED 0x00000001 +#define FILE_ACTION_REMOVED 0x00000002 +#define FILE_ACTION_MODIFIED 0x00000003 +#define FILE_ACTION_RENAMED_OLD_NAME 0x00000004 +#define FILE_ACTION_RENAMED_NEW_NAME 0x00000005 +#define FILE_ACTION_ADDED_STREAM 0x00000006 +#define FILE_ACTION_REMOVED_STREAM 0x00000007 +#define FILE_ACTION_MODIFIED_STREAM 0x00000008 +#define FILE_ACTION_REMOVED_BY_DELETE 0x00000009 + +struct smb2_change_notify_req { + struct smb2_hdr hdr; + __le16 StructureSize; + __le16 Flags; + __le32 OutputBufferLength; + __le64 PersistentFileId; /* opaque endianness */ + __le64 VolatileFileId; /* opaque endianness */ + __le32 CompletionFilter; + __u32 Reserved; +} __packed; + +struct smb2_change_notify_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 9 */ + __le16 OutputBufferOffset; + __le32 OutputBufferLength; + __u8 Buffer[1]; /* array of file notify structs */ +} __packed; + + +/* + * SMB2_CREATE See MS-SMB2 section 2.2.13 + */ +/* Oplock levels */ +#define SMB2_OPLOCK_LEVEL_NONE 0x00 +#define SMB2_OPLOCK_LEVEL_II 0x01 +#define SMB2_OPLOCK_LEVEL_EXCLUSIVE 0x08 +#define SMB2_OPLOCK_LEVEL_BATCH 0x09 +#define SMB2_OPLOCK_LEVEL_LEASE 0xFF +/* Non-spec internal type */ +#define SMB2_OPLOCK_LEVEL_NOCHANGE 0x99 + +/* Impersonation Levels. See MS-WPO section 9.7 and MSDN-IMPERS */ +#define IL_ANONYMOUS cpu_to_le32(0x00000000) +#define IL_IDENTIFICATION cpu_to_le32(0x00000001) +#define IL_IMPERSONATION cpu_to_le32(0x00000002) +#define IL_DELEGATE cpu_to_le32(0x00000003) + +/* File Attrubutes */ +#define FILE_ATTRIBUTE_READONLY 0x00000001 +#define FILE_ATTRIBUTE_HIDDEN 0x00000002 +#define FILE_ATTRIBUTE_SYSTEM 0x00000004 +#define FILE_ATTRIBUTE_DIRECTORY 0x00000010 +#define FILE_ATTRIBUTE_ARCHIVE 0x00000020 +#define FILE_ATTRIBUTE_NORMAL 0x00000080 +#define FILE_ATTRIBUTE_TEMPORARY 0x00000100 +#define FILE_ATTRIBUTE_SPARSE_FILE 0x00000200 +#define FILE_ATTRIBUTE_REPARSE_POINT 0x00000400 +#define FILE_ATTRIBUTE_COMPRESSED 0x00000800 +#define FILE_ATTRIBUTE_OFFLINE 0x00001000 +#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x00002000 +#define FILE_ATTRIBUTE_ENCRYPTED 0x00004000 +#define FILE_ATTRIBUTE_INTEGRITY_STREAM 0x00008000 +#define FILE_ATTRIBUTE_NO_SCRUB_DATA 0x00020000 +#define FILE_ATTRIBUTE__MASK 0x00007FB7 + +#define FILE_ATTRIBUTE_READONLY_LE cpu_to_le32(0x00000001) +#define FILE_ATTRIBUTE_HIDDEN_LE cpu_to_le32(0x00000002) +#define FILE_ATTRIBUTE_SYSTEM_LE cpu_to_le32(0x00000004) +#define FILE_ATTRIBUTE_DIRECTORY_LE cpu_to_le32(0x00000010) +#define FILE_ATTRIBUTE_ARCHIVE_LE cpu_to_le32(0x00000020) +#define FILE_ATTRIBUTE_NORMAL_LE cpu_to_le32(0x00000080) +#define FILE_ATTRIBUTE_TEMPORARY_LE cpu_to_le32(0x00000100) +#define FILE_ATTRIBUTE_SPARSE_FILE_LE cpu_to_le32(0x00000200) +#define FILE_ATTRIBUTE_REPARSE_POINT_LE cpu_to_le32(0x00000400) +#define FILE_ATTRIBUTE_COMPRESSED_LE cpu_to_le32(0x00000800) +#define FILE_ATTRIBUTE_OFFLINE_LE cpu_to_le32(0x00001000) +#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED_LE cpu_to_le32(0x00002000) +#define FILE_ATTRIBUTE_ENCRYPTED_LE cpu_to_le32(0x00004000) +#define FILE_ATTRIBUTE_INTEGRITY_STREAM_LE cpu_to_le32(0x00008000) +#define FILE_ATTRIBUTE_NO_SCRUB_DATA_LE cpu_to_le32(0x00020000) +#define FILE_ATTRIBUTE_MASK_LE cpu_to_le32(0x00007FB7) + +/* Desired Access Flags */ +#define FILE_READ_DATA_LE cpu_to_le32(0x00000001) +#define FILE_LIST_DIRECTORY_LE cpu_to_le32(0x00000001) +#define FILE_WRITE_DATA_LE cpu_to_le32(0x00000002) +#define FILE_APPEND_DATA_LE cpu_to_le32(0x00000004) +#define FILE_ADD_SUBDIRECTORY_LE cpu_to_le32(0x00000004) +#define FILE_READ_EA_LE cpu_to_le32(0x00000008) +#define FILE_WRITE_EA_LE cpu_to_le32(0x00000010) +#define FILE_EXECUTE_LE cpu_to_le32(0x00000020) +#define FILE_DELETE_CHILD_LE cpu_to_le32(0x00000040) +#define FILE_READ_ATTRIBUTES_LE cpu_to_le32(0x00000080) +#define FILE_WRITE_ATTRIBUTES_LE cpu_to_le32(0x00000100) +#define FILE_DELETE_LE cpu_to_le32(0x00010000) +#define FILE_READ_CONTROL_LE cpu_to_le32(0x00020000) +#define FILE_WRITE_DAC_LE cpu_to_le32(0x00040000) +#define FILE_WRITE_OWNER_LE cpu_to_le32(0x00080000) +#define FILE_SYNCHRONIZE_LE cpu_to_le32(0x00100000) +#define FILE_ACCESS_SYSTEM_SECURITY_LE cpu_to_le32(0x01000000) +#define FILE_MAXIMAL_ACCESS_LE cpu_to_le32(0x02000000) +#define FILE_GENERIC_ALL_LE cpu_to_le32(0x10000000) +#define FILE_GENERIC_EXECUTE_LE cpu_to_le32(0x20000000) +#define FILE_GENERIC_WRITE_LE cpu_to_le32(0x40000000) +#define FILE_GENERIC_READ_LE cpu_to_le32(0x80000000) +#define DESIRED_ACCESS_MASK cpu_to_le32(0xF21F01FF) + + +#define FILE_READ_DESIRED_ACCESS_LE (FILE_READ_DATA_LE | \ + FILE_READ_EA_LE | \ + FILE_GENERIC_READ_LE) +#define FILE_WRITE_DESIRE_ACCESS_LE (FILE_WRITE_DATA_LE | \ + FILE_APPEND_DATA_LE | \ + FILE_WRITE_EA_LE | \ + FILE_WRITE_ATTRIBUTES_LE | \ + FILE_GENERIC_WRITE_LE) + +/* ShareAccess Flags */ +#define FILE_SHARE_READ_LE cpu_to_le32(0x00000001) +#define FILE_SHARE_WRITE_LE cpu_to_le32(0x00000002) +#define FILE_SHARE_DELETE_LE cpu_to_le32(0x00000004) +#define FILE_SHARE_ALL_LE cpu_to_le32(0x00000007) + +/* CreateDisposition Flags */ +#define FILE_SUPERSEDE_LE cpu_to_le32(0x00000000) +#define FILE_OPEN_LE cpu_to_le32(0x00000001) +#define FILE_CREATE_LE cpu_to_le32(0x00000002) +#define FILE_OPEN_IF_LE cpu_to_le32(0x00000003) +#define FILE_OVERWRITE_LE cpu_to_le32(0x00000004) +#define FILE_OVERWRITE_IF_LE cpu_to_le32(0x00000005) +#define FILE_CREATE_MASK_LE cpu_to_le32(0x00000007) + +#define FILE_READ_RIGHTS (FILE_READ_DATA | FILE_READ_EA \ + | FILE_READ_ATTRIBUTES) +#define FILE_WRITE_RIGHTS (FILE_WRITE_DATA | FILE_APPEND_DATA \ + | FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES) +#define FILE_EXEC_RIGHTS (FILE_EXECUTE) + +/* CreateOptions Flags */ +#define FILE_DIRECTORY_FILE_LE cpu_to_le32(0x00000001) +/* same as #define CREATE_NOT_FILE_LE cpu_to_le32(0x00000001) */ +#define FILE_WRITE_THROUGH_LE cpu_to_le32(0x00000002) +#define FILE_SEQUENTIAL_ONLY_LE cpu_to_le32(0x00000004) +#define FILE_NO_INTERMEDIATE_BUFFERING_LE cpu_to_le32(0x00000008) +#define FILE_NON_DIRECTORY_FILE_LE cpu_to_le32(0x00000040) +#define FILE_COMPLETE_IF_OPLOCKED_LE cpu_to_le32(0x00000100) +#define FILE_NO_EA_KNOWLEDGE_LE cpu_to_le32(0x00000200) +#define FILE_RANDOM_ACCESS_LE cpu_to_le32(0x00000800) +#define FILE_DELETE_ON_CLOSE_LE cpu_to_le32(0x00001000) +#define FILE_OPEN_BY_FILE_ID_LE cpu_to_le32(0x00002000) +#define FILE_OPEN_FOR_BACKUP_INTENT_LE cpu_to_le32(0x00004000) +#define FILE_NO_COMPRESSION_LE cpu_to_le32(0x00008000) +#define FILE_OPEN_REPARSE_POINT_LE cpu_to_le32(0x00200000) +#define FILE_OPEN_NO_RECALL_LE cpu_to_le32(0x00400000) +#define CREATE_OPTIONS_MASK_LE cpu_to_le32(0x00FFFFFF) + +#define FILE_READ_RIGHTS_LE (FILE_READ_DATA_LE | FILE_READ_EA_LE \ + | FILE_READ_ATTRIBUTES_LE) +#define FILE_WRITE_RIGHTS_LE (FILE_WRITE_DATA_LE | FILE_APPEND_DATA_LE \ + | FILE_WRITE_EA_LE | FILE_WRITE_ATTRIBUTES_LE) +#define FILE_EXEC_RIGHTS_LE (FILE_EXECUTE_LE) + +/* Create Context Values */ +#define SMB2_CREATE_EA_BUFFER "ExtA" /* extended attributes */ +#define SMB2_CREATE_SD_BUFFER "SecD" /* security descriptor */ +#define SMB2_CREATE_DURABLE_HANDLE_REQUEST "DHnQ" +#define SMB2_CREATE_DURABLE_HANDLE_RECONNECT "DHnC" +#define SMB2_CREATE_ALLOCATION_SIZE "AISi" +#define SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST "MxAc" +#define SMB2_CREATE_TIMEWARP_REQUEST "TWrp" +#define SMB2_CREATE_QUERY_ON_DISK_ID "QFid" +#define SMB2_CREATE_REQUEST_LEASE "RqLs" +#define SMB2_CREATE_DURABLE_HANDLE_REQUEST_V2 "DH2Q" +#define SMB2_CREATE_DURABLE_HANDLE_RECONNECT_V2 "DH2C" +#define SMB2_CREATE_TAG_POSIX "\x93\xAD\x25\x50\x9C\xB4\x11\xE7\xB4\x23\x83\xDE\x96\x8B\xCD\x7C" + +/* Flag (SMB3 open response) values */ +#define SMB2_CREATE_FLAG_REPARSEPOINT 0x01 + +struct create_context { + __le32 Next; + __le16 NameOffset; + __le16 NameLength; + __le16 Reserved; + __le16 DataOffset; + __le32 DataLength; + __u8 Buffer[]; +} __packed; + +struct smb2_create_req { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 57 */ + __u8 SecurityFlags; + __u8 RequestedOplockLevel; + __le32 ImpersonationLevel; + __le64 SmbCreateFlags; + __le64 Reserved; + __le32 DesiredAccess; + __le32 FileAttributes; + __le32 ShareAccess; + __le32 CreateDisposition; + __le32 CreateOptions; + __le16 NameOffset; + __le16 NameLength; + __le32 CreateContextsOffset; + __le32 CreateContextsLength; + __u8 Buffer[]; +} __packed; + +struct smb2_create_rsp { + struct smb2_hdr hdr; + __le16 StructureSize; /* Must be 89 */ + __u8 OplockLevel; + __u8 Flags; /* 0x01 if reparse point */ + __le32 CreateAction; + __le64 CreationTime; + __le64 LastAccessTime; + __le64 LastWriteTime; + __le64 ChangeTime; + __le64 AllocationSize; + __le64 EndofFile; + __le32 FileAttributes; + __le32 Reserved2; + __le64 PersistentFileId; + __le64 VolatileFileId; + __le32 CreateContextsOffset; + __le32 CreateContextsLength; + __u8 Buffer[1]; +} __packed; + + +#endif /* _COMMON_SMB2PDU_H */ diff --git a/fs/super.c b/fs/super.c index bcef3a6f4c4b..3bfc0f8fbd5b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -476,6 +476,8 @@ void generic_shutdown_super(struct super_block *sb) spin_unlock(&sb_lock); up_write(&sb->s_umount); if (sb->s_bdi != &noop_backing_dev_info) { + if (sb->s_iflags & SB_I_PERSB_BDI) + bdi_unregister(sb->s_bdi); bdi_put(sb->s_bdi); sb->s_bdi = &noop_backing_dev_info; } @@ -1562,6 +1564,7 @@ int super_setup_bdi_name(struct super_block *sb, char *fmt, ...) } WARN_ON(sb->s_bdi != &noop_backing_dev_info); sb->s_bdi = bdi; + sb->s_iflags |= SB_I_PERSB_BDI; return 0; } diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 59dffd5ca517..b6b6796e1616 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -56,8 +56,7 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) kobject_get_ownership(kobj, &uid, &gid); - kn = kernfs_create_dir_ns(parent, kobject_name(kobj), - S_IRWXU | S_IRUGO | S_IXUGO, uid, gid, + kn = kernfs_create_dir_ns(parent, kobject_name(kobj), 0755, uid, gid, kobj, ns); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index d019d6ac6ad0..42dcf96881b6 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -45,6 +45,9 @@ static int sysfs_kf_seq_show(struct seq_file *sf, void *v) ssize_t count; char *buf; + if (WARN_ON_ONCE(!ops->show)) + return -EINVAL; + /* acquire buffer and ensure that it's >= PAGE_SIZE and clear */ count = seq_get_buf(sf, &buf); if (count < PAGE_SIZE) { @@ -53,15 +56,9 @@ static int sysfs_kf_seq_show(struct seq_file *sf, void *v) } memset(buf, 0, PAGE_SIZE); - /* - * Invoke show(). Control may reach here via seq file lseek even - * if @ops->show() isn't implemented. - */ - if (ops->show) { - count = ops->show(kobj, of->kn->priv, buf); - if (count < 0) - return count; - } + count = ops->show(kobj, of->kn->priv, buf); + if (count < 0) + return count; /* * The code works fine with PAGE_SIZE return but it's likely to @@ -255,59 +252,74 @@ static const struct kernfs_ops sysfs_bin_kfops_mmap = { }; int sysfs_add_file_mode_ns(struct kernfs_node *parent, - const struct attribute *attr, bool is_bin, - umode_t mode, kuid_t uid, kgid_t gid, const void *ns) + const struct attribute *attr, umode_t mode, kuid_t uid, + kgid_t gid, const void *ns) { + struct kobject *kobj = parent->priv; + const struct sysfs_ops *sysfs_ops = kobj->ktype->sysfs_ops; struct lock_class_key *key = NULL; - const struct kernfs_ops *ops; + const struct kernfs_ops *ops = NULL; struct kernfs_node *kn; - loff_t size; - - if (!is_bin) { - struct kobject *kobj = parent->priv; - const struct sysfs_ops *sysfs_ops = kobj->ktype->sysfs_ops; - - /* every kobject with an attribute needs a ktype assigned */ - if (WARN(!sysfs_ops, KERN_ERR - "missing sysfs attribute operations for kobject: %s\n", - kobject_name(kobj))) - return -EINVAL; - - if (sysfs_ops->show && sysfs_ops->store) { - if (mode & SYSFS_PREALLOC) - ops = &sysfs_prealloc_kfops_rw; - else - ops = &sysfs_file_kfops_rw; - } else if (sysfs_ops->show) { - if (mode & SYSFS_PREALLOC) - ops = &sysfs_prealloc_kfops_ro; - else - ops = &sysfs_file_kfops_ro; - } else if (sysfs_ops->store) { - if (mode & SYSFS_PREALLOC) - ops = &sysfs_prealloc_kfops_wo; - else - ops = &sysfs_file_kfops_wo; - } else - ops = &sysfs_file_kfops_empty; - - size = PAGE_SIZE; + + /* every kobject with an attribute needs a ktype assigned */ + if (WARN(!sysfs_ops, KERN_ERR + "missing sysfs attribute operations for kobject: %s\n", + kobject_name(kobj))) + return -EINVAL; + + if (mode & SYSFS_PREALLOC) { + if (sysfs_ops->show && sysfs_ops->store) + ops = &sysfs_prealloc_kfops_rw; + else if (sysfs_ops->show) + ops = &sysfs_prealloc_kfops_ro; + else if (sysfs_ops->store) + ops = &sysfs_prealloc_kfops_wo; } else { - struct bin_attribute *battr = (void *)attr; - - if (battr->mmap) - ops = &sysfs_bin_kfops_mmap; - else if (battr->read && battr->write) - ops = &sysfs_bin_kfops_rw; - else if (battr->read) - ops = &sysfs_bin_kfops_ro; - else if (battr->write) - ops = &sysfs_bin_kfops_wo; - else - ops = &sysfs_file_kfops_empty; - - size = battr->size; + if (sysfs_ops->show && sysfs_ops->store) + ops = &sysfs_file_kfops_rw; + else if (sysfs_ops->show) + ops = &sysfs_file_kfops_ro; + else if (sysfs_ops->store) + ops = &sysfs_file_kfops_wo; + } + + if (!ops) + ops = &sysfs_file_kfops_empty; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + if (!attr->ignore_lockdep) + key = attr->key ?: (struct lock_class_key *)&attr->skey; +#endif + + kn = __kernfs_create_file(parent, attr->name, mode & 0777, uid, gid, + PAGE_SIZE, ops, (void *)attr, ns, key); + if (IS_ERR(kn)) { + if (PTR_ERR(kn) == -EEXIST) + sysfs_warn_dup(parent, attr->name); + return PTR_ERR(kn); } + return 0; +} + +int sysfs_add_bin_file_mode_ns(struct kernfs_node *parent, + const struct bin_attribute *battr, umode_t mode, + kuid_t uid, kgid_t gid, const void *ns) +{ + const struct attribute *attr = &battr->attr; + struct lock_class_key *key = NULL; + const struct kernfs_ops *ops; + struct kernfs_node *kn; + + if (battr->mmap) + ops = &sysfs_bin_kfops_mmap; + else if (battr->read && battr->write) + ops = &sysfs_bin_kfops_rw; + else if (battr->read) + ops = &sysfs_bin_kfops_ro; + else if (battr->write) + ops = &sysfs_bin_kfops_wo; + else + ops = &sysfs_file_kfops_empty; #ifdef CONFIG_DEBUG_LOCK_ALLOC if (!attr->ignore_lockdep) @@ -315,7 +327,7 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent, #endif kn = __kernfs_create_file(parent, attr->name, mode & 0777, uid, gid, - size, ops, (void *)attr, ns, key); + battr->size, ops, (void *)attr, ns, key); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) sysfs_warn_dup(parent, attr->name); @@ -340,9 +352,7 @@ int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, return -EINVAL; kobject_get_ownership(kobj, &uid, &gid); - return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode, - uid, gid, ns); - + return sysfs_add_file_mode_ns(kobj->sd, attr, attr->mode, uid, gid, ns); } EXPORT_SYMBOL_GPL(sysfs_create_file_ns); @@ -385,8 +395,8 @@ int sysfs_add_file_to_group(struct kobject *kobj, return -ENOENT; kobject_get_ownership(kobj, &uid, &gid); - error = sysfs_add_file_mode_ns(parent, attr, false, - attr->mode, uid, gid, NULL); + error = sysfs_add_file_mode_ns(parent, attr, attr->mode, uid, gid, + NULL); kernfs_put(parent); return error; @@ -555,8 +565,8 @@ int sysfs_create_bin_file(struct kobject *kobj, return -EINVAL; kobject_get_ownership(kobj, &uid, &gid); - return sysfs_add_file_mode_ns(kobj->sd, &attr->attr, true, - attr->attr.mode, uid, gid, NULL); + return sysfs_add_bin_file_mode_ns(kobj->sd, attr, attr->attr.mode, uid, + gid, NULL); } EXPORT_SYMBOL_GPL(sysfs_create_bin_file); diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index f29d62004527..eeb0e3099421 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -61,8 +61,8 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, (*attr)->name, mode); mode &= SYSFS_PREALLOC | 0664; - error = sysfs_add_file_mode_ns(parent, *attr, false, - mode, uid, gid, NULL); + error = sysfs_add_file_mode_ns(parent, *attr, mode, uid, + gid, NULL); if (unlikely(error)) break; } @@ -90,10 +90,9 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, (*bin_attr)->attr.name, mode); mode &= SYSFS_PREALLOC | 0664; - error = sysfs_add_file_mode_ns(parent, - &(*bin_attr)->attr, true, - mode, - uid, gid, NULL); + error = sysfs_add_bin_file_mode_ns(parent, *bin_attr, + mode, uid, gid, + NULL); if (error) break; } @@ -340,8 +339,8 @@ int sysfs_merge_group(struct kobject *kobj, kobject_get_ownership(kobj, &uid, &gid); for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr)) - error = sysfs_add_file_mode_ns(parent, *attr, false, - (*attr)->mode, uid, gid, NULL); + error = sysfs_add_file_mode_ns(parent, *attr, (*attr)->mode, + uid, gid, NULL); if (error) { while (--i >= 0) kernfs_remove_by_name(parent, (*--attr)->name); diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 0050cc0c0236..3f28c9af5756 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -28,9 +28,11 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name); * file.c */ int sysfs_add_file_mode_ns(struct kernfs_node *parent, - const struct attribute *attr, bool is_bin, - umode_t amode, kuid_t uid, kgid_t gid, - const void *ns); + const struct attribute *attr, umode_t amode, kuid_t uid, + kgid_t gid, const void *ns); +int sysfs_add_bin_file_mode_ns(struct kernfs_node *parent, + const struct bin_attribute *battr, umode_t mode, + kuid_t uid, kgid_t gid, const void *ns); /* * symlink.c diff --git a/fs/sysv/super.c b/fs/sysv/super.c index cc8e2ed155c8..d1def0771a40 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -474,10 +474,8 @@ static int v7_fill_super(struct super_block *sb, void *data, int silent) struct sysv_sb_info *sbi; struct buffer_head *bh; - if (440 != sizeof (struct v7_super_block)) - panic("V7 FS: bad super-block size"); - if (64 != sizeof (struct sysv_inode)) - panic("sysv fs: bad i-node size"); + BUILD_BUG_ON(sizeof(struct v7_super_block) != 440); + BUILD_BUG_ON(sizeof(struct sysv_inode) != 64); sbi = kzalloc(sizeof(struct sysv_sb_info), GFP_KERNEL); if (!sbi) diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 54da6d717a06..b987dc2c6851 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -72,10 +72,6 @@ kmem_zalloc(size_t size, xfs_km_flags_t flags) /* * Zone interfaces */ - -#define kmem_zone kmem_cache -#define kmem_zone_t struct kmem_cache - static inline struct page * kmem_to_page(void *addr) { diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 005abfd9fd34..d7d875cef07a 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -850,7 +850,7 @@ xfs_ag_shrink_space( if (err2 != -ENOSPC) goto resv_err; - __xfs_bmap_add_free(*tpp, args.fsbno, delta, NULL, true); + __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, true); /* * Roll the transaction before trying to re-init the per-ag diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 4c6f9045baca..3f597cad2c33 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -116,23 +116,29 @@ void xfs_perag_put(struct xfs_perag *pag); /* * Perag iteration APIs - * - * XXX: for_each_perag_range() usage really needs an iterator to clean up when - * we terminate at end_agno because we may have taken a reference to the perag - * beyond end_agno. Right now callers have to be careful to catch and clean that - * up themselves. This is not necessary for the callers of for_each_perag() and - * for_each_perag_from() because they terminate at sb_agcount where there are - * no perag structures in tree beyond end_agno. */ -#define for_each_perag_range(mp, next_agno, end_agno, pag) \ - for ((pag) = xfs_perag_get((mp), (next_agno)); \ - (pag) != NULL && (next_agno) <= (end_agno); \ - (next_agno) = (pag)->pag_agno + 1, \ - xfs_perag_put(pag), \ - (pag) = xfs_perag_get((mp), (next_agno))) +static inline struct xfs_perag * +xfs_perag_next( + struct xfs_perag *pag, + xfs_agnumber_t *agno, + xfs_agnumber_t end_agno) +{ + struct xfs_mount *mp = pag->pag_mount; + + *agno = pag->pag_agno + 1; + xfs_perag_put(pag); + if (*agno > end_agno) + return NULL; + return xfs_perag_get(mp, *agno); +} + +#define for_each_perag_range(mp, agno, end_agno, pag) \ + for ((pag) = xfs_perag_get((mp), (agno)); \ + (pag) != NULL; \ + (pag) = xfs_perag_next((pag), &(agno), (end_agno))) -#define for_each_perag_from(mp, next_agno, pag) \ - for_each_perag_range((mp), (next_agno), (mp)->m_sb.sb_agcount, (pag)) +#define for_each_perag_from(mp, agno, pag) \ + for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount - 1, (pag)) #define for_each_perag(mp, agno, pag) \ diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index 2aa2b3484c28..fe94058d4e9e 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -91,7 +91,8 @@ xfs_ag_resv_critical( trace_xfs_ag_resv_critical(pag, type, avail); /* Critically low if less than 10% or max btree height remains. */ - return XFS_TEST_ERROR(avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS, + return XFS_TEST_ERROR(avail < orig / 10 || + avail < pag->pag_mount->m_agbtree_maxlevels, pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL); } diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 95157f5a5a6c..353e53b892e6 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -27,7 +27,7 @@ #include "xfs_ag_resv.h" #include "xfs_bmap.h" -extern kmem_zone_t *xfs_bmap_free_item_zone; +struct kmem_cache *xfs_extfree_item_cache; struct workqueue_struct *xfs_alloc_wq; @@ -426,8 +426,8 @@ xfs_alloc_fix_len( */ STATIC int /* error code */ xfs_alloc_fixup_trees( - xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */ - xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */ + struct xfs_btree_cur *cnt_cur, /* cursor for by-size btree */ + struct xfs_btree_cur *bno_cur, /* cursor for by-block btree */ xfs_agblock_t fbno, /* starting block of free extent */ xfs_extlen_t flen, /* length of free extent */ xfs_agblock_t rbno, /* starting block of returned extent */ @@ -488,8 +488,8 @@ xfs_alloc_fixup_trees( struct xfs_btree_block *bnoblock; struct xfs_btree_block *cntblock; - bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]); - cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]); + bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_levels[0].bp); + cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_levels[0].bp); if (XFS_IS_CORRUPT(mp, bnoblock->bb_numrecs != @@ -1200,8 +1200,8 @@ xfs_alloc_ag_vextent_exact( xfs_alloc_arg_t *args) /* allocation argument structure */ { struct xfs_agf __maybe_unused *agf = args->agbp->b_addr; - xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ - xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ + struct xfs_btree_cur *bno_cur;/* by block-number btree cursor */ + struct xfs_btree_cur *cnt_cur;/* by count btree cursor */ int error; xfs_agblock_t fbno; /* start block of found extent */ xfs_extlen_t flen; /* length of found extent */ @@ -1512,7 +1512,7 @@ xfs_alloc_ag_vextent_lastblock( * than minlen. */ if (*len || args->alignment > 1) { - acur->cnt->bc_ptrs[0] = 1; + acur->cnt->bc_levels[0].ptr = 1; do { error = xfs_alloc_get_rec(acur->cnt, bno, len, &i); if (error) @@ -1658,8 +1658,8 @@ xfs_alloc_ag_vextent_size( xfs_alloc_arg_t *args) /* allocation argument structure */ { struct xfs_agf *agf = args->agbp->b_addr; - xfs_btree_cur_t *bno_cur; /* cursor for bno btree */ - xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */ + struct xfs_btree_cur *bno_cur; /* cursor for bno btree */ + struct xfs_btree_cur *cnt_cur; /* cursor for cnt btree */ int error; /* error result */ xfs_agblock_t fbno; /* start of found freespace */ xfs_extlen_t flen; /* length of found freespace */ @@ -2190,14 +2190,15 @@ xfs_free_ag_extent( */ /* - * Compute and fill in value of m_ag_maxlevels. + * Compute and fill in value of m_alloc_maxlevels. */ void xfs_alloc_compute_maxlevels( xfs_mount_t *mp) /* file system mount structure */ { - mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr, + mp->m_alloc_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr, (mp->m_sb.sb_agblocks + 1) / 2); + ASSERT(mp->m_alloc_maxlevels <= xfs_allocbt_maxlevels_ondisk()); } /* @@ -2255,14 +2256,14 @@ xfs_alloc_min_freelist( const uint8_t *levels = pag ? pag->pagf_levels : fake_levels; unsigned int min_free; - ASSERT(mp->m_ag_maxlevels > 0); + ASSERT(mp->m_alloc_maxlevels > 0); /* space needed by-bno freespace btree */ min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1, - mp->m_ag_maxlevels); + mp->m_alloc_maxlevels); /* space needed by-size freespace btree */ min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1, - mp->m_ag_maxlevels); + mp->m_alloc_maxlevels); /* space needed reverse mapping used space btree */ if (xfs_has_rmapbt(mp)) min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1, @@ -2439,7 +2440,7 @@ xfs_agfl_reset( /* * Defer an AGFL block free. This is effectively equivalent to - * xfs_bmap_add_free() with some special handling particular to AGFL blocks. + * xfs_free_extent_later() with some special handling particular to AGFL blocks. * * Deferring AGFL frees helps prevent log reservation overruns due to too many * allocation operations in a transaction. AGFL frees are prone to this problem @@ -2458,21 +2459,74 @@ xfs_defer_agfl_block( struct xfs_mount *mp = tp->t_mountp; struct xfs_extent_free_item *new; /* new element */ - ASSERT(xfs_bmap_free_item_zone != NULL); + ASSERT(xfs_extfree_item_cache != NULL); ASSERT(oinfo != NULL); - new = kmem_cache_alloc(xfs_bmap_free_item_zone, + new = kmem_cache_zalloc(xfs_extfree_item_cache, GFP_KERNEL | __GFP_NOFAIL); new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); new->xefi_blockcount = 1; - new->xefi_oinfo = *oinfo; - new->xefi_skip_discard = false; + new->xefi_owner = oinfo->oi_owner; trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list); } +/* + * Add the extent to the list of extents to be free at transaction end. + * The list is maintained sorted (by block number). + */ +void +__xfs_free_extent_later( + struct xfs_trans *tp, + xfs_fsblock_t bno, + xfs_filblks_t len, + const struct xfs_owner_info *oinfo, + bool skip_discard) +{ + struct xfs_extent_free_item *new; /* new element */ +#ifdef DEBUG + struct xfs_mount *mp = tp->t_mountp; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + + ASSERT(bno != NULLFSBLOCK); + ASSERT(len > 0); + ASSERT(len <= MAXEXTLEN); + ASSERT(!isnullstartblock(bno)); + agno = XFS_FSB_TO_AGNO(mp, bno); + agbno = XFS_FSB_TO_AGBNO(mp, bno); + ASSERT(agno < mp->m_sb.sb_agcount); + ASSERT(agbno < mp->m_sb.sb_agblocks); + ASSERT(len < mp->m_sb.sb_agblocks); + ASSERT(agbno + len <= mp->m_sb.sb_agblocks); +#endif + ASSERT(xfs_extfree_item_cache != NULL); + + new = kmem_cache_zalloc(xfs_extfree_item_cache, + GFP_KERNEL | __GFP_NOFAIL); + new->xefi_startblock = bno; + new->xefi_blockcount = (xfs_extlen_t)len; + if (skip_discard) + new->xefi_flags |= XFS_EFI_SKIP_DISCARD; + if (oinfo) { + ASSERT(oinfo->oi_offset == 0); + + if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK) + new->xefi_flags |= XFS_EFI_ATTR_FORK; + if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK) + new->xefi_flags |= XFS_EFI_BMBT_BLOCK; + new->xefi_owner = oinfo->oi_owner; + } else { + new->xefi_owner = XFS_RMAP_OWN_NULL; + } + trace_xfs_bmap_free_defer(tp->t_mountp, + XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0, + XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len); + xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list); +} + #ifdef DEBUG /* * Check if an AGF has a free extent record whose length is equal to @@ -2903,13 +2957,16 @@ xfs_agf_verify( if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || - be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > mp->m_ag_maxlevels || - be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > mp->m_ag_maxlevels) + be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > + mp->m_alloc_maxlevels || + be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > + mp->m_alloc_maxlevels) return __this_address; if (xfs_has_rmapbt(mp) && (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || - be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > mp->m_rmap_maxlevels)) + be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > + mp->m_rmap_maxlevels)) return __this_address; if (xfs_has_rmapbt(mp) && @@ -3495,3 +3552,20 @@ xfs_agfl_walk( return 0; } + +int __init +xfs_extfree_intent_init_cache(void) +{ + xfs_extfree_item_cache = kmem_cache_create("xfs_extfree_intent", + sizeof(struct xfs_extent_free_item), + 0, 0, NULL); + + return xfs_extfree_item_cache != NULL ? 0 : -ENOMEM; +} + +void +xfs_extfree_intent_destroy_cache(void) +{ + kmem_cache_destroy(xfs_extfree_item_cache); + xfs_extfree_item_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index df4aefaf0046..1c14a0b1abea 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -98,7 +98,7 @@ unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp, struct xfs_perag *pag); /* - * Compute and fill in value of m_ag_maxlevels. + * Compute and fill in value of m_alloc_maxlevels. */ void xfs_alloc_compute_maxlevels( @@ -248,4 +248,40 @@ xfs_buf_to_agfl_bno( return bp->b_addr; } +void __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, + xfs_filblks_t len, const struct xfs_owner_info *oinfo, + bool skip_discard); + +/* + * List of extents to be free "later". + * The list is kept sorted on xbf_startblock. + */ +struct xfs_extent_free_item { + struct list_head xefi_list; + uint64_t xefi_owner; + xfs_fsblock_t xefi_startblock;/* starting fs block number */ + xfs_extlen_t xefi_blockcount;/* number of blocks in extent */ + unsigned int xefi_flags; +}; + +#define XFS_EFI_SKIP_DISCARD (1U << 0) /* don't issue discard */ +#define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */ +#define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */ + +static inline void +xfs_free_extent_later( + struct xfs_trans *tp, + xfs_fsblock_t bno, + xfs_filblks_t len, + const struct xfs_owner_info *oinfo) +{ + __xfs_free_extent_later(tp, bno, len, oinfo, false); +} + + +extern struct kmem_cache *xfs_extfree_item_cache; + +int __init xfs_extfree_intent_init_cache(void); +void xfs_extfree_intent_destroy_cache(void); + #endif /* __XFS_ALLOC_H__ */ diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 6746fd735550..8c9f73cc0bee 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -20,6 +20,7 @@ #include "xfs_trans.h" #include "xfs_ag.h" +static struct kmem_cache *xfs_allocbt_cur_cache; STATIC struct xfs_btree_cur * xfs_allocbt_dup_cursor( @@ -316,7 +317,7 @@ xfs_allocbt_verify( if (pag && pag->pagf_init) { if (level >= pag->pagf_levels[btnum]) return __this_address; - } else if (level >= mp->m_ag_maxlevels) + } else if (level >= mp->m_alloc_maxlevels) return __this_address; return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]); @@ -477,12 +478,8 @@ xfs_allocbt_init_common( ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT); - cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); - - cur->bc_tp = tp; - cur->bc_mp = mp; - cur->bc_btnum = btnum; - cur->bc_blocklog = mp->m_sb.sb_blocklog; + cur = xfs_btree_alloc_cursor(mp, tp, btnum, mp->m_alloc_maxlevels, + xfs_allocbt_cur_cache); cur->bc_ag.abt.active = false; if (btnum == XFS_BTNUM_CNT) { @@ -571,6 +568,17 @@ xfs_allocbt_commit_staged_btree( } } +/* Calculate number of records in an alloc btree block. */ +static inline unsigned int +xfs_allocbt_block_maxrecs( + unsigned int blocklen, + bool leaf) +{ + if (leaf) + return blocklen / sizeof(xfs_alloc_rec_t); + return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t)); +} + /* * Calculate number of records in an alloc btree block. */ @@ -581,10 +589,26 @@ xfs_allocbt_maxrecs( int leaf) { blocklen -= XFS_ALLOC_BLOCK_LEN(mp); + return xfs_allocbt_block_maxrecs(blocklen, leaf); +} - if (leaf) - return blocklen / sizeof(xfs_alloc_rec_t); - return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t)); +/* Free space btrees are at their largest when every other block is free. */ +#define XFS_MAX_FREESP_RECORDS ((XFS_MAX_AG_BLOCKS + 1) / 2) + +/* Compute the max possible height for free space btrees. */ +unsigned int +xfs_allocbt_maxlevels_ondisk(void) +{ + unsigned int minrecs[2]; + unsigned int blocklen; + + blocklen = min(XFS_MIN_BLOCKSIZE - XFS_BTREE_SBLOCK_LEN, + XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN); + + minrecs[0] = xfs_allocbt_block_maxrecs(blocklen, true) / 2; + minrecs[1] = xfs_allocbt_block_maxrecs(blocklen, false) / 2; + + return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_FREESP_RECORDS); } /* Calculate the freespace btree size for some records. */ @@ -595,3 +619,22 @@ xfs_allocbt_calc_size( { return xfs_btree_calc_size(mp->m_alloc_mnr, len); } + +int __init +xfs_allocbt_init_cur_cache(void) +{ + xfs_allocbt_cur_cache = kmem_cache_create("xfs_bnobt_cur", + xfs_btree_cur_sizeof(xfs_allocbt_maxlevels_ondisk()), + 0, 0, NULL); + + if (!xfs_allocbt_cur_cache) + return -ENOMEM; + return 0; +} + +void +xfs_allocbt_destroy_cur_cache(void) +{ + kmem_cache_destroy(xfs_allocbt_cur_cache); + xfs_allocbt_cur_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h index 2f6b816aaf9f..45df893ef6bb 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.h +++ b/fs/xfs/libxfs/xfs_alloc_btree.h @@ -60,4 +60,9 @@ extern xfs_extlen_t xfs_allocbt_calc_size(struct xfs_mount *mp, void xfs_allocbt_commit_staged_btree(struct xfs_btree_cur *cur, struct xfs_trans *tp, struct xfs_buf *agbp); +unsigned int xfs_allocbt_maxlevels_ondisk(void); + +int __init xfs_allocbt_init_cur_cache(void); +void xfs_allocbt_destroy_cur_cache(void); + #endif /* __XFS_ALLOC_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index e1d11e314228..014daa8c542d 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -770,7 +770,7 @@ xfs_attr_fork_remove( ASSERT(ip->i_afp->if_nextents == 0); xfs_idestroy_fork(ip->i_afp); - kmem_cache_free(xfs_ifork_zone, ip->i_afp); + kmem_cache_free(xfs_ifork_cache, ip->i_afp); ip->i_afp = NULL; ip->i_forkoff = 0; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index b48230f1a361..4dccd4d90622 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -37,8 +37,7 @@ #include "xfs_icache.h" #include "xfs_iomap.h" - -kmem_zone_t *xfs_bmap_free_item_zone; +struct kmem_cache *xfs_bmap_intent_cache; /* * Miscellaneous helper functions @@ -93,6 +92,7 @@ xfs_bmap_compute_maxlevels( maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; } mp->m_bm_maxlevels[whichfork] = level; + ASSERT(mp->m_bm_maxlevels[whichfork] <= xfs_bmbt_maxlevels_ondisk()); } unsigned int @@ -239,11 +239,11 @@ xfs_bmap_get_bp( if (!cur) return NULL; - for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) { - if (!cur->bc_bufs[i]) + for (i = 0; i < cur->bc_maxlevels; i++) { + if (!cur->bc_levels[i].bp) break; - if (xfs_buf_daddr(cur->bc_bufs[i]) == bno) - return cur->bc_bufs[i]; + if (xfs_buf_daddr(cur->bc_levels[i].bp) == bno) + return cur->bc_levels[i].bp; } /* Chase down all the log items to see if the bp is there */ @@ -316,7 +316,7 @@ xfs_check_block( */ STATIC void xfs_bmap_check_leaf_extents( - xfs_btree_cur_t *cur, /* btree cursor or null */ + struct xfs_btree_cur *cur, /* btree cursor or null */ xfs_inode_t *ip, /* incore inode pointer */ int whichfork) /* data or attr fork */ { @@ -522,56 +522,6 @@ xfs_bmap_validate_ret( #endif /* DEBUG */ /* - * bmap free list manipulation functions - */ - -/* - * Add the extent to the list of extents to be free at transaction end. - * The list is maintained sorted (by block number). - */ -void -__xfs_bmap_add_free( - struct xfs_trans *tp, - xfs_fsblock_t bno, - xfs_filblks_t len, - const struct xfs_owner_info *oinfo, - bool skip_discard) -{ - struct xfs_extent_free_item *new; /* new element */ -#ifdef DEBUG - struct xfs_mount *mp = tp->t_mountp; - xfs_agnumber_t agno; - xfs_agblock_t agbno; - - ASSERT(bno != NULLFSBLOCK); - ASSERT(len > 0); - ASSERT(len <= MAXEXTLEN); - ASSERT(!isnullstartblock(bno)); - agno = XFS_FSB_TO_AGNO(mp, bno); - agbno = XFS_FSB_TO_AGBNO(mp, bno); - ASSERT(agno < mp->m_sb.sb_agcount); - ASSERT(agbno < mp->m_sb.sb_agblocks); - ASSERT(len < mp->m_sb.sb_agblocks); - ASSERT(agbno + len <= mp->m_sb.sb_agblocks); -#endif - ASSERT(xfs_bmap_free_item_zone != NULL); - - new = kmem_cache_alloc(xfs_bmap_free_item_zone, - GFP_KERNEL | __GFP_NOFAIL); - new->xefi_startblock = bno; - new->xefi_blockcount = (xfs_extlen_t)len; - if (oinfo) - new->xefi_oinfo = *oinfo; - else - new->xefi_oinfo = XFS_RMAP_OINFO_SKIP_UPDATE; - new->xefi_skip_discard = skip_discard; - trace_xfs_bmap_free_defer(tp->t_mountp, - XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0, - XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len); - xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list); -} - -/* * Inode fork format manipulation functions */ @@ -625,12 +575,12 @@ xfs_bmap_btree_to_extents( if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) return error; xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); - xfs_bmap_add_free(cur->bc_tp, cbno, 1, &oinfo); + xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo); ip->i_nblocks--; xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); xfs_trans_binval(tp, cbp); - if (cur->bc_bufs[0] == cbp) - cur->bc_bufs[0] = NULL; + if (cur->bc_levels[0].bp == cbp) + cur->bc_levels[0].bp = NULL; xfs_iroot_realloc(ip, -1, whichfork); ASSERT(ifp->if_broot == NULL); ifp->if_format = XFS_DINODE_FMT_EXTENTS; @@ -925,7 +875,7 @@ xfs_bmap_add_attrfork_btree( int *flags) /* inode logging flags */ { struct xfs_btree_block *block = ip->i_df.if_broot; - xfs_btree_cur_t *cur; /* btree cursor */ + struct xfs_btree_cur *cur; /* btree cursor */ int error; /* error return value */ xfs_mount_t *mp; /* file system mount struct */ int stat; /* newroot status */ @@ -968,7 +918,7 @@ xfs_bmap_add_attrfork_extents( struct xfs_inode *ip, /* incore inode pointer */ int *flags) /* inode logging flags */ { - xfs_btree_cur_t *cur; /* bmap btree cursor */ + struct xfs_btree_cur *cur; /* bmap btree cursor */ int error; /* error return value */ if (ip->i_df.if_nextents * sizeof(struct xfs_bmbt_rec) <= @@ -1988,11 +1938,11 @@ xfs_bmap_add_extent_unwritten_real( xfs_inode_t *ip, /* incore inode pointer */ int whichfork, struct xfs_iext_cursor *icur, - xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ + struct xfs_btree_cur **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp) /* inode logging flags */ { - xfs_btree_cur_t *cur; /* btree cursor */ + struct xfs_btree_cur *cur; /* btree cursor */ int error; /* error return value */ int i; /* temp state */ struct xfs_ifork *ifp; /* inode fork pointer */ @@ -5045,7 +4995,7 @@ xfs_bmap_del_extent_real( xfs_inode_t *ip, /* incore inode pointer */ xfs_trans_t *tp, /* current transaction pointer */ struct xfs_iext_cursor *icur, - xfs_btree_cur_t *cur, /* if null, not a btree */ + struct xfs_btree_cur *cur, /* if null, not a btree */ xfs_bmbt_irec_t *del, /* data to remove from extents */ int *logflagsp, /* inode logging flags */ int whichfork, /* data or attr fork */ @@ -5296,7 +5246,7 @@ xfs_bmap_del_extent_real( if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) { xfs_refcount_decrease_extent(tp, del); } else { - __xfs_bmap_add_free(tp, del->br_startblock, + __xfs_free_extent_later(tp, del->br_startblock, del->br_blockcount, NULL, (bflags & XFS_BMAPI_NODISCARD) || del->br_state == XFS_EXT_UNWRITTEN); @@ -6189,7 +6139,7 @@ __xfs_bmap_add( bmap->br_blockcount, bmap->br_state); - bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_NOFS); + bi = kmem_cache_alloc(xfs_bmap_intent_cache, GFP_NOFS | __GFP_NOFAIL); INIT_LIST_HEAD(&bi->bi_list); bi->bi_type = type; bi->bi_owner = ip; @@ -6300,3 +6250,20 @@ xfs_bmap_validate_extent( return __this_address; return NULL; } + +int __init +xfs_bmap_intent_init_cache(void) +{ + xfs_bmap_intent_cache = kmem_cache_create("xfs_bmap_intent", + sizeof(struct xfs_bmap_intent), + 0, 0, NULL); + + return xfs_bmap_intent_cache != NULL ? 0 : -ENOMEM; +} + +void +xfs_bmap_intent_destroy_cache(void) +{ + kmem_cache_destroy(xfs_bmap_intent_cache); + xfs_bmap_intent_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 67641f669918..03d9aaf87413 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -13,8 +13,6 @@ struct xfs_inode; struct xfs_mount; struct xfs_trans; -extern kmem_zone_t *xfs_bmap_free_item_zone; - /* * Argument structure for xfs_bmap_alloc. */ @@ -44,19 +42,6 @@ struct xfs_bmalloca { int flags; }; -/* - * List of extents to be free "later". - * The list is kept sorted on xbf_startblock. - */ -struct xfs_extent_free_item -{ - xfs_fsblock_t xefi_startblock;/* starting fs block number */ - xfs_extlen_t xefi_blockcount;/* number of blocks in extent */ - bool xefi_skip_discard; - struct list_head xefi_list; - struct xfs_owner_info xefi_oinfo; /* extent owner */ -}; - #define XFS_BMAP_MAX_NMAP 4 /* @@ -189,9 +174,6 @@ unsigned int xfs_bmap_compute_attr_offset(struct xfs_mount *mp); int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); void xfs_bmap_local_to_extents_empty(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork); -void __xfs_bmap_add_free(struct xfs_trans *tp, xfs_fsblock_t bno, - xfs_filblks_t len, const struct xfs_owner_info *oinfo, - bool skip_discard); void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork); int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip, xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork); @@ -239,16 +221,6 @@ int xfs_bmap_add_extent_unwritten_real(struct xfs_trans *tp, struct xfs_iext_cursor *icur, struct xfs_btree_cur **curp, struct xfs_bmbt_irec *new, int *logflagsp); -static inline void -xfs_bmap_add_free( - struct xfs_trans *tp, - xfs_fsblock_t bno, - xfs_filblks_t len, - const struct xfs_owner_info *oinfo) -{ - __xfs_bmap_add_free(tp, bno, len, oinfo, false); -} - enum xfs_bmap_intent_type { XFS_BMAP_MAP = 1, XFS_BMAP_UNMAP, @@ -257,8 +229,8 @@ enum xfs_bmap_intent_type { struct xfs_bmap_intent { struct list_head bi_list; enum xfs_bmap_intent_type bi_type; - struct xfs_inode *bi_owner; int bi_whichfork; + struct xfs_inode *bi_owner; struct xfs_bmbt_irec bi_bmap; }; @@ -290,4 +262,9 @@ int xfs_bmapi_remap(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, xfs_fsblock_t startblock, int flags); +extern struct kmem_cache *xfs_bmap_intent_cache; + +int __init xfs_bmap_intent_init_cache(void); +void xfs_bmap_intent_destroy_cache(void); + #endif /* __XFS_BMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 72444b8b38a6..453309fc85f2 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -22,6 +22,8 @@ #include "xfs_trace.h" #include "xfs_rmap.h" +static struct kmem_cache *xfs_bmbt_cur_cache; + /* * Convert on-disk form of btree root to in-memory form. */ @@ -286,7 +288,7 @@ xfs_bmbt_free_block( struct xfs_owner_info oinfo; xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork); - xfs_bmap_add_free(cur->bc_tp, fsbno, 1, &oinfo); + xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo); ip->i_nblocks--; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); @@ -552,13 +554,9 @@ xfs_bmbt_init_cursor( struct xfs_btree_cur *cur; ASSERT(whichfork != XFS_COW_FORK); - cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); - - cur->bc_tp = tp; - cur->bc_mp = mp; + cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_BMAP, + mp->m_bm_maxlevels[whichfork], xfs_bmbt_cur_cache); cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1; - cur->bc_btnum = XFS_BTNUM_BMAP; - cur->bc_blocklog = mp->m_sb.sb_blocklog; cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_bmbt_2); cur->bc_ops = &xfs_bmbt_ops; @@ -575,6 +573,17 @@ xfs_bmbt_init_cursor( return cur; } +/* Calculate number of records in a block mapping btree block. */ +static inline unsigned int +xfs_bmbt_block_maxrecs( + unsigned int blocklen, + bool leaf) +{ + if (leaf) + return blocklen / sizeof(xfs_bmbt_rec_t); + return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)); +} + /* * Calculate number of records in a bmap btree block. */ @@ -585,10 +594,24 @@ xfs_bmbt_maxrecs( int leaf) { blocklen -= XFS_BMBT_BLOCK_LEN(mp); + return xfs_bmbt_block_maxrecs(blocklen, leaf); +} - if (leaf) - return blocklen / sizeof(xfs_bmbt_rec_t); - return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)); +/* Compute the max possible height for block mapping btrees. */ +unsigned int +xfs_bmbt_maxlevels_ondisk(void) +{ + unsigned int minrecs[2]; + unsigned int blocklen; + + blocklen = min(XFS_MIN_BLOCKSIZE - XFS_BTREE_SBLOCK_LEN, + XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN); + + minrecs[0] = xfs_bmbt_block_maxrecs(blocklen, true) / 2; + minrecs[1] = xfs_bmbt_block_maxrecs(blocklen, false) / 2; + + /* One extra level for the inode root. */ + return xfs_btree_compute_maxlevels(minrecs, MAXEXTNUM) + 1; } /* @@ -654,3 +677,22 @@ xfs_bmbt_calc_size( { return xfs_btree_calc_size(mp->m_bmap_dmnr, len); } + +int __init +xfs_bmbt_init_cur_cache(void) +{ + xfs_bmbt_cur_cache = kmem_cache_create("xfs_bmbt_cur", + xfs_btree_cur_sizeof(xfs_bmbt_maxlevels_ondisk()), + 0, 0, NULL); + + if (!xfs_bmbt_cur_cache) + return -ENOMEM; + return 0; +} + +void +xfs_bmbt_destroy_cur_cache(void) +{ + kmem_cache_destroy(xfs_bmbt_cur_cache); + xfs_bmbt_cur_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h index 729e3bc569be..3e7a40a83835 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.h +++ b/fs/xfs/libxfs/xfs_bmap_btree.h @@ -110,4 +110,9 @@ extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, extern unsigned long long xfs_bmbt_calc_size(struct xfs_mount *mp, unsigned long long len); +unsigned int xfs_bmbt_maxlevels_ondisk(void); + +int __init xfs_bmbt_init_cur_cache(void); +void xfs_bmbt_destroy_cur_cache(void); + #endif /* __XFS_BMAP_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 298395481713..b4e19aacb9de 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -22,11 +22,11 @@ #include "xfs_log.h" #include "xfs_btree_staging.h" #include "xfs_ag.h" - -/* - * Cursor allocation zone. - */ -kmem_zone_t *xfs_btree_cur_zone; +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_rmap_btree.h" +#include "xfs_refcount_btree.h" /* * Btree magic numbers. @@ -367,8 +367,8 @@ xfs_btree_del_cursor( * way we won't have initialized all the entries down to 0. */ for (i = 0; i < cur->bc_nlevels; i++) { - if (cur->bc_bufs[i]) - xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); + if (cur->bc_levels[i].bp) + xfs_trans_brelse(cur->bc_tp, cur->bc_levels[i].bp); else if (!error) break; } @@ -379,7 +379,7 @@ xfs_btree_del_cursor( kmem_free(cur->bc_ops); if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) && cur->bc_ag.pag) xfs_perag_put(cur->bc_ag.pag); - kmem_cache_free(xfs_btree_cur_zone, cur); + kmem_cache_free(cur->bc_cache, cur); } /* @@ -388,14 +388,14 @@ xfs_btree_del_cursor( */ int /* error */ xfs_btree_dup_cursor( - xfs_btree_cur_t *cur, /* input cursor */ - xfs_btree_cur_t **ncur) /* output cursor */ + struct xfs_btree_cur *cur, /* input cursor */ + struct xfs_btree_cur **ncur) /* output cursor */ { struct xfs_buf *bp; /* btree block's buffer pointer */ int error; /* error return value */ int i; /* level number of btree block */ xfs_mount_t *mp; /* mount structure for filesystem */ - xfs_btree_cur_t *new; /* new cursor value */ + struct xfs_btree_cur *new; /* new cursor value */ xfs_trans_t *tp; /* transaction pointer, can be NULL */ tp = cur->bc_tp; @@ -415,9 +415,9 @@ xfs_btree_dup_cursor( * For each level current, re-get the buffer and copy the ptr value. */ for (i = 0; i < new->bc_nlevels; i++) { - new->bc_ptrs[i] = cur->bc_ptrs[i]; - new->bc_ra[i] = cur->bc_ra[i]; - bp = cur->bc_bufs[i]; + new->bc_levels[i].ptr = cur->bc_levels[i].ptr; + new->bc_levels[i].ra = cur->bc_levels[i].ra; + bp = cur->bc_levels[i].bp; if (bp) { error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, xfs_buf_daddr(bp), mp->m_bsize, @@ -429,7 +429,7 @@ xfs_btree_dup_cursor( return error; } } - new->bc_bufs[i] = bp; + new->bc_levels[i].bp = bp; } *ncur = new; return 0; @@ -681,7 +681,7 @@ xfs_btree_get_block( return xfs_btree_get_iroot(cur); } - *bpp = cur->bc_bufs[level]; + *bpp = cur->bc_levels[level].bp; return XFS_BUF_TO_BLOCK(*bpp); } @@ -691,7 +691,7 @@ xfs_btree_get_block( */ STATIC int /* success=1, failure=0 */ xfs_btree_firstrec( - xfs_btree_cur_t *cur, /* btree cursor */ + struct xfs_btree_cur *cur, /* btree cursor */ int level) /* level to change */ { struct xfs_btree_block *block; /* generic btree block pointer */ @@ -711,7 +711,7 @@ xfs_btree_firstrec( /* * Set the ptr value to 1, that's the first record/key. */ - cur->bc_ptrs[level] = 1; + cur->bc_levels[level].ptr = 1; return 1; } @@ -721,7 +721,7 @@ xfs_btree_firstrec( */ STATIC int /* success=1, failure=0 */ xfs_btree_lastrec( - xfs_btree_cur_t *cur, /* btree cursor */ + struct xfs_btree_cur *cur, /* btree cursor */ int level) /* level to change */ { struct xfs_btree_block *block; /* generic btree block pointer */ @@ -741,7 +741,7 @@ xfs_btree_lastrec( /* * Set the ptr value to numrecs, that's the last record/key. */ - cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs); + cur->bc_levels[level].ptr = be16_to_cpu(block->bb_numrecs); return 1; } @@ -922,11 +922,11 @@ xfs_btree_readahead( (lev == cur->bc_nlevels - 1)) return 0; - if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev]) + if ((cur->bc_levels[lev].ra | lr) == cur->bc_levels[lev].ra) return 0; - cur->bc_ra[lev] |= lr; - block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]); + cur->bc_levels[lev].ra |= lr; + block = XFS_BUF_TO_BLOCK(cur->bc_levels[lev].bp); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) return xfs_btree_readahead_lblock(cur, lr, block); @@ -985,28 +985,28 @@ xfs_btree_readahead_ptr( */ STATIC void xfs_btree_setbuf( - xfs_btree_cur_t *cur, /* btree cursor */ + struct xfs_btree_cur *cur, /* btree cursor */ int lev, /* level in btree */ struct xfs_buf *bp) /* new buffer to set */ { struct xfs_btree_block *b; /* btree block */ - if (cur->bc_bufs[lev]) - xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]); - cur->bc_bufs[lev] = bp; - cur->bc_ra[lev] = 0; + if (cur->bc_levels[lev].bp) + xfs_trans_brelse(cur->bc_tp, cur->bc_levels[lev].bp); + cur->bc_levels[lev].bp = bp; + cur->bc_levels[lev].ra = 0; b = XFS_BUF_TO_BLOCK(bp); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK)) - cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; + cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA; if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK)) - cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; + cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA; } else { if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK)) - cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; + cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA; if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK)) - cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; + cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA; } } @@ -1548,7 +1548,7 @@ xfs_btree_increment( #endif /* We're done if we remain in the block after the increment. */ - if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block)) + if (++cur->bc_levels[level].ptr <= xfs_btree_get_numrecs(block)) goto out1; /* Fail if we just went off the right edge of the tree. */ @@ -1571,7 +1571,7 @@ xfs_btree_increment( goto error0; #endif - if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block)) + if (++cur->bc_levels[lev].ptr <= xfs_btree_get_numrecs(block)) break; /* Read-ahead the right block for the next loop. */ @@ -1598,14 +1598,14 @@ xfs_btree_increment( for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { union xfs_btree_ptr *ptrp; - ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); + ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block); --lev; error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); if (error) goto error0; xfs_btree_setbuf(cur, lev, bp); - cur->bc_ptrs[lev] = 1; + cur->bc_levels[lev].ptr = 1; } out1: *stat = 1; @@ -1641,7 +1641,7 @@ xfs_btree_decrement( xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); /* We're done if we remain in the block after the decrement. */ - if (--cur->bc_ptrs[level] > 0) + if (--cur->bc_levels[level].ptr > 0) goto out1; /* Get a pointer to the btree block. */ @@ -1665,7 +1665,7 @@ xfs_btree_decrement( * Stop when we don't go off the left edge of a block. */ for (lev = level + 1; lev < cur->bc_nlevels; lev++) { - if (--cur->bc_ptrs[lev] > 0) + if (--cur->bc_levels[lev].ptr > 0) break; /* Read-ahead the left block for the next loop. */ xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); @@ -1691,13 +1691,13 @@ xfs_btree_decrement( for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { union xfs_btree_ptr *ptrp; - ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); + ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block); --lev; error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); if (error) goto error0; xfs_btree_setbuf(cur, lev, bp); - cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block); + cur->bc_levels[lev].ptr = xfs_btree_get_numrecs(block); } out1: *stat = 1; @@ -1735,7 +1735,7 @@ xfs_btree_lookup_get_block( * * Otherwise throw it away and get a new one. */ - bp = cur->bc_bufs[level]; + bp = cur->bc_levels[level].bp; error = xfs_btree_ptr_to_daddr(cur, pp, &daddr); if (error) return error; @@ -1864,7 +1864,7 @@ xfs_btree_lookup( return -EFSCORRUPTED; } - cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; + cur->bc_levels[0].ptr = dir != XFS_LOOKUP_LE; *stat = 0; return 0; } @@ -1916,7 +1916,7 @@ xfs_btree_lookup( if (error) goto error0; - cur->bc_ptrs[level] = keyno; + cur->bc_levels[level].ptr = keyno; } } @@ -1933,7 +1933,7 @@ xfs_btree_lookup( !xfs_btree_ptr_is_null(cur, &ptr)) { int i; - cur->bc_ptrs[0] = keyno; + cur->bc_levels[0].ptr = keyno; error = xfs_btree_increment(cur, 0, &i); if (error) goto error0; @@ -1944,7 +1944,7 @@ xfs_btree_lookup( } } else if (dir == XFS_LOOKUP_LE && diff > 0) keyno--; - cur->bc_ptrs[0] = keyno; + cur->bc_levels[0].ptr = keyno; /* Return if we succeeded or not. */ if (keyno == 0 || keyno > xfs_btree_get_numrecs(block)) @@ -2104,7 +2104,7 @@ __xfs_btree_updkeys( if (error) return error; #endif - ptr = cur->bc_ptrs[level]; + ptr = cur->bc_levels[level].ptr; nlkey = xfs_btree_key_addr(cur, ptr, block); nhkey = xfs_btree_high_key_addr(cur, ptr, block); if (!force_all && @@ -2171,7 +2171,7 @@ xfs_btree_update_keys( if (error) return error; #endif - ptr = cur->bc_ptrs[level]; + ptr = cur->bc_levels[level].ptr; kp = xfs_btree_key_addr(cur, ptr, block); xfs_btree_copy_keys(cur, kp, &key, 1); xfs_btree_log_keys(cur, bp, ptr, ptr); @@ -2205,7 +2205,7 @@ xfs_btree_update( goto error0; #endif /* Get the address of the rec to be updated. */ - ptr = cur->bc_ptrs[0]; + ptr = cur->bc_levels[0].ptr; rp = xfs_btree_rec_addr(cur, ptr, block); /* Fill in the new contents and log them. */ @@ -2280,7 +2280,7 @@ xfs_btree_lshift( * If the cursor entry is the one that would be moved, don't * do it... it's too complicated. */ - if (cur->bc_ptrs[level] <= 1) + if (cur->bc_levels[level].ptr <= 1) goto out0; /* Set up the left neighbor as "left". */ @@ -2414,7 +2414,7 @@ xfs_btree_lshift( goto error0; /* Slide the cursor value left one. */ - cur->bc_ptrs[level]--; + cur->bc_levels[level].ptr--; *stat = 1; return 0; @@ -2476,7 +2476,7 @@ xfs_btree_rshift( * do it... it's too complicated. */ lrecs = xfs_btree_get_numrecs(left); - if (cur->bc_ptrs[level] >= lrecs) + if (cur->bc_levels[level].ptr >= lrecs) goto out0; /* Set up the right neighbor as "right". */ @@ -2664,7 +2664,7 @@ __xfs_btree_split( */ lrecs = xfs_btree_get_numrecs(left); rrecs = lrecs / 2; - if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1) + if ((lrecs & 1) && cur->bc_levels[level].ptr <= rrecs + 1) rrecs++; src_index = (lrecs - rrecs + 1); @@ -2760,9 +2760,9 @@ __xfs_btree_split( * If it's just pointing past the last entry in left, then we'll * insert there, so don't change anything in that case. */ - if (cur->bc_ptrs[level] > lrecs + 1) { + if (cur->bc_levels[level].ptr > lrecs + 1) { xfs_btree_setbuf(cur, level, rbp); - cur->bc_ptrs[level] -= lrecs; + cur->bc_levels[level].ptr -= lrecs; } /* * If there are more levels, we'll need another cursor which refers @@ -2772,7 +2772,7 @@ __xfs_btree_split( error = xfs_btree_dup_cursor(cur, curp); if (error) goto error0; - (*curp)->bc_ptrs[level + 1]++; + (*curp)->bc_levels[level + 1].ptr++; } *ptrp = rptr; *stat = 1; @@ -2933,7 +2933,8 @@ xfs_btree_new_iroot( be16_add_cpu(&block->bb_level, 1); xfs_btree_set_numrecs(block, 1); cur->bc_nlevels++; - cur->bc_ptrs[level + 1] = 1; + ASSERT(cur->bc_nlevels <= cur->bc_maxlevels); + cur->bc_levels[level + 1].ptr = 1; kp = xfs_btree_key_addr(cur, 1, block); ckp = xfs_btree_key_addr(cur, 1, cblock); @@ -3094,8 +3095,9 @@ xfs_btree_new_root( /* Fix up the cursor. */ xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); - cur->bc_ptrs[cur->bc_nlevels] = nptr; + cur->bc_levels[cur->bc_nlevels].ptr = nptr; cur->bc_nlevels++; + ASSERT(cur->bc_nlevels <= cur->bc_maxlevels); *stat = 1; return 0; error0: @@ -3152,7 +3154,7 @@ xfs_btree_make_block_unfull( return error; if (*stat) { - *oindex = *index = cur->bc_ptrs[level]; + *oindex = *index = cur->bc_levels[level].ptr; return 0; } @@ -3167,7 +3169,7 @@ xfs_btree_make_block_unfull( return error; - *index = cur->bc_ptrs[level]; + *index = cur->bc_levels[level].ptr; return 0; } @@ -3214,7 +3216,7 @@ xfs_btree_insrec( } /* If we're off the left edge, return failure. */ - ptr = cur->bc_ptrs[level]; + ptr = cur->bc_levels[level].ptr; if (ptr == 0) { *stat = 0; return 0; @@ -3557,7 +3559,7 @@ xfs_btree_kill_iroot( if (error) return error; - cur->bc_bufs[level - 1] = NULL; + cur->bc_levels[level - 1].bp = NULL; be16_add_cpu(&block->bb_level, -1); xfs_trans_log_inode(cur->bc_tp, ip, XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork)); @@ -3590,8 +3592,8 @@ xfs_btree_kill_root( if (error) return error; - cur->bc_bufs[level] = NULL; - cur->bc_ra[level] = 0; + cur->bc_levels[level].bp = NULL; + cur->bc_levels[level].ra = 0; cur->bc_nlevels--; return 0; @@ -3650,7 +3652,7 @@ xfs_btree_delrec( tcur = NULL; /* Get the index of the entry being deleted, check for nothing there. */ - ptr = cur->bc_ptrs[level]; + ptr = cur->bc_levels[level].ptr; if (ptr == 0) { *stat = 0; return 0; @@ -3960,7 +3962,7 @@ xfs_btree_delrec( xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); tcur = NULL; if (level == 0) - cur->bc_ptrs[0]++; + cur->bc_levels[0].ptr++; *stat = 1; return 0; @@ -4097,9 +4099,9 @@ xfs_btree_delrec( * cursor to the left block, and fix up the index. */ if (bp != lbp) { - cur->bc_bufs[level] = lbp; - cur->bc_ptrs[level] += lrecs; - cur->bc_ra[level] = 0; + cur->bc_levels[level].bp = lbp; + cur->bc_levels[level].ptr += lrecs; + cur->bc_levels[level].ra = 0; } /* * If we joined with the right neighbor and there's a level above @@ -4119,16 +4121,16 @@ xfs_btree_delrec( * We can't use decrement because it would change the next level up. */ if (level > 0) - cur->bc_ptrs[level]--; + cur->bc_levels[level].ptr--; /* * We combined blocks, so we have to update the parent keys if the - * btree supports overlapped intervals. However, bc_ptrs[level + 1] - * points to the old block so that the caller knows which record to - * delete. Therefore, the caller must be savvy enough to call updkeys - * for us if we return stat == 2. The other exit points from this - * function don't require deletions further up the tree, so they can - * call updkeys directly. + * btree supports overlapped intervals. However, + * bc_levels[level + 1].ptr points to the old block so that the caller + * knows which record to delete. Therefore, the caller must be savvy + * enough to call updkeys for us if we return stat == 2. The other + * exit points from this function don't require deletions further up + * the tree, so they can call updkeys directly. */ /* Return value means the next level up has something to do. */ @@ -4182,7 +4184,7 @@ xfs_btree_delete( if (i == 0) { for (level = 1; level < cur->bc_nlevels; level++) { - if (cur->bc_ptrs[level] == 0) { + if (cur->bc_levels[level].ptr == 0) { error = xfs_btree_decrement(cur, level, &i); if (error) goto error0; @@ -4213,7 +4215,7 @@ xfs_btree_get_rec( int error; /* error return value */ #endif - ptr = cur->bc_ptrs[0]; + ptr = cur->bc_levels[0].ptr; block = xfs_btree_get_block(cur, 0, &bp); #ifdef DEBUG @@ -4512,21 +4514,76 @@ xfs_btree_sblock_verify( } /* - * Calculate the number of btree levels needed to store a given number of - * records in a short-format btree. + * For the given limits on leaf and keyptr records per block, calculate the + * height of the tree needed to index the number of leaf records. */ -uint +unsigned int xfs_btree_compute_maxlevels( - uint *limits, - unsigned long len) + const unsigned int *limits, + unsigned long long records) +{ + unsigned long long level_blocks = howmany_64(records, limits[0]); + unsigned int height = 1; + + while (level_blocks > 1) { + level_blocks = howmany_64(level_blocks, limits[1]); + height++; + } + + return height; +} + +/* + * For the given limits on leaf and keyptr records per block, calculate the + * number of blocks needed to index the given number of leaf records. + */ +unsigned long long +xfs_btree_calc_size( + const unsigned int *limits, + unsigned long long records) +{ + unsigned long long level_blocks = howmany_64(records, limits[0]); + unsigned long long blocks = level_blocks; + + while (level_blocks > 1) { + level_blocks = howmany_64(level_blocks, limits[1]); + blocks += level_blocks; + } + + return blocks; +} + +/* + * Given a number of available blocks for the btree to consume with records and + * pointers, calculate the height of the tree needed to index all the records + * that space can hold based on the number of pointers each interior node + * holds. + * + * We start by assuming a single level tree consumes a single block, then track + * the number of blocks each node level consumes until we no longer have space + * to store the next node level. At this point, we are indexing all the leaf + * blocks in the space, and there's no more free space to split the tree any + * further. That's our maximum btree height. + */ +unsigned int +xfs_btree_space_to_height( + const unsigned int *limits, + unsigned long long leaf_blocks) { - uint level; - unsigned long maxblocks; + unsigned long long node_blocks = limits[1]; + unsigned long long blocks_left = leaf_blocks - 1; + unsigned int height = 1; + + if (leaf_blocks < 1) + return 0; + + while (node_blocks < blocks_left) { + blocks_left -= node_blocks; + node_blocks *= limits[1]; + height++; + } - maxblocks = (len + limits[0] - 1) / limits[0]; - for (level = 1; maxblocks > 1; level++) - maxblocks = (maxblocks + limits[1] - 1) / limits[1]; - return level; + return height; } /* @@ -4661,23 +4718,25 @@ xfs_btree_overlapped_query_range( if (error) goto out; #endif - cur->bc_ptrs[level] = 1; + cur->bc_levels[level].ptr = 1; while (level < cur->bc_nlevels) { block = xfs_btree_get_block(cur, level, &bp); /* End of node, pop back towards the root. */ - if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) { + if (cur->bc_levels[level].ptr > + be16_to_cpu(block->bb_numrecs)) { pop_up: if (level < cur->bc_nlevels - 1) - cur->bc_ptrs[level + 1]++; + cur->bc_levels[level + 1].ptr++; level++; continue; } if (level == 0) { /* Handle a leaf node. */ - recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); + recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, + block); cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp); ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey, @@ -4700,14 +4759,15 @@ pop_up: /* Record is larger than high key; pop. */ goto pop_up; } - cur->bc_ptrs[level]++; + cur->bc_levels[level].ptr++; continue; } /* Handle an internal node. */ - lkp = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block); - hkp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block); - pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block); + lkp = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block); + hkp = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr, + block); + pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block); ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key); hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp); @@ -4730,13 +4790,13 @@ pop_up: if (error) goto out; #endif - cur->bc_ptrs[level] = 1; + cur->bc_levels[level].ptr = 1; continue; } else if (hdiff < 0) { /* The low key is larger than the upper range; pop. */ goto pop_up; } - cur->bc_ptrs[level]++; + cur->bc_levels[level].ptr++; } out: @@ -4747,13 +4807,14 @@ out: * with a zero-results range query, so release the buffers if we * failed to return any results. */ - if (cur->bc_bufs[0] == NULL) { + if (cur->bc_levels[0].bp == NULL) { for (i = 0; i < cur->bc_nlevels; i++) { - if (cur->bc_bufs[i]) { - xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); - cur->bc_bufs[i] = NULL; - cur->bc_ptrs[i] = 0; - cur->bc_ra[i] = 0; + if (cur->bc_levels[i].bp) { + xfs_trans_brelse(cur->bc_tp, + cur->bc_levels[i].bp); + cur->bc_levels[i].bp = NULL; + cur->bc_levels[i].ptr = 0; + cur->bc_levels[i].ra = 0; } } } @@ -4816,29 +4877,6 @@ xfs_btree_query_all( return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv); } -/* - * Calculate the number of blocks needed to store a given number of records - * in a short-format (per-AG metadata) btree. - */ -unsigned long long -xfs_btree_calc_size( - uint *limits, - unsigned long long len) -{ - int level; - int maxrecs; - unsigned long long rval; - - maxrecs = limits[0]; - for (level = 0, rval = 0; len > 1; level++) { - len += maxrecs - 1; - do_div(len, maxrecs); - maxrecs = limits[1]; - rval += len; - } - return rval; -} - static int xfs_btree_count_blocks_helper( struct xfs_btree_cur *cur, @@ -4915,7 +4953,7 @@ xfs_btree_has_more_records( block = xfs_btree_get_block(cur, 0, &bp); /* There are still records in this block. */ - if (cur->bc_ptrs[0] < xfs_btree_get_numrecs(block)) + if (cur->bc_levels[0].ptr < xfs_btree_get_numrecs(block)) return true; /* There are more record blocks. */ @@ -4924,3 +4962,42 @@ xfs_btree_has_more_records( else return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK); } + +/* Set up all the btree cursor caches. */ +int __init +xfs_btree_init_cur_caches(void) +{ + int error; + + error = xfs_allocbt_init_cur_cache(); + if (error) + return error; + error = xfs_inobt_init_cur_cache(); + if (error) + goto err; + error = xfs_bmbt_init_cur_cache(); + if (error) + goto err; + error = xfs_rmapbt_init_cur_cache(); + if (error) + goto err; + error = xfs_refcountbt_init_cur_cache(); + if (error) + goto err; + + return 0; +err: + xfs_btree_destroy_cur_caches(); + return error; +} + +/* Destroy all the btree cursor caches, if they've been allocated. */ +void +xfs_btree_destroy_cur_caches(void) +{ + xfs_allocbt_destroy_cur_cache(); + xfs_inobt_destroy_cur_cache(); + xfs_bmbt_destroy_cur_cache(); + xfs_rmapbt_destroy_cur_cache(); + xfs_refcountbt_destroy_cur_cache(); +} diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 4eaf8517f850..22d9f411fde6 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -13,8 +13,6 @@ struct xfs_trans; struct xfs_ifork; struct xfs_perag; -extern kmem_zone_t *xfs_btree_cur_zone; - /* * Generic key, ptr and record wrapper structures. * @@ -92,8 +90,6 @@ uint32_t xfs_btree_magic(int crc, xfs_btnum_t btnum); #define XFS_BTREE_STATS_ADD(cur, stat, val) \ XFS_STATS_ADD_OFF((cur)->bc_mp, (cur)->bc_statoff + __XBTS_ ## stat, val) -#define XFS_BTREE_MAXLEVELS 9 /* max of all btrees */ - struct xfs_btree_ops { /* size of the key and record structures */ size_t key_len; @@ -181,18 +177,18 @@ union xfs_btree_irec { /* Per-AG btree information. */ struct xfs_btree_cur_ag { - struct xfs_perag *pag; + struct xfs_perag *pag; union { struct xfs_buf *agbp; struct xbtree_afakeroot *afake; /* for staging cursor */ }; union { struct { - unsigned long nr_ops; /* # record updates */ - int shape_changes; /* # of extent splits */ + unsigned int nr_ops; /* # record updates */ + unsigned int shape_changes; /* # of extent splits */ } refc; struct { - bool active; /* allocation cursor state */ + bool active; /* allocation cursor state */ } abt; }; }; @@ -212,26 +208,35 @@ struct xfs_btree_cur_ino { #define XFS_BTCUR_BMBT_INVALID_OWNER (1 << 1) }; +struct xfs_btree_level { + /* buffer pointer */ + struct xfs_buf *bp; + + /* key/record number */ + uint16_t ptr; + + /* readahead info */ +#define XFS_BTCUR_LEFTRA (1 << 0) /* left sibling has been read-ahead */ +#define XFS_BTCUR_RIGHTRA (1 << 1) /* right sibling has been read-ahead */ + uint16_t ra; +}; + /* * Btree cursor structure. * This collects all information needed by the btree code in one place. */ -typedef struct xfs_btree_cur +struct xfs_btree_cur { struct xfs_trans *bc_tp; /* transaction we're in, if any */ struct xfs_mount *bc_mp; /* file system mount struct */ const struct xfs_btree_ops *bc_ops; - uint bc_flags; /* btree features - below */ + struct kmem_cache *bc_cache; /* cursor cache */ + unsigned int bc_flags; /* btree features - below */ + xfs_btnum_t bc_btnum; /* identifies which btree type */ union xfs_btree_irec bc_rec; /* current insert/search record value */ - struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */ - int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */ - uint8_t bc_ra[XFS_BTREE_MAXLEVELS]; /* readahead bits */ -#define XFS_BTCUR_LEFTRA 1 /* left sibling has been read-ahead */ -#define XFS_BTCUR_RIGHTRA 2 /* right sibling has been read-ahead */ - uint8_t bc_nlevels; /* number of levels in the tree */ - uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */ - xfs_btnum_t bc_btnum; /* identifies which btree type */ - int bc_statoff; /* offset of btre stats array */ + uint8_t bc_nlevels; /* number of levels in the tree */ + uint8_t bc_maxlevels; /* maximum levels for this btree type */ + int bc_statoff; /* offset of btree stats array */ /* * Short btree pointers need an agno to be able to turn the pointers @@ -243,7 +248,21 @@ typedef struct xfs_btree_cur struct xfs_btree_cur_ag bc_ag; struct xfs_btree_cur_ino bc_ino; }; -} xfs_btree_cur_t; + + /* Must be at the end of the struct! */ + struct xfs_btree_level bc_levels[]; +}; + +/* + * Compute the size of a btree cursor that can handle a btree of a given + * height. The bc_levels array handles node and leaf blocks, so its size + * is exactly nlevels. + */ +static inline size_t +xfs_btree_cur_sizeof(unsigned int nlevels) +{ + return struct_size((struct xfs_btree_cur *)NULL, bc_levels, nlevels); +} /* cursor flags */ #define XFS_BTREE_LONG_PTRS (1<<0) /* pointers are 64bits long */ @@ -258,7 +277,6 @@ typedef struct xfs_btree_cur */ #define XFS_BTREE_STAGING (1<<5) - #define XFS_BTREE_NOERROR 0 #define XFS_BTREE_ERROR 1 @@ -309,7 +327,7 @@ xfs_btree_check_sptr( */ void xfs_btree_del_cursor( - xfs_btree_cur_t *cur, /* btree cursor */ + struct xfs_btree_cur *cur, /* btree cursor */ int error); /* del because of error */ /* @@ -318,8 +336,8 @@ xfs_btree_del_cursor( */ int /* error */ xfs_btree_dup_cursor( - xfs_btree_cur_t *cur, /* input cursor */ - xfs_btree_cur_t **ncur);/* output cursor */ + struct xfs_btree_cur *cur, /* input cursor */ + struct xfs_btree_cur **ncur);/* output cursor */ /* * Compute first and last byte offsets for the fields given. @@ -460,8 +478,12 @@ xfs_failaddr_t xfs_btree_lblock_v5hdr_verify(struct xfs_buf *bp, xfs_failaddr_t xfs_btree_lblock_verify(struct xfs_buf *bp, unsigned int max_recs); -uint xfs_btree_compute_maxlevels(uint *limits, unsigned long len); -unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len); +unsigned int xfs_btree_compute_maxlevels(const unsigned int *limits, + unsigned long long records); +unsigned long long xfs_btree_calc_size(const unsigned int *limits, + unsigned long long records); +unsigned int xfs_btree_space_to_height(const unsigned int *limits, + unsigned long long blocks); /* * Return codes for the query range iterator function are 0 to continue @@ -527,7 +549,7 @@ struct xfs_ifork *xfs_btree_ifork_ptr(struct xfs_btree_cur *cur); /* Does this cursor point to the last block in the given level? */ static inline bool xfs_btree_islastblock( - xfs_btree_cur_t *cur, + struct xfs_btree_cur *cur, int level) { struct xfs_btree_block *block; @@ -558,4 +580,27 @@ void xfs_btree_copy_keys(struct xfs_btree_cur *cur, union xfs_btree_key *dst_key, const union xfs_btree_key *src_key, int numkeys); +static inline struct xfs_btree_cur * +xfs_btree_alloc_cursor( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_btnum_t btnum, + uint8_t maxlevels, + struct kmem_cache *cache) +{ + struct xfs_btree_cur *cur; + + cur = kmem_cache_zalloc(cache, GFP_NOFS | __GFP_NOFAIL); + cur->bc_tp = tp; + cur->bc_mp = mp; + cur->bc_btnum = btnum; + cur->bc_maxlevels = maxlevels; + cur->bc_cache = cache; + + return cur; +} + +int __init xfs_btree_init_cur_caches(void); +void xfs_btree_destroy_cur_caches(void); + #endif /* __XFS_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c index ac9e80152b5c..dd75e208b543 100644 --- a/fs/xfs/libxfs/xfs_btree_staging.c +++ b/fs/xfs/libxfs/xfs_btree_staging.c @@ -657,12 +657,12 @@ xfs_btree_bload_compute_geometry( * checking levels 0 and 1 here, so set bc_nlevels such that the btree * code doesn't interpret either as the root level. */ - cur->bc_nlevels = XFS_BTREE_MAXLEVELS - 1; + cur->bc_nlevels = cur->bc_maxlevels - 1; xfs_btree_bload_ensure_slack(cur, &bbl->leaf_slack, 0); xfs_btree_bload_ensure_slack(cur, &bbl->node_slack, 1); bbl->nr_records = nr_this_level = nr_records; - for (cur->bc_nlevels = 1; cur->bc_nlevels < XFS_BTREE_MAXLEVELS;) { + for (cur->bc_nlevels = 1; cur->bc_nlevels <= cur->bc_maxlevels;) { uint64_t level_blocks; uint64_t dontcare64; unsigned int level = cur->bc_nlevels - 1; @@ -703,6 +703,7 @@ xfs_btree_bload_compute_geometry( * block-based btree level. */ cur->bc_nlevels++; + ASSERT(cur->bc_nlevels <= cur->bc_maxlevels); xfs_btree_bload_level_geometry(cur, bbl, level, nr_this_level, &avg_per_block, &level_blocks, &dontcare64); @@ -718,13 +719,14 @@ xfs_btree_bload_compute_geometry( /* Otherwise, we need another level of btree. */ cur->bc_nlevels++; + ASSERT(cur->bc_nlevels <= cur->bc_maxlevels); } nr_blocks += level_blocks; nr_this_level = level_blocks; } - if (cur->bc_nlevels == XFS_BTREE_MAXLEVELS) + if (cur->bc_nlevels > cur->bc_maxlevels) return -EOVERFLOW; bbl->btree_height = cur->bc_nlevels; diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index c062e2c85178..dd7a2dbce1d1 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -72,7 +72,7 @@ STATIC int xfs_da3_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *save_blk); -kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */ +struct kmem_cache *xfs_da_state_cache; /* anchor for dir/attr state */ /* * Allocate a dir-state structure. @@ -84,7 +84,7 @@ xfs_da_state_alloc( { struct xfs_da_state *state; - state = kmem_cache_zalloc(xfs_da_state_zone, GFP_NOFS | __GFP_NOFAIL); + state = kmem_cache_zalloc(xfs_da_state_cache, GFP_NOFS | __GFP_NOFAIL); state->args = args; state->mp = args->dp->i_mount; return state; @@ -113,7 +113,7 @@ xfs_da_state_free(xfs_da_state_t *state) #ifdef DEBUG memset((char *)state, 0, sizeof(*state)); #endif /* DEBUG */ - kmem_cache_free(xfs_da_state_zone, state); + kmem_cache_free(xfs_da_state_cache, state); } static inline int xfs_dabuf_nfsb(struct xfs_mount *mp, int whichfork) diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index ad5dd324631a..0faf7d9ac241 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -9,7 +9,6 @@ struct xfs_inode; struct xfs_trans; -struct zone; /* * Directory/attribute geometry information. There will be one of these for each @@ -227,6 +226,6 @@ void xfs_da3_node_hdr_from_disk(struct xfs_mount *mp, void xfs_da3_node_hdr_to_disk(struct xfs_mount *mp, struct xfs_da_intnode *to, struct xfs_da3_icnode_hdr *from); -extern struct kmem_zone *xfs_da_state_zone; +extern struct kmem_cache *xfs_da_state_cache; #endif /* __XFS_DA_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index eff4a127188e..0805ade2d300 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -18,6 +18,12 @@ #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_log.h" +#include "xfs_rmap.h" +#include "xfs_refcount.h" +#include "xfs_bmap.h" +#include "xfs_alloc.h" + +static struct kmem_cache *xfs_defer_pending_cache; /* * Deferred Operations in XFS @@ -232,23 +238,20 @@ xfs_defer_trans_abort( } } -/* Roll a transaction so we can do some deferred op processing. */ -STATIC int -xfs_defer_trans_roll( - struct xfs_trans **tpp) +/* + * Capture resources that the caller said not to release ("held") when the + * transaction commits. Caller is responsible for zero-initializing @dres. + */ +static int +xfs_defer_save_resources( + struct xfs_defer_resources *dres, + struct xfs_trans *tp) { - struct xfs_trans *tp = *tpp; struct xfs_buf_log_item *bli; struct xfs_inode_log_item *ili; struct xfs_log_item *lip; - struct xfs_buf *bplist[XFS_DEFER_OPS_NR_BUFS]; - struct xfs_inode *iplist[XFS_DEFER_OPS_NR_INODES]; - unsigned int ordered = 0; /* bitmap */ - int bpcount = 0, ipcount = 0; - int i; - int error; - BUILD_BUG_ON(NBBY * sizeof(ordered) < XFS_DEFER_OPS_NR_BUFS); + BUILD_BUG_ON(NBBY * sizeof(dres->dr_ordered) < XFS_DEFER_OPS_NR_BUFS); list_for_each_entry(lip, &tp->t_items, li_trans) { switch (lip->li_type) { @@ -256,28 +259,29 @@ xfs_defer_trans_roll( bli = container_of(lip, struct xfs_buf_log_item, bli_item); if (bli->bli_flags & XFS_BLI_HOLD) { - if (bpcount >= XFS_DEFER_OPS_NR_BUFS) { + if (dres->dr_bufs >= XFS_DEFER_OPS_NR_BUFS) { ASSERT(0); return -EFSCORRUPTED; } if (bli->bli_flags & XFS_BLI_ORDERED) - ordered |= (1U << bpcount); + dres->dr_ordered |= + (1U << dres->dr_bufs); else xfs_trans_dirty_buf(tp, bli->bli_buf); - bplist[bpcount++] = bli->bli_buf; + dres->dr_bp[dres->dr_bufs++] = bli->bli_buf; } break; case XFS_LI_INODE: ili = container_of(lip, struct xfs_inode_log_item, ili_item); if (ili->ili_lock_flags == 0) { - if (ipcount >= XFS_DEFER_OPS_NR_INODES) { + if (dres->dr_inos >= XFS_DEFER_OPS_NR_INODES) { ASSERT(0); return -EFSCORRUPTED; } xfs_trans_log_inode(tp, ili->ili_inode, XFS_ILOG_CORE); - iplist[ipcount++] = ili->ili_inode; + dres->dr_ip[dres->dr_inos++] = ili->ili_inode; } break; default: @@ -285,7 +289,43 @@ xfs_defer_trans_roll( } } - trace_xfs_defer_trans_roll(tp, _RET_IP_); + return 0; +} + +/* Attach the held resources to the transaction. */ +static void +xfs_defer_restore_resources( + struct xfs_trans *tp, + struct xfs_defer_resources *dres) +{ + unsigned short i; + + /* Rejoin the joined inodes. */ + for (i = 0; i < dres->dr_inos; i++) + xfs_trans_ijoin(tp, dres->dr_ip[i], 0); + + /* Rejoin the buffers and dirty them so the log moves forward. */ + for (i = 0; i < dres->dr_bufs; i++) { + xfs_trans_bjoin(tp, dres->dr_bp[i]); + if (dres->dr_ordered & (1U << i)) + xfs_trans_ordered_buf(tp, dres->dr_bp[i]); + xfs_trans_bhold(tp, dres->dr_bp[i]); + } +} + +/* Roll a transaction so we can do some deferred op processing. */ +STATIC int +xfs_defer_trans_roll( + struct xfs_trans **tpp) +{ + struct xfs_defer_resources dres = { }; + int error; + + error = xfs_defer_save_resources(&dres, *tpp); + if (error) + return error; + + trace_xfs_defer_trans_roll(*tpp, _RET_IP_); /* * Roll the transaction. Rolling always given a new transaction (even @@ -295,22 +335,11 @@ xfs_defer_trans_roll( * happened. */ error = xfs_trans_roll(tpp); - tp = *tpp; - /* Rejoin the joined inodes. */ - for (i = 0; i < ipcount; i++) - xfs_trans_ijoin(tp, iplist[i], 0); - - /* Rejoin the buffers and dirty them so the log moves forward. */ - for (i = 0; i < bpcount; i++) { - xfs_trans_bjoin(tp, bplist[i]); - if (ordered & (1U << i)) - xfs_trans_ordered_buf(tp, bplist[i]); - xfs_trans_bhold(tp, bplist[i]); - } + xfs_defer_restore_resources(*tpp, &dres); if (error) - trace_xfs_defer_trans_roll_error(tp, error); + trace_xfs_defer_trans_roll_error(*tpp, error); return error; } @@ -342,7 +371,7 @@ xfs_defer_cancel_list( ops->cancel_item(pwi); } ASSERT(dfp->dfp_count == 0); - kmem_free(dfp); + kmem_cache_free(xfs_defer_pending_cache, dfp); } } @@ -439,7 +468,7 @@ xfs_defer_finish_one( /* Done with the dfp, free it. */ list_del(&dfp->dfp_list); - kmem_free(dfp); + kmem_cache_free(xfs_defer_pending_cache, dfp); out: if (ops->finish_cleanup) ops->finish_cleanup(tp, state, error); @@ -573,8 +602,8 @@ xfs_defer_add( dfp = NULL; } if (!dfp) { - dfp = kmem_alloc(sizeof(struct xfs_defer_pending), - KM_NOFS); + dfp = kmem_cache_zalloc(xfs_defer_pending_cache, + GFP_NOFS | __GFP_NOFAIL); dfp->dfp_type = type; dfp->dfp_intent = NULL; dfp->dfp_done = NULL; @@ -627,10 +656,11 @@ xfs_defer_move( */ static struct xfs_defer_capture * xfs_defer_ops_capture( - struct xfs_trans *tp, - struct xfs_inode *capture_ip) + struct xfs_trans *tp) { struct xfs_defer_capture *dfc; + unsigned short i; + int error; if (list_empty(&tp->t_dfops)) return NULL; @@ -654,27 +684,48 @@ xfs_defer_ops_capture( /* Preserve the log reservation size. */ dfc->dfc_logres = tp->t_log_res; + error = xfs_defer_save_resources(&dfc->dfc_held, tp); + if (error) { + /* + * Resource capture should never fail, but if it does, we + * still have to shut down the log and release things + * properly. + */ + xfs_force_shutdown(tp->t_mountp, SHUTDOWN_CORRUPT_INCORE); + } + /* - * Grab an extra reference to this inode and attach it to the capture - * structure. + * Grab extra references to the inodes and buffers because callers are + * expected to release their held references after we commit the + * transaction. */ - if (capture_ip) { - ihold(VFS_I(capture_ip)); - dfc->dfc_capture_ip = capture_ip; + for (i = 0; i < dfc->dfc_held.dr_inos; i++) { + ASSERT(xfs_isilocked(dfc->dfc_held.dr_ip[i], XFS_ILOCK_EXCL)); + ihold(VFS_I(dfc->dfc_held.dr_ip[i])); } + for (i = 0; i < dfc->dfc_held.dr_bufs; i++) + xfs_buf_hold(dfc->dfc_held.dr_bp[i]); + return dfc; } /* Release all resources that we used to capture deferred ops. */ void -xfs_defer_ops_release( +xfs_defer_ops_capture_free( struct xfs_mount *mp, struct xfs_defer_capture *dfc) { + unsigned short i; + xfs_defer_cancel_list(mp, &dfc->dfc_dfops); - if (dfc->dfc_capture_ip) - xfs_irele(dfc->dfc_capture_ip); + + for (i = 0; i < dfc->dfc_held.dr_bufs; i++) + xfs_buf_relse(dfc->dfc_held.dr_bp[i]); + + for (i = 0; i < dfc->dfc_held.dr_inos; i++) + xfs_irele(dfc->dfc_held.dr_ip[i]); + kmem_free(dfc); } @@ -689,24 +740,21 @@ xfs_defer_ops_release( int xfs_defer_ops_capture_and_commit( struct xfs_trans *tp, - struct xfs_inode *capture_ip, struct list_head *capture_list) { struct xfs_mount *mp = tp->t_mountp; struct xfs_defer_capture *dfc; int error; - ASSERT(!capture_ip || xfs_isilocked(capture_ip, XFS_ILOCK_EXCL)); - /* If we don't capture anything, commit transaction and exit. */ - dfc = xfs_defer_ops_capture(tp, capture_ip); + dfc = xfs_defer_ops_capture(tp); if (!dfc) return xfs_trans_commit(tp); /* Commit the transaction and add the capture structure to the list. */ error = xfs_trans_commit(tp); if (error) { - xfs_defer_ops_release(mp, dfc); + xfs_defer_ops_capture_free(mp, dfc); return error; } @@ -724,17 +772,19 @@ void xfs_defer_ops_continue( struct xfs_defer_capture *dfc, struct xfs_trans *tp, - struct xfs_inode **captured_ipp) + struct xfs_defer_resources *dres) { ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY)); /* Lock and join the captured inode to the new transaction. */ - if (dfc->dfc_capture_ip) { - xfs_ilock(dfc->dfc_capture_ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, dfc->dfc_capture_ip, 0); - } - *captured_ipp = dfc->dfc_capture_ip; + if (dfc->dfc_held.dr_inos == 2) + xfs_lock_two_inodes(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL, + dfc->dfc_held.dr_ip[1], XFS_ILOCK_EXCL); + else if (dfc->dfc_held.dr_inos == 1) + xfs_ilock(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL); + xfs_defer_restore_resources(tp, &dfc->dfc_held); + memcpy(dres, &dfc->dfc_held, sizeof(struct xfs_defer_resources)); /* Move captured dfops chain and state to the transaction. */ list_splice_init(&dfc->dfc_dfops, &tp->t_dfops); @@ -742,3 +792,82 @@ xfs_defer_ops_continue( kmem_free(dfc); } + +/* Release the resources captured and continued during recovery. */ +void +xfs_defer_resources_rele( + struct xfs_defer_resources *dres) +{ + unsigned short i; + + for (i = 0; i < dres->dr_inos; i++) { + xfs_iunlock(dres->dr_ip[i], XFS_ILOCK_EXCL); + xfs_irele(dres->dr_ip[i]); + dres->dr_ip[i] = NULL; + } + + for (i = 0; i < dres->dr_bufs; i++) { + xfs_buf_relse(dres->dr_bp[i]); + dres->dr_bp[i] = NULL; + } + + dres->dr_inos = 0; + dres->dr_bufs = 0; + dres->dr_ordered = 0; +} + +static inline int __init +xfs_defer_init_cache(void) +{ + xfs_defer_pending_cache = kmem_cache_create("xfs_defer_pending", + sizeof(struct xfs_defer_pending), + 0, 0, NULL); + + return xfs_defer_pending_cache != NULL ? 0 : -ENOMEM; +} + +static inline void +xfs_defer_destroy_cache(void) +{ + kmem_cache_destroy(xfs_defer_pending_cache); + xfs_defer_pending_cache = NULL; +} + +/* Set up caches for deferred work items. */ +int __init +xfs_defer_init_item_caches(void) +{ + int error; + + error = xfs_defer_init_cache(); + if (error) + return error; + error = xfs_rmap_intent_init_cache(); + if (error) + goto err; + error = xfs_refcount_intent_init_cache(); + if (error) + goto err; + error = xfs_bmap_intent_init_cache(); + if (error) + goto err; + error = xfs_extfree_intent_init_cache(); + if (error) + goto err; + + return 0; +err: + xfs_defer_destroy_item_caches(); + return error; +} + +/* Destroy all the deferred work item caches, if they've been allocated. */ +void +xfs_defer_destroy_item_caches(void) +{ + xfs_extfree_intent_destroy_cache(); + xfs_bmap_intent_destroy_cache(); + xfs_refcount_intent_destroy_cache(); + xfs_rmap_intent_destroy_cache(); + xfs_defer_destroy_cache(); +} diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index 05472f71fffe..7bb8a31ad65b 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -65,6 +65,30 @@ extern const struct xfs_defer_op_type xfs_extent_free_defer_type; extern const struct xfs_defer_op_type xfs_agfl_free_defer_type; /* + * Deferred operation item relogging limits. + */ +#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */ +#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */ + +/* Resources that must be held across a transaction roll. */ +struct xfs_defer_resources { + /* held buffers */ + struct xfs_buf *dr_bp[XFS_DEFER_OPS_NR_BUFS]; + + /* inodes with no unlock flags */ + struct xfs_inode *dr_ip[XFS_DEFER_OPS_NR_INODES]; + + /* number of held buffers */ + unsigned short dr_bufs; + + /* bitmap of ordered buffers */ + unsigned short dr_ordered; + + /* number of held inodes */ + unsigned short dr_inos; +}; + +/* * This structure enables a dfops user to detach the chain of deferred * operations from a transaction so that they can be continued later. */ @@ -83,11 +107,7 @@ struct xfs_defer_capture { /* Log reservation saved from the transaction. */ unsigned int dfc_logres; - /* - * An inode reference that must be maintained to complete the deferred - * work. - */ - struct xfs_inode *dfc_capture_ip; + struct xfs_defer_resources dfc_held; }; /* @@ -95,9 +115,14 @@ struct xfs_defer_capture { * This doesn't normally happen except log recovery. */ int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp, - struct xfs_inode *capture_ip, struct list_head *capture_list); + struct list_head *capture_list); void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp, - struct xfs_inode **captured_ipp); -void xfs_defer_ops_release(struct xfs_mount *mp, struct xfs_defer_capture *d); + struct xfs_defer_resources *dres); +void xfs_defer_ops_capture_free(struct xfs_mount *mp, + struct xfs_defer_capture *d); +void xfs_defer_resources_rele(struct xfs_defer_resources *dres); + +int __init xfs_defer_init_item_caches(void); +void xfs_defer_destroy_item_caches(void); #endif /* __XFS_DEFER_H__ */ diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index deeb74becabc..15a362e2f5ea 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -22,7 +22,7 @@ xfs_calc_dquots_per_chunk( unsigned int nbblks) /* basic block units */ { ASSERT(nbblks > 0); - return BBTOB(nbblks) / sizeof(xfs_dqblk_t); + return BBTOB(nbblks) / sizeof(struct xfs_dqblk); } /* @@ -127,7 +127,7 @@ xfs_dqblk_repair( * Typically, a repair is only requested by quotacheck. */ ASSERT(id != -1); - memset(dqb, 0, sizeof(xfs_dqblk_t)); + memset(dqb, 0, sizeof(struct xfs_dqblk)); dqb->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); dqb->dd_diskdq.d_version = XFS_DQUOT_VERSION; diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 2d7057b7984b..d665c04e69dd 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -184,7 +184,7 @@ typedef struct xfs_sb { * Superblock - on disk version. Must match the in core version above. * Must be padded to 64 bit alignment. */ -typedef struct xfs_dsb { +struct xfs_dsb { __be32 sb_magicnum; /* magic number == XFS_SB_MAGIC */ __be32 sb_blocksize; /* logical block size, bytes */ __be64 sb_dblocks; /* number of data blocks */ @@ -263,7 +263,7 @@ typedef struct xfs_dsb { uuid_t sb_meta_uuid; /* metadata file system unique id */ /* must be padded to 64 bit alignment */ -} xfs_dsb_t; +}; /* * Misc. Flags - warning - these will be cleared by xfs_repair unless @@ -780,7 +780,7 @@ static inline time64_t xfs_bigtime_to_unix(uint64_t ondisk_seconds) * padding field for v3 inodes. */ #define XFS_DINODE_MAGIC 0x494e /* 'IN' */ -typedef struct xfs_dinode { +struct xfs_dinode { __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ __be16 di_mode; /* mode and type of file */ __u8 di_version; /* inode version */ @@ -825,7 +825,7 @@ typedef struct xfs_dinode { uuid_t di_uuid; /* UUID of the filesystem */ /* structure must be padded to 64 bit alignment */ -} xfs_dinode_t; +}; #define XFS_DINODE_CRC_OFF offsetof(struct xfs_dinode, di_crc) @@ -1215,7 +1215,7 @@ struct xfs_disk_dquot { * This is what goes on disk. This is separated from the xfs_disk_dquot because * carrying the unnecessary padding would be a waste of memory. */ -typedef struct xfs_dqblk { +struct xfs_dqblk { struct xfs_disk_dquot dd_diskdq; /* portion living incore as well */ char dd_fill[4];/* filling for posterity */ @@ -1225,7 +1225,7 @@ typedef struct xfs_dqblk { __be32 dd_crc; /* checksum */ __be64 dd_lsn; /* last modification in log */ uuid_t dd_uuid; /* location information */ -} xfs_dqblk_t; +}; #define XFS_DQUOT_CRC_OFF offsetof(struct xfs_dqblk, dd_crc) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index bde2b4c64dbe..c43877c8a279 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -268,6 +268,8 @@ typedef struct xfs_fsop_resblks { */ #define XFS_MIN_AG_BYTES (1ULL << 24) /* 16 MB */ #define XFS_MAX_AG_BYTES (1ULL << 40) /* 1 TB */ +#define XFS_MAX_AG_BLOCKS (XFS_MAX_AG_BYTES / XFS_MIN_BLOCKSIZE) +#define XFS_MAX_CRC_AG_BLOCKS (XFS_MAX_AG_BYTES / XFS_MIN_CRC_BLOCKSIZE) /* keep the maximum size under 2^31 by a small amount */ #define XFS_MAX_LOG_BYTES \ diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 994ad783d407..b418fe0c0679 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1827,7 +1827,7 @@ xfs_difree_inode_chunk( if (!xfs_inobt_issparse(rec->ir_holemask)) { /* not sparse, calculate extent info directly */ - xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, sagbno), + xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, sagbno), M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES); return; @@ -1872,7 +1872,7 @@ xfs_difree_inode_chunk( ASSERT(agbno % mp->m_sb.sb_spino_align == 0); ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); - xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, agbno), + xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, agbno), contigblk, &XFS_RMAP_OINFO_INODES); /* reset range to current bit and carry on... */ @@ -2793,6 +2793,7 @@ xfs_ialloc_setup_geometry( inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG; igeo->inobt_maxlevels = xfs_btree_compute_maxlevels(igeo->inobt_mnr, inodes); + ASSERT(igeo->inobt_maxlevels <= xfs_iallocbt_maxlevels_ondisk()); /* * Set the maximum inode count for this filesystem, being careful not diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 27190840c5d8..b2ad2fdc40f5 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -22,6 +22,8 @@ #include "xfs_rmap.h" #include "xfs_ag.h" +static struct kmem_cache *xfs_inobt_cur_cache; + STATIC int xfs_inobt_get_minrecs( struct xfs_btree_cur *cur, @@ -432,10 +434,8 @@ xfs_inobt_init_common( { struct xfs_btree_cur *cur; - cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); - cur->bc_tp = tp; - cur->bc_mp = mp; - cur->bc_btnum = btnum; + cur = xfs_btree_alloc_cursor(mp, tp, btnum, + M_IGEO(mp)->inobt_maxlevels, xfs_inobt_cur_cache); if (btnum == XFS_BTNUM_INO) { cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_ibt_2); cur->bc_ops = &xfs_inobt_ops; @@ -444,8 +444,6 @@ xfs_inobt_init_common( cur->bc_ops = &xfs_finobt_ops; } - cur->bc_blocklog = mp->m_sb.sb_blocklog; - if (xfs_has_crc(mp)) cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; @@ -530,6 +528,17 @@ xfs_inobt_commit_staged_btree( } } +/* Calculate number of records in an inode btree block. */ +static inline unsigned int +xfs_inobt_block_maxrecs( + unsigned int blocklen, + bool leaf) +{ + if (leaf) + return blocklen / sizeof(xfs_inobt_rec_t); + return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t)); +} + /* * Calculate number of records in an inobt btree block. */ @@ -540,10 +549,54 @@ xfs_inobt_maxrecs( int leaf) { blocklen -= XFS_INOBT_BLOCK_LEN(mp); + return xfs_inobt_block_maxrecs(blocklen, leaf); +} - if (leaf) - return blocklen / sizeof(xfs_inobt_rec_t); - return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t)); +/* + * Maximum number of inode btree records per AG. Pretend that we can fill an + * entire AG completely full of inodes except for the AG headers. + */ +#define XFS_MAX_INODE_RECORDS \ + ((XFS_MAX_AG_BYTES - (4 * BBSIZE)) / XFS_DINODE_MIN_SIZE) / \ + XFS_INODES_PER_CHUNK + +/* Compute the max possible height for the inode btree. */ +static inline unsigned int +xfs_inobt_maxlevels_ondisk(void) +{ + unsigned int minrecs[2]; + unsigned int blocklen; + + blocklen = min(XFS_MIN_BLOCKSIZE - XFS_BTREE_SBLOCK_LEN, + XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN); + + minrecs[0] = xfs_inobt_block_maxrecs(blocklen, true) / 2; + minrecs[1] = xfs_inobt_block_maxrecs(blocklen, false) / 2; + + return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_INODE_RECORDS); +} + +/* Compute the max possible height for the free inode btree. */ +static inline unsigned int +xfs_finobt_maxlevels_ondisk(void) +{ + unsigned int minrecs[2]; + unsigned int blocklen; + + blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN; + + minrecs[0] = xfs_inobt_block_maxrecs(blocklen, true) / 2; + minrecs[1] = xfs_inobt_block_maxrecs(blocklen, false) / 2; + + return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_INODE_RECORDS); +} + +/* Compute the max possible height for either inode btree. */ +unsigned int +xfs_iallocbt_maxlevels_ondisk(void) +{ + return max(xfs_inobt_maxlevels_ondisk(), + xfs_finobt_maxlevels_ondisk()); } /* @@ -761,3 +814,22 @@ xfs_iallocbt_calc_size( { return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr, len); } + +int __init +xfs_inobt_init_cur_cache(void) +{ + xfs_inobt_cur_cache = kmem_cache_create("xfs_inobt_cur", + xfs_btree_cur_sizeof(xfs_inobt_maxlevels_ondisk()), + 0, 0, NULL); + + if (!xfs_inobt_cur_cache) + return -ENOMEM; + return 0; +} + +void +xfs_inobt_destroy_cur_cache(void) +{ + kmem_cache_destroy(xfs_inobt_cur_cache); + xfs_inobt_cur_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index 8a322d402e61..26451cb76b98 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h @@ -75,4 +75,9 @@ int xfs_inobt_cur(struct xfs_mount *mp, struct xfs_trans *tp, void xfs_inobt_commit_staged_btree(struct xfs_btree_cur *cur, struct xfs_trans *tp, struct xfs_buf *agbp); +unsigned int xfs_iallocbt_maxlevels_ondisk(void); + +int __init xfs_inobt_init_cur_cache(void); +void xfs_inobt_destroy_cur_cache(void); + #endif /* __XFS_IALLOC_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 3932b4ebf903..cae9708c8587 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -51,9 +51,9 @@ xfs_inode_buf_verify( agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp)); ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; for (i = 0; i < ni; i++) { - int di_ok; - xfs_dinode_t *dip; - xfs_agino_t unlinked_ino; + struct xfs_dinode *dip; + xfs_agino_t unlinked_ino; + int di_ok; dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); unlinked_ino = be32_to_cpu(dip->di_next_unlinked); diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 1d174909f9bd..9149f4f796fc 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -26,7 +26,7 @@ #include "xfs_types.h" #include "xfs_errortag.h" -kmem_zone_t *xfs_ifork_zone; +struct kmem_cache *xfs_ifork_cache; void xfs_init_local_fork( @@ -67,10 +67,10 @@ xfs_init_local_fork( */ STATIC int xfs_iformat_local( - xfs_inode_t *ip, - xfs_dinode_t *dip, - int whichfork, - int size) + struct xfs_inode *ip, + struct xfs_dinode *dip, + int whichfork, + int size) { /* * If the size is unreasonable, then something @@ -162,8 +162,8 @@ xfs_iformat_extents( */ STATIC int xfs_iformat_btree( - xfs_inode_t *ip, - xfs_dinode_t *dip, + struct xfs_inode *ip, + struct xfs_dinode *dip, int whichfork) { struct xfs_mount *mp = ip->i_mount; @@ -284,7 +284,7 @@ xfs_ifork_alloc( { struct xfs_ifork *ifp; - ifp = kmem_cache_zalloc(xfs_ifork_zone, GFP_NOFS | __GFP_NOFAIL); + ifp = kmem_cache_zalloc(xfs_ifork_cache, GFP_NOFS | __GFP_NOFAIL); ifp->if_format = format; ifp->if_nextents = nextents; return ifp; @@ -325,7 +325,7 @@ xfs_iformat_attr_fork( } if (error) { - kmem_cache_free(xfs_ifork_zone, ip->i_afp); + kmem_cache_free(xfs_ifork_cache, ip->i_afp); ip->i_afp = NULL; } return error; @@ -580,8 +580,8 @@ xfs_iextents_copy( */ void xfs_iflush_fork( - xfs_inode_t *ip, - xfs_dinode_t *dip, + struct xfs_inode *ip, + struct xfs_dinode *dip, struct xfs_inode_log_item *iip, int whichfork) { @@ -676,7 +676,7 @@ xfs_ifork_init_cow( if (ip->i_cowfp) return; - ip->i_cowfp = kmem_cache_zalloc(xfs_ifork_zone, + ip->i_cowfp = kmem_cache_zalloc(xfs_ifork_cache, GFP_NOFS | __GFP_NOFAIL); ip->i_cowfp->if_format = XFS_DINODE_FMT_EXTENTS; } diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index a6f7897b6887..3d64a3acb0ed 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -221,7 +221,7 @@ static inline bool xfs_iext_peek_prev_extent(struct xfs_ifork *ifp, xfs_iext_get_extent((ifp), (ext), (got)); \ xfs_iext_next((ifp), (ext))) -extern struct kmem_zone *xfs_ifork_zone; +extern struct kmem_cache *xfs_ifork_cache; extern void xfs_ifork_init_cow(struct xfs_inode *ip); diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index e5d767a7fc5d..327ba25e9e17 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -24,6 +24,8 @@ #include "xfs_rmap.h" #include "xfs_ag.h" +struct kmem_cache *xfs_refcount_intent_cache; + /* Allowable refcount adjustment amounts. */ enum xfs_refc_adjust_op { XFS_REFCOUNT_ADJUST_INCREASE = 1, @@ -916,8 +918,7 @@ xfs_refcount_adjust_extents( struct xfs_btree_cur *cur, xfs_agblock_t *agbno, xfs_extlen_t *aglen, - enum xfs_refc_adjust_op adj, - struct xfs_owner_info *oinfo) + enum xfs_refc_adjust_op adj) { struct xfs_refcount_irec ext, tmp; int error; @@ -974,8 +975,8 @@ xfs_refcount_adjust_extents( fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, tmp.rc_startblock); - xfs_bmap_add_free(cur->bc_tp, fsbno, - tmp.rc_blockcount, oinfo); + xfs_free_extent_later(cur->bc_tp, fsbno, + tmp.rc_blockcount, NULL); } (*agbno) += tmp.rc_blockcount; @@ -1019,8 +1020,8 @@ xfs_refcount_adjust_extents( fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, ext.rc_startblock); - xfs_bmap_add_free(cur->bc_tp, fsbno, ext.rc_blockcount, - oinfo); + xfs_free_extent_later(cur->bc_tp, fsbno, + ext.rc_blockcount, NULL); } skip: @@ -1048,8 +1049,7 @@ xfs_refcount_adjust( xfs_extlen_t aglen, xfs_agblock_t *new_agbno, xfs_extlen_t *new_aglen, - enum xfs_refc_adjust_op adj, - struct xfs_owner_info *oinfo) + enum xfs_refc_adjust_op adj) { bool shape_changed; int shape_changes = 0; @@ -1092,8 +1092,7 @@ xfs_refcount_adjust( cur->bc_ag.refc.shape_changes++; /* Now that we've taken care of the ends, adjust the middle extents */ - error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, - adj, oinfo); + error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, adj); if (error) goto out_error; @@ -1188,12 +1187,12 @@ xfs_refcount_finish_one( switch (type) { case XFS_REFCOUNT_INCREASE: error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, - new_len, XFS_REFCOUNT_ADJUST_INCREASE, NULL); + new_len, XFS_REFCOUNT_ADJUST_INCREASE); *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); break; case XFS_REFCOUNT_DECREASE: error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, - new_len, XFS_REFCOUNT_ADJUST_DECREASE, NULL); + new_len, XFS_REFCOUNT_ADJUST_DECREASE); *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); break; case XFS_REFCOUNT_ALLOC_COW: @@ -1235,8 +1234,8 @@ __xfs_refcount_add( type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), blockcount); - ri = kmem_alloc(sizeof(struct xfs_refcount_intent), - KM_NOFS); + ri = kmem_cache_alloc(xfs_refcount_intent_cache, + GFP_NOFS | __GFP_NOFAIL); INIT_LIST_HEAD(&ri->ri_list); ri->ri_type = type; ri->ri_startblock = startblock; @@ -1742,7 +1741,7 @@ xfs_refcount_recover_cow_leftovers( rr->rr_rrec.rc_blockcount); /* Free the block. */ - xfs_bmap_add_free(tp, fsb, rr->rr_rrec.rc_blockcount, NULL); + xfs_free_extent_later(tp, fsb, rr->rr_rrec.rc_blockcount, NULL); error = xfs_trans_commit(tp); if (error) @@ -1782,3 +1781,20 @@ xfs_refcount_has_record( return xfs_btree_has_record(cur, &low, &high, exists); } + +int __init +xfs_refcount_intent_init_cache(void) +{ + xfs_refcount_intent_cache = kmem_cache_create("xfs_refc_intent", + sizeof(struct xfs_refcount_intent), + 0, 0, NULL); + + return xfs_refcount_intent_cache != NULL ? 0 : -ENOMEM; +} + +void +xfs_refcount_intent_destroy_cache(void) +{ + kmem_cache_destroy(xfs_refcount_intent_cache); + xfs_refcount_intent_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index 02cb3aa405be..9eb01edbd89d 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -32,8 +32,8 @@ enum xfs_refcount_intent_type { struct xfs_refcount_intent { struct list_head ri_list; enum xfs_refcount_intent_type ri_type; - xfs_fsblock_t ri_startblock; xfs_extlen_t ri_blockcount; + xfs_fsblock_t ri_startblock; }; void xfs_refcount_increase_extent(struct xfs_trans *tp, @@ -83,4 +83,9 @@ extern void xfs_refcount_btrec_to_irec(const union xfs_btree_rec *rec, extern int xfs_refcount_insert(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec, int *stat); +extern struct kmem_cache *xfs_refcount_intent_cache; + +int __init xfs_refcount_intent_init_cache(void); +void xfs_refcount_intent_destroy_cache(void); + #endif /* __XFS_REFCOUNT_H__ */ diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 1ef9b99962ab..d14c1720b0fb 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -21,6 +21,8 @@ #include "xfs_rmap.h" #include "xfs_ag.h" +static struct kmem_cache *xfs_refcountbt_cur_cache; + static struct xfs_btree_cur * xfs_refcountbt_dup_cursor( struct xfs_btree_cur *cur) @@ -322,11 +324,8 @@ xfs_refcountbt_init_common( ASSERT(pag->pag_agno < mp->m_sb.sb_agcount); - cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); - cur->bc_tp = tp; - cur->bc_mp = mp; - cur->bc_btnum = XFS_BTNUM_REFC; - cur->bc_blocklog = mp->m_sb.sb_blocklog; + cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_REFC, + mp->m_refc_maxlevels, xfs_refcountbt_cur_cache); cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_refcbt_2); cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; @@ -396,6 +395,18 @@ xfs_refcountbt_commit_staged_btree( xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_refcountbt_ops); } +/* Calculate number of records in a refcount btree block. */ +static inline unsigned int +xfs_refcountbt_block_maxrecs( + unsigned int blocklen, + bool leaf) +{ + if (leaf) + return blocklen / sizeof(struct xfs_refcount_rec); + return blocklen / (sizeof(struct xfs_refcount_key) + + sizeof(xfs_refcount_ptr_t)); +} + /* * Calculate the number of records in a refcount btree block. */ @@ -405,11 +416,22 @@ xfs_refcountbt_maxrecs( bool leaf) { blocklen -= XFS_REFCOUNT_BLOCK_LEN; + return xfs_refcountbt_block_maxrecs(blocklen, leaf); +} - if (leaf) - return blocklen / sizeof(struct xfs_refcount_rec); - return blocklen / (sizeof(struct xfs_refcount_key) + - sizeof(xfs_refcount_ptr_t)); +/* Compute the max possible height of the maximally sized refcount btree. */ +unsigned int +xfs_refcountbt_maxlevels_ondisk(void) +{ + unsigned int minrecs[2]; + unsigned int blocklen; + + blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN; + + minrecs[0] = xfs_refcountbt_block_maxrecs(blocklen, true) / 2; + minrecs[1] = xfs_refcountbt_block_maxrecs(blocklen, false) / 2; + + return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_CRC_AG_BLOCKS); } /* Compute the maximum height of a refcount btree. */ @@ -417,8 +439,14 @@ void xfs_refcountbt_compute_maxlevels( struct xfs_mount *mp) { + if (!xfs_has_reflink(mp)) { + mp->m_refc_maxlevels = 0; + return; + } + mp->m_refc_maxlevels = xfs_btree_compute_maxlevels( mp->m_refc_mnr, mp->m_sb.sb_agblocks); + ASSERT(mp->m_refc_maxlevels <= xfs_refcountbt_maxlevels_ondisk()); } /* Calculate the refcount btree size for some records. */ @@ -488,3 +516,22 @@ xfs_refcountbt_calc_reserves( return error; } + +int __init +xfs_refcountbt_init_cur_cache(void) +{ + xfs_refcountbt_cur_cache = kmem_cache_create("xfs_refcbt_cur", + xfs_btree_cur_sizeof(xfs_refcountbt_maxlevels_ondisk()), + 0, 0, NULL); + + if (!xfs_refcountbt_cur_cache) + return -ENOMEM; + return 0; +} + +void +xfs_refcountbt_destroy_cur_cache(void) +{ + kmem_cache_destroy(xfs_refcountbt_cur_cache); + xfs_refcountbt_cur_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h index bd9ed9e1e41f..d66b37259bed 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.h +++ b/fs/xfs/libxfs/xfs_refcount_btree.h @@ -65,4 +65,9 @@ extern int xfs_refcountbt_calc_reserves(struct xfs_mount *mp, void xfs_refcountbt_commit_staged_btree(struct xfs_btree_cur *cur, struct xfs_trans *tp, struct xfs_buf *agbp); +unsigned int xfs_refcountbt_maxlevels_ondisk(void); + +int __init xfs_refcountbt_init_cur_cache(void); +void xfs_refcountbt_destroy_cur_cache(void); + #endif /* __XFS_REFCOUNT_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index f45929b1b94a..cd322174dbff 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -24,6 +24,8 @@ #include "xfs_inode.h" #include "xfs_ag.h" +struct kmem_cache *xfs_rmap_intent_cache; + /* * Lookup the first record less than or equal to [bno, len, owner, offset] * in the btree given by cur. @@ -2485,7 +2487,7 @@ __xfs_rmap_add( bmap->br_blockcount, bmap->br_state); - ri = kmem_alloc(sizeof(struct xfs_rmap_intent), KM_NOFS); + ri = kmem_cache_alloc(xfs_rmap_intent_cache, GFP_NOFS | __GFP_NOFAIL); INIT_LIST_HEAD(&ri->ri_list); ri->ri_type = type; ri->ri_owner = owner; @@ -2779,3 +2781,20 @@ const struct xfs_owner_info XFS_RMAP_OINFO_REFC = { const struct xfs_owner_info XFS_RMAP_OINFO_COW = { .oi_owner = XFS_RMAP_OWN_COW, }; + +int __init +xfs_rmap_intent_init_cache(void) +{ + xfs_rmap_intent_cache = kmem_cache_create("xfs_rmap_intent", + sizeof(struct xfs_rmap_intent), + 0, 0, NULL); + + return xfs_rmap_intent_cache != NULL ? 0 : -ENOMEM; +} + +void +xfs_rmap_intent_destroy_cache(void) +{ + kmem_cache_destroy(xfs_rmap_intent_cache); + xfs_rmap_intent_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index fd67904ed446..b718ebeda372 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -159,8 +159,8 @@ enum xfs_rmap_intent_type { struct xfs_rmap_intent { struct list_head ri_list; enum xfs_rmap_intent_type ri_type; - uint64_t ri_owner; int ri_whichfork; + uint64_t ri_owner; struct xfs_bmbt_irec ri_bmap; }; @@ -215,4 +215,9 @@ extern const struct xfs_owner_info XFS_RMAP_OINFO_INODES; extern const struct xfs_owner_info XFS_RMAP_OINFO_REFC; extern const struct xfs_owner_info XFS_RMAP_OINFO_COW; +extern struct kmem_cache *xfs_rmap_intent_cache; + +int __init xfs_rmap_intent_init_cache(void); +void xfs_rmap_intent_destroy_cache(void); + #endif /* __XFS_RMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index b7dbbfb3aeed..69e104d0277f 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -22,6 +22,8 @@ #include "xfs_ag.h" #include "xfs_ag_resv.h" +static struct kmem_cache *xfs_rmapbt_cur_cache; + /* * Reverse map btree. * @@ -451,13 +453,10 @@ xfs_rmapbt_init_common( { struct xfs_btree_cur *cur; - cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); - cur->bc_tp = tp; - cur->bc_mp = mp; /* Overlapping btree; 2 keys per pointer. */ - cur->bc_btnum = XFS_BTNUM_RMAP; + cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_RMAP, + mp->m_rmap_maxlevels, xfs_rmapbt_cur_cache); cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING; - cur->bc_blocklog = mp->m_sb.sb_blocklog; cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2); cur->bc_ops = &xfs_rmapbt_ops; @@ -522,6 +521,18 @@ xfs_rmapbt_commit_staged_btree( xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_rmapbt_ops); } +/* Calculate number of records in a reverse mapping btree block. */ +static inline unsigned int +xfs_rmapbt_block_maxrecs( + unsigned int blocklen, + bool leaf) +{ + if (leaf) + return blocklen / sizeof(struct xfs_rmap_rec); + return blocklen / + (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t)); +} + /* * Calculate number of records in an rmap btree block. */ @@ -531,11 +542,33 @@ xfs_rmapbt_maxrecs( int leaf) { blocklen -= XFS_RMAP_BLOCK_LEN; + return xfs_rmapbt_block_maxrecs(blocklen, leaf); +} - if (leaf) - return blocklen / sizeof(struct xfs_rmap_rec); - return blocklen / - (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t)); +/* Compute the max possible height for reverse mapping btrees. */ +unsigned int +xfs_rmapbt_maxlevels_ondisk(void) +{ + unsigned int minrecs[2]; + unsigned int blocklen; + + blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN; + + minrecs[0] = xfs_rmapbt_block_maxrecs(blocklen, true) / 2; + minrecs[1] = xfs_rmapbt_block_maxrecs(blocklen, false) / 2; + + /* + * Compute the asymptotic maxlevels for an rmapbt on any reflink fs. + * + * On a reflink filesystem, each AG block can have up to 2^32 (per the + * refcount record format) owners, which means that theoretically we + * could face up to 2^64 rmap records. However, we're likely to run + * out of blocks in the AG long before that happens, which means that + * we must compute the max height based on what the btree will look + * like if it consumes almost all the blocks in the AG due to maximal + * sharing factor. + */ + return xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS); } /* Compute the maximum height of an rmap btree. */ @@ -543,26 +576,36 @@ void xfs_rmapbt_compute_maxlevels( struct xfs_mount *mp) { - /* - * On a non-reflink filesystem, the maximum number of rmap - * records is the number of blocks in the AG, hence the max - * rmapbt height is log_$maxrecs($agblocks). However, with - * reflink each AG block can have up to 2^32 (per the refcount - * record format) owners, which means that theoretically we - * could face up to 2^64 rmap records. - * - * That effectively means that the max rmapbt height must be - * XFS_BTREE_MAXLEVELS. "Fortunately" we'll run out of AG - * blocks to feed the rmapbt long before the rmapbt reaches - * maximum height. The reflink code uses ag_resv_critical to - * disallow reflinking when less than 10% of the per-AG metadata - * block reservation since the fallback is a regular file copy. - */ - if (xfs_has_reflink(mp)) - mp->m_rmap_maxlevels = XFS_BTREE_MAXLEVELS; - else + if (!xfs_has_rmapbt(mp)) { + mp->m_rmap_maxlevels = 0; + return; + } + + if (xfs_has_reflink(mp)) { + /* + * Compute the asymptotic maxlevels for an rmap btree on a + * filesystem that supports reflink. + * + * On a reflink filesystem, each AG block can have up to 2^32 + * (per the refcount record format) owners, which means that + * theoretically we could face up to 2^64 rmap records. + * However, we're likely to run out of blocks in the AG long + * before that happens, which means that we must compute the + * max height based on what the btree will look like if it + * consumes almost all the blocks in the AG due to maximal + * sharing factor. + */ + mp->m_rmap_maxlevels = xfs_btree_space_to_height(mp->m_rmap_mnr, + mp->m_sb.sb_agblocks); + } else { + /* + * If there's no block sharing, compute the maximum rmapbt + * height assuming one rmap record per AG block. + */ mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels( mp->m_rmap_mnr, mp->m_sb.sb_agblocks); + } + ASSERT(mp->m_rmap_maxlevels <= xfs_rmapbt_maxlevels_ondisk()); } /* Calculate the refcount btree size for some records. */ @@ -633,3 +676,22 @@ xfs_rmapbt_calc_reserves( return error; } + +int __init +xfs_rmapbt_init_cur_cache(void) +{ + xfs_rmapbt_cur_cache = kmem_cache_create("xfs_rmapbt_cur", + xfs_btree_cur_sizeof(xfs_rmapbt_maxlevels_ondisk()), + 0, 0, NULL); + + if (!xfs_rmapbt_cur_cache) + return -ENOMEM; + return 0; +} + +void +xfs_rmapbt_destroy_cur_cache(void) +{ + kmem_cache_destroy(xfs_rmapbt_cur_cache); + xfs_rmapbt_cur_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h index f2eee6572af4..3244715dd111 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.h +++ b/fs/xfs/libxfs/xfs_rmap_btree.h @@ -59,4 +59,9 @@ extern xfs_extlen_t xfs_rmapbt_max_size(struct xfs_mount *mp, extern int xfs_rmapbt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_perag *pag, xfs_extlen_t *ask, xfs_extlen_t *used); +unsigned int xfs_rmapbt_maxlevels_ondisk(void); + +int __init xfs_rmapbt_init_cur_cache(void); +void xfs_rmapbt_destroy_cur_cache(void); + #endif /* __XFS_RMAP_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index e58349be78bd..f4e84aa1d50a 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -495,7 +495,7 @@ xfs_sb_quota_from_disk(struct xfs_sb *sbp) static void __xfs_sb_from_disk( struct xfs_sb *to, - xfs_dsb_t *from, + struct xfs_dsb *from, bool convert_xquota) { to->sb_magicnum = be32_to_cpu(from->sb_magicnum); @@ -571,7 +571,7 @@ __xfs_sb_from_disk( void xfs_sb_from_disk( struct xfs_sb *to, - xfs_dsb_t *from) + struct xfs_dsb *from) { __xfs_sb_from_disk(to, from, true); } diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 5e300daa2559..6f83d9b306ee 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -70,7 +70,7 @@ xfs_allocfree_log_count( { uint blocks; - blocks = num_ops * 2 * (2 * mp->m_ag_maxlevels - 1); + blocks = num_ops * 2 * (2 * mp->m_alloc_maxlevels - 1); if (xfs_has_rmapbt(mp)) blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1); if (xfs_has_reflink(mp)) @@ -814,6 +814,19 @@ xfs_trans_resv_calc( struct xfs_mount *mp, struct xfs_trans_resv *resp) { + unsigned int rmap_maxlevels = mp->m_rmap_maxlevels; + + /* + * In the early days of rmap+reflink, we always set the rmap maxlevels + * to 9 even if the AG was small enough that it would never grow to + * that height. Transaction reservation sizes influence the minimum + * log size calculation, which influences the size of the log that mkfs + * creates. Use the old value here to ensure that newly formatted + * small filesystems will mount on older kernels. + */ + if (xfs_has_rmapbt(mp) && xfs_has_reflink(mp)) + mp->m_rmap_maxlevels = XFS_OLD_REFLINK_RMAP_MAXLEVELS; + /* * The following transactions are logged in physical format and * require a permanent reservation on space. @@ -916,4 +929,7 @@ xfs_trans_resv_calc( resp->tr_clearagi.tr_logres = xfs_calc_clear_agi_bucket_reservation(mp); resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp); resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp); + + /* Put everything back the way it was. This goes at the end. */ + mp->m_rmap_maxlevels = rmap_maxlevels; } diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h index 50332be34388..87b31c69a773 100644 --- a/fs/xfs/libxfs/xfs_trans_space.h +++ b/fs/xfs/libxfs/xfs_trans_space.h @@ -17,6 +17,13 @@ /* Adding one rmap could split every level up to the top of the tree. */ #define XFS_RMAPADD_SPACE_RES(mp) ((mp)->m_rmap_maxlevels) +/* + * Note that we historically set m_rmap_maxlevels to 9 when reflink is enabled, + * so we must preserve this behavior to avoid changing the transaction space + * reservations and minimum log size calculations for existing filesystems. + */ +#define XFS_OLD_REFLINK_RMAP_MAXLEVELS 9 + /* Blocks we might need to add "b" rmaps to a tree. */ #define XFS_NRMAPADD_SPACE_RES(mp, b)\ (((b + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) - 1) / \ @@ -74,7 +81,7 @@ #define XFS_DIOSTRAT_SPACE_RES(mp, v) \ (XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + (v)) #define XFS_GROWFS_SPACE_RES(mp) \ - (2 * (mp)->m_ag_maxlevels) + (2 * (mp)->m_alloc_maxlevels) #define XFS_GROWFSRT_SPACE_RES(mp,b) \ ((b) + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK)) #define XFS_LINK_SPACE_RES(mp,nl) \ diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index ae3c9f6e2c69..bed798792226 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -555,11 +555,11 @@ xchk_agf( xchk_block_set_corrupt(sc, sc->sa.agf_bp); level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); - if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + if (level <= 0 || level > mp->m_alloc_maxlevels) xchk_block_set_corrupt(sc, sc->sa.agf_bp); level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); - if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + if (level <= 0 || level > mp->m_alloc_maxlevels) xchk_block_set_corrupt(sc, sc->sa.agf_bp); if (xfs_has_rmapbt(mp)) { @@ -568,7 +568,7 @@ xchk_agf( xchk_block_set_corrupt(sc, sc->sa.agf_bp); level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]); - if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + if (level <= 0 || level > mp->m_rmap_maxlevels) xchk_block_set_corrupt(sc, sc->sa.agf_bp); } @@ -578,7 +578,7 @@ xchk_agf( xchk_block_set_corrupt(sc, sc->sa.agf_bp); level = be32_to_cpu(agf->agf_refcount_level); - if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + if (level <= 0 || level > mp->m_refc_maxlevels) xchk_block_set_corrupt(sc, sc->sa.agf_bp); } @@ -850,6 +850,7 @@ xchk_agi( struct xfs_mount *mp = sc->mp; struct xfs_agi *agi; struct xfs_perag *pag; + struct xfs_ino_geometry *igeo = M_IGEO(sc->mp); xfs_agnumber_t agno = sc->sm->sm_agno; xfs_agblock_t agbno; xfs_agblock_t eoag; @@ -880,7 +881,7 @@ xchk_agi( xchk_block_set_corrupt(sc, sc->sa.agi_bp); level = be32_to_cpu(agi->agi_level); - if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + if (level <= 0 || level > igeo->inobt_maxlevels) xchk_block_set_corrupt(sc, sc->sa.agi_bp); if (xfs_has_finobt(mp)) { @@ -889,7 +890,7 @@ xchk_agi( xchk_block_set_corrupt(sc, sc->sa.agi_bp); level = be32_to_cpu(agi->agi_free_level); - if (level <= 0 || level > XFS_BTREE_MAXLEVELS) + if (level <= 0 || level > igeo->inobt_maxlevels) xchk_block_set_corrupt(sc, sc->sa.agi_bp); } diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 0f8deee66f15..d7bfed52f4cd 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -122,7 +122,7 @@ xrep_check_btree_root( xfs_agnumber_t agno = sc->sm->sm_agno; return xfs_verify_agbno(mp, agno, fab->root) && - fab->height <= XFS_BTREE_MAXLEVELS; + fab->height <= fab->maxlevels; } /* @@ -339,18 +339,22 @@ xrep_agf( [XREP_AGF_BNOBT] = { .rmap_owner = XFS_RMAP_OWN_AG, .buf_ops = &xfs_bnobt_buf_ops, + .maxlevels = sc->mp->m_alloc_maxlevels, }, [XREP_AGF_CNTBT] = { .rmap_owner = XFS_RMAP_OWN_AG, .buf_ops = &xfs_cntbt_buf_ops, + .maxlevels = sc->mp->m_alloc_maxlevels, }, [XREP_AGF_RMAPBT] = { .rmap_owner = XFS_RMAP_OWN_AG, .buf_ops = &xfs_rmapbt_buf_ops, + .maxlevels = sc->mp->m_rmap_maxlevels, }, [XREP_AGF_REFCOUNTBT] = { .rmap_owner = XFS_RMAP_OWN_REFC, .buf_ops = &xfs_refcountbt_buf_ops, + .maxlevels = sc->mp->m_refc_maxlevels, }, [XREP_AGF_END] = { .buf_ops = NULL, @@ -881,10 +885,12 @@ xrep_agi( [XREP_AGI_INOBT] = { .rmap_owner = XFS_RMAP_OWN_INOBT, .buf_ops = &xfs_inobt_buf_ops, + .maxlevels = M_IGEO(sc->mp)->inobt_maxlevels, }, [XREP_AGI_FINOBT] = { .rmap_owner = XFS_RMAP_OWN_INOBT, .buf_ops = &xfs_finobt_buf_ops, + .maxlevels = M_IGEO(sc->mp)->inobt_maxlevels, }, [XREP_AGI_END] = { .buf_ops = NULL diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c index d6d24c866bc4..b89bf9de9b1c 100644 --- a/fs/xfs/scrub/bitmap.c +++ b/fs/xfs/scrub/bitmap.c @@ -222,21 +222,21 @@ out: * 1 2 3 * * Pretend for this example that each leaf block has 100 btree records. For - * the first btree record, we'll observe that bc_ptrs[0] == 1, so we record - * that we saw block 1. Then we observe that bc_ptrs[1] == 1, so we record - * block 4. The list is [1, 4]. + * the first btree record, we'll observe that bc_levels[0].ptr == 1, so we + * record that we saw block 1. Then we observe that bc_levels[1].ptr == 1, so + * we record block 4. The list is [1, 4]. * - * For the second btree record, we see that bc_ptrs[0] == 2, so we exit the - * loop. The list remains [1, 4]. + * For the second btree record, we see that bc_levels[0].ptr == 2, so we exit + * the loop. The list remains [1, 4]. * * For the 101st btree record, we've moved onto leaf block 2. Now - * bc_ptrs[0] == 1 again, so we record that we saw block 2. We see that - * bc_ptrs[1] == 2, so we exit the loop. The list is now [1, 4, 2]. + * bc_levels[0].ptr == 1 again, so we record that we saw block 2. We see that + * bc_levels[1].ptr == 2, so we exit the loop. The list is now [1, 4, 2]. * - * For the 102nd record, bc_ptrs[0] == 2, so we continue. + * For the 102nd record, bc_levels[0].ptr == 2, so we continue. * - * For the 201st record, we've moved on to leaf block 3. bc_ptrs[0] == 1, so - * we add 3 to the list. Now it is [1, 4, 2, 3]. + * For the 201st record, we've moved on to leaf block 3. + * bc_levels[0].ptr == 1, so we add 3 to the list. Now it is [1, 4, 2, 3]. * * For the 300th record we just exit, with the list being [1, 4, 2, 3]. */ @@ -256,7 +256,7 @@ xbitmap_set_btcur_path( int i; int error; - for (i = 0; i < cur->bc_nlevels && cur->bc_ptrs[i] == 1; i++) { + for (i = 0; i < cur->bc_nlevels && cur->bc_levels[i].ptr == 1; i++) { xfs_btree_get_block(cur, i, &bp); if (!bp) continue; diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 017da9ceaee9..a4cbbc346f60 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -402,7 +402,7 @@ xchk_bmapbt_rec( * the root since the verifiers don't do that. */ if (xfs_has_crc(bs->cur->bc_mp) && - bs->cur->bc_ptrs[0] == 1) { + bs->cur->bc_levels[0].ptr == 1) { for (i = 0; i < bs->cur->bc_nlevels - 1; i++) { block = xfs_btree_get_block(bs->cur, i, &bp); owner = be64_to_cpu(block->bb_u.l.bb_owner); diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index eccb855dc904..39dd46f038fe 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -136,14 +136,14 @@ xchk_btree_rec( struct xfs_buf *bp; block = xfs_btree_get_block(cur, 0, &bp); - rec = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); + rec = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, block); trace_xchk_btree_rec(bs->sc, cur, 0); /* If this isn't the first record, are they in order? */ - if (!bs->firstrec && !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec)) + if (cur->bc_levels[0].ptr > 1 && + !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec)) xchk_btree_set_corrupt(bs->sc, cur, 0); - bs->firstrec = false; memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len); if (cur->bc_nlevels == 1) @@ -152,7 +152,7 @@ xchk_btree_rec( /* Is this at least as large as the parent low key? */ cur->bc_ops->init_key_from_rec(&key, rec); keyblock = xfs_btree_get_block(cur, 1, &bp); - keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[1], keyblock); + keyp = xfs_btree_key_addr(cur, cur->bc_levels[1].ptr, keyblock); if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0) xchk_btree_set_corrupt(bs->sc, cur, 1); @@ -161,7 +161,7 @@ xchk_btree_rec( /* Is this no larger than the parent high key? */ cur->bc_ops->init_high_key_from_rec(&hkey, rec); - keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[1], keyblock); + keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[1].ptr, keyblock); if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0) xchk_btree_set_corrupt(bs->sc, cur, 1); } @@ -183,23 +183,22 @@ xchk_btree_key( struct xfs_buf *bp; block = xfs_btree_get_block(cur, level, &bp); - key = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block); + key = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block); trace_xchk_btree_key(bs->sc, cur, level); /* If this isn't the first key, are they in order? */ - if (!bs->firstkey[level] && - !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level], key)) + if (cur->bc_levels[level].ptr > 1 && + !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level - 1], key)) xchk_btree_set_corrupt(bs->sc, cur, level); - bs->firstkey[level] = false; - memcpy(&bs->lastkey[level], key, cur->bc_ops->key_len); + memcpy(&bs->lastkey[level - 1], key, cur->bc_ops->key_len); if (level + 1 >= cur->bc_nlevels) return; /* Is this at least as large as the parent low key? */ keyblock = xfs_btree_get_block(cur, level + 1, &bp); - keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], keyblock); + keyp = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, keyblock); if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0) xchk_btree_set_corrupt(bs->sc, cur, level); @@ -207,8 +206,9 @@ xchk_btree_key( return; /* Is this no larger than the parent high key? */ - key = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block); - keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], keyblock); + key = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr, block); + keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr, + keyblock); if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0) xchk_btree_set_corrupt(bs->sc, cur, level); } @@ -291,7 +291,7 @@ xchk_btree_block_check_sibling( /* Compare upper level pointer to sibling pointer. */ pblock = xfs_btree_get_block(ncur, level + 1, &pbp); - pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock); + pp = xfs_btree_ptr_addr(ncur, ncur->bc_levels[level + 1].ptr, pblock); if (!xchk_btree_ptr_ok(bs, level + 1, pp)) goto out; if (pbp) @@ -596,7 +596,7 @@ xchk_btree_block_keys( /* Obtain the parent's copy of the keys for this block. */ parent_block = xfs_btree_get_block(cur, level + 1, &bp); - parent_keys = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], + parent_keys = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, parent_block); if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0) @@ -607,7 +607,7 @@ xchk_btree_block_keys( /* Get high keys */ high_bk = xfs_btree_high_key_from_key(cur, &block_keys); - high_pk = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], + high_pk = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr, parent_block); if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0) @@ -627,35 +627,39 @@ xchk_btree( const struct xfs_owner_info *oinfo, void *private) { - struct xchk_btree bs = { - .cur = cur, - .scrub_rec = scrub_fn, - .oinfo = oinfo, - .firstrec = true, - .private = private, - .sc = sc, - }; union xfs_btree_ptr ptr; + struct xchk_btree *bs; union xfs_btree_ptr *pp; union xfs_btree_rec *recp; struct xfs_btree_block *block; - int level; struct xfs_buf *bp; struct check_owner *co; struct check_owner *n; - int i; + size_t cur_sz; + int level; int error = 0; - /* Initialize scrub state */ - for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) - bs.firstkey[i] = true; - INIT_LIST_HEAD(&bs.to_check); - - /* Don't try to check a tree with a height we can't handle. */ - if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) { + /* + * Allocate the btree scrub context from the heap, because this + * structure can get rather large. Don't let a caller feed us a + * totally absurd size. + */ + cur_sz = xchk_btree_sizeof(cur->bc_nlevels); + if (cur_sz > PAGE_SIZE) { xchk_btree_set_corrupt(sc, cur, 0); - goto out; + return 0; } + bs = kmem_zalloc(cur_sz, KM_NOFS | KM_MAYFAIL); + if (!bs) + return -ENOMEM; + bs->cur = cur; + bs->scrub_rec = scrub_fn; + bs->oinfo = oinfo; + bs->private = private; + bs->sc = sc; + + /* Initialize scrub state */ + INIT_LIST_HEAD(&bs->to_check); /* * Load the root of the btree. The helper function absorbs @@ -663,79 +667,82 @@ xchk_btree( */ level = cur->bc_nlevels - 1; cur->bc_ops->init_ptr_from_cur(cur, &ptr); - if (!xchk_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr)) + if (!xchk_btree_ptr_ok(bs, cur->bc_nlevels, &ptr)) goto out; - error = xchk_btree_get_block(&bs, level, &ptr, &block, &bp); + error = xchk_btree_get_block(bs, level, &ptr, &block, &bp); if (error || !block) goto out; - cur->bc_ptrs[level] = 1; + cur->bc_levels[level].ptr = 1; while (level < cur->bc_nlevels) { block = xfs_btree_get_block(cur, level, &bp); if (level == 0) { /* End of leaf, pop back towards the root. */ - if (cur->bc_ptrs[level] > + if (cur->bc_levels[level].ptr > be16_to_cpu(block->bb_numrecs)) { - xchk_btree_block_keys(&bs, level, block); + xchk_btree_block_keys(bs, level, block); if (level < cur->bc_nlevels - 1) - cur->bc_ptrs[level + 1]++; + cur->bc_levels[level + 1].ptr++; level++; continue; } /* Records in order for scrub? */ - xchk_btree_rec(&bs); + xchk_btree_rec(bs); /* Call out to the record checker. */ - recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); - error = bs.scrub_rec(&bs, recp); + recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, + block); + error = bs->scrub_rec(bs, recp); if (error) break; if (xchk_should_terminate(sc, &error) || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) break; - cur->bc_ptrs[level]++; + cur->bc_levels[level].ptr++; continue; } /* End of node, pop back towards the root. */ - if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) { - xchk_btree_block_keys(&bs, level, block); + if (cur->bc_levels[level].ptr > + be16_to_cpu(block->bb_numrecs)) { + xchk_btree_block_keys(bs, level, block); if (level < cur->bc_nlevels - 1) - cur->bc_ptrs[level + 1]++; + cur->bc_levels[level + 1].ptr++; level++; continue; } /* Keys in order for scrub? */ - xchk_btree_key(&bs, level); + xchk_btree_key(bs, level); /* Drill another level deeper. */ - pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block); - if (!xchk_btree_ptr_ok(&bs, level, pp)) { - cur->bc_ptrs[level]++; + pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block); + if (!xchk_btree_ptr_ok(bs, level, pp)) { + cur->bc_levels[level].ptr++; continue; } level--; - error = xchk_btree_get_block(&bs, level, pp, &block, &bp); + error = xchk_btree_get_block(bs, level, pp, &block, &bp); if (error || !block) goto out; - cur->bc_ptrs[level] = 1; + cur->bc_levels[level].ptr = 1; } out: /* Process deferred owner checks on btree blocks. */ - list_for_each_entry_safe(co, n, &bs.to_check, list) { - if (!error && bs.cur) - error = xchk_btree_check_block_owner(&bs, - co->level, co->daddr); + list_for_each_entry_safe(co, n, &bs->to_check, list) { + if (!error && bs->cur) + error = xchk_btree_check_block_owner(bs, co->level, + co->daddr); list_del(&co->list); kmem_free(co); } + kmem_free(bs); return error; } diff --git a/fs/xfs/scrub/btree.h b/fs/xfs/scrub/btree.h index b7d2fc01fbf9..da61a53a0b61 100644 --- a/fs/xfs/scrub/btree.h +++ b/fs/xfs/scrub/btree.h @@ -39,11 +39,22 @@ struct xchk_btree { /* internal scrub state */ union xfs_btree_rec lastrec; - bool firstrec; - union xfs_btree_key lastkey[XFS_BTREE_MAXLEVELS]; - bool firstkey[XFS_BTREE_MAXLEVELS]; struct list_head to_check; + + /* this element must come last! */ + union xfs_btree_key lastkey[]; }; + +/* + * Calculate the size of a xchk_btree structure. There are nlevels-1 slots for + * keys because we track leaf records separately in lastrec. + */ +static inline size_t +xchk_btree_sizeof(unsigned int nlevels) +{ + return struct_size((struct xchk_btree *)NULL, lastkey, nlevels - 1); +} + int xchk_btree(struct xfs_scrub *sc, struct xfs_btree_cur *cur, xchk_btree_rec_fn scrub_fn, const struct xfs_owner_info *oinfo, void *private); diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index 8a52514bc1ff..b962cfbbd92b 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -473,7 +473,7 @@ xchk_da_btree( xchk_da_btree_rec_fn scrub_fn, void *private) { - struct xchk_da_btree ds = {}; + struct xchk_da_btree *ds; struct xfs_mount *mp = sc->mp; struct xfs_da_state_blk *blks; struct xfs_da_node_entry *key; @@ -486,32 +486,35 @@ xchk_da_btree( return 0; /* Set up initial da state. */ - ds.dargs.dp = sc->ip; - ds.dargs.whichfork = whichfork; - ds.dargs.trans = sc->tp; - ds.dargs.op_flags = XFS_DA_OP_OKNOENT; - ds.state = xfs_da_state_alloc(&ds.dargs); - ds.sc = sc; - ds.private = private; + ds = kmem_zalloc(sizeof(struct xchk_da_btree), KM_NOFS | KM_MAYFAIL); + if (!ds) + return -ENOMEM; + ds->dargs.dp = sc->ip; + ds->dargs.whichfork = whichfork; + ds->dargs.trans = sc->tp; + ds->dargs.op_flags = XFS_DA_OP_OKNOENT; + ds->state = xfs_da_state_alloc(&ds->dargs); + ds->sc = sc; + ds->private = private; if (whichfork == XFS_ATTR_FORK) { - ds.dargs.geo = mp->m_attr_geo; - ds.lowest = 0; - ds.highest = 0; + ds->dargs.geo = mp->m_attr_geo; + ds->lowest = 0; + ds->highest = 0; } else { - ds.dargs.geo = mp->m_dir_geo; - ds.lowest = ds.dargs.geo->leafblk; - ds.highest = ds.dargs.geo->freeblk; + ds->dargs.geo = mp->m_dir_geo; + ds->lowest = ds->dargs.geo->leafblk; + ds->highest = ds->dargs.geo->freeblk; } - blkno = ds.lowest; + blkno = ds->lowest; level = 0; /* Find the root of the da tree, if present. */ - blks = ds.state->path.blk; - error = xchk_da_btree_block(&ds, level, blkno); + blks = ds->state->path.blk; + error = xchk_da_btree_block(ds, level, blkno); if (error) goto out_state; /* - * We didn't find a block at ds.lowest, which means that there's + * We didn't find a block at ds->lowest, which means that there's * no LEAF1/LEAFN tree (at least not where it's supposed to be), * so jump out now. */ @@ -523,16 +526,16 @@ xchk_da_btree( /* Handle leaf block. */ if (blks[level].magic != XFS_DA_NODE_MAGIC) { /* End of leaf, pop back towards the root. */ - if (blks[level].index >= ds.maxrecs[level]) { + if (blks[level].index >= ds->maxrecs[level]) { if (level > 0) blks[level - 1].index++; - ds.tree_level++; + ds->tree_level++; level--; continue; } /* Dispatch record scrubbing. */ - error = scrub_fn(&ds, level); + error = scrub_fn(ds, level); if (error) break; if (xchk_should_terminate(sc, &error) || @@ -545,17 +548,17 @@ xchk_da_btree( /* End of node, pop back towards the root. */ - if (blks[level].index >= ds.maxrecs[level]) { + if (blks[level].index >= ds->maxrecs[level]) { if (level > 0) blks[level - 1].index++; - ds.tree_level++; + ds->tree_level++; level--; continue; } /* Hashes in order for scrub? */ - key = xchk_da_btree_node_entry(&ds, level); - error = xchk_da_btree_hash(&ds, level, &key->hashval); + key = xchk_da_btree_node_entry(ds, level); + error = xchk_da_btree_hash(ds, level, &key->hashval); if (error) goto out; @@ -564,11 +567,11 @@ xchk_da_btree( level++; if (level >= XFS_DA_NODE_MAXDEPTH) { /* Too deep! */ - xchk_da_set_corrupt(&ds, level - 1); + xchk_da_set_corrupt(ds, level - 1); break; } - ds.tree_level--; - error = xchk_da_btree_block(&ds, level, blkno); + ds->tree_level--; + error = xchk_da_btree_block(ds, level, blkno); if (error) goto out; if (blks[level].bp == NULL) @@ -587,6 +590,7 @@ out: } out_state: - xfs_da_state_free(ds.state); + xfs_da_state_free(ds->state); + kmem_free(ds); return error; } diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index 3bb152d52a07..840f74ec431c 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -44,6 +44,9 @@ struct xrep_find_ag_btree { /* in: buffer ops */ const struct xfs_buf_ops *buf_ops; + /* in: maximum btree height */ + unsigned int maxlevels; + /* out: the highest btree block found and the tree height */ xfs_agblock_t root; unsigned int height; diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 51e4c61916d2..8d528d35b725 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -461,15 +461,10 @@ xfs_scrub_metadata( struct file *file, struct xfs_scrub_metadata *sm) { - struct xfs_scrub sc = { - .file = file, - .sm = sm, - }; + struct xfs_scrub *sc; struct xfs_mount *mp = XFS_I(file_inode(file))->i_mount; int error = 0; - sc.mp = mp; - BUILD_BUG_ON(sizeof(meta_scrub_ops) != (sizeof(struct xchk_meta_ops) * XFS_SCRUB_TYPE_NR)); @@ -489,59 +484,68 @@ xfs_scrub_metadata( xchk_experimental_warning(mp); - sc.ops = &meta_scrub_ops[sm->sm_type]; - sc.sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type); + sc = kmem_zalloc(sizeof(struct xfs_scrub), KM_NOFS | KM_MAYFAIL); + if (!sc) { + error = -ENOMEM; + goto out; + } + + sc->mp = mp; + sc->file = file; + sc->sm = sm; + sc->ops = &meta_scrub_ops[sm->sm_type]; + sc->sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type); retry_op: /* * When repairs are allowed, prevent freezing or readonly remount while * scrub is running with a real transaction. */ if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) { - error = mnt_want_write_file(sc.file); + error = mnt_want_write_file(sc->file); if (error) - goto out; + goto out_sc; } /* Set up for the operation. */ - error = sc.ops->setup(&sc); + error = sc->ops->setup(sc); if (error) goto out_teardown; /* Scrub for errors. */ - error = sc.ops->scrub(&sc); - if (!(sc.flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) { + error = sc->ops->scrub(sc); + if (!(sc->flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) { /* * Scrubbers return -EDEADLOCK to mean 'try harder'. * Tear down everything we hold, then set up again with * preparation for worst-case scenarios. */ - error = xchk_teardown(&sc, 0); + error = xchk_teardown(sc, 0); if (error) - goto out; - sc.flags |= XCHK_TRY_HARDER; + goto out_sc; + sc->flags |= XCHK_TRY_HARDER; goto retry_op; } else if (error || (sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)) goto out_teardown; - xchk_update_health(&sc); + xchk_update_health(sc); - if ((sc.sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) && - !(sc.flags & XREP_ALREADY_FIXED)) { + if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) && + !(sc->flags & XREP_ALREADY_FIXED)) { bool needs_fix; /* Let debug users force us into the repair routines. */ if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) - sc.sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; - needs_fix = (sc.sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT | - XFS_SCRUB_OFLAG_XCORRUPT | - XFS_SCRUB_OFLAG_PREEN)); + needs_fix = (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT | + XFS_SCRUB_OFLAG_XCORRUPT | + XFS_SCRUB_OFLAG_PREEN)); /* * If userspace asked for a repair but it wasn't necessary, * report that back to userspace. */ if (!needs_fix) { - sc.sm->sm_flags |= XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED; + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED; goto out_nofix; } @@ -549,26 +553,28 @@ retry_op: * If it's broken, userspace wants us to fix it, and we haven't * already tried to fix it, then attempt a repair. */ - error = xrep_attempt(&sc); + error = xrep_attempt(sc); if (error == -EAGAIN) { /* * Either the repair function succeeded or it couldn't * get all the resources it needs; either way, we go * back to the beginning and call the scrub function. */ - error = xchk_teardown(&sc, 0); + error = xchk_teardown(sc, 0); if (error) { xrep_failure(mp); - goto out; + goto out_sc; } goto retry_op; } } out_nofix: - xchk_postmortem(&sc); + xchk_postmortem(sc); out_teardown: - error = xchk_teardown(&sc, error); + error = xchk_teardown(sc, error); +out_sc: + kmem_free(sc); out: trace_xchk_done(XFS_I(file_inode(file)), sm, error); if (error == -EFSCORRUPTED || error == -EFSBADCRC) { diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index c0ef53fe6611..b5f94676c37c 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -21,13 +21,14 @@ xchk_btree_cur_fsbno( struct xfs_btree_cur *cur, int level) { - if (level < cur->bc_nlevels && cur->bc_bufs[level]) + if (level < cur->bc_nlevels && cur->bc_levels[level].bp) return XFS_DADDR_TO_FSB(cur->bc_mp, - xfs_buf_daddr(cur->bc_bufs[level])); - if (level == cur->bc_nlevels - 1 && cur->bc_flags & XFS_BTREE_LONG_PTRS) + xfs_buf_daddr(cur->bc_levels[level].bp)); + + if (level == cur->bc_nlevels - 1 && + (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)) return XFS_INO_TO_FSB(cur->bc_mp, cur->bc_ino.ip->i_ino); - if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS)) - return XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, 0); + return NULLFSBLOCK; } diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index a7bbb84f91a7..93ece6df02e3 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -348,7 +348,7 @@ TRACE_EVENT(xchk_btree_op_error, __entry->level = level; __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno); __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); - __entry->ptr = cur->bc_ptrs[level]; + __entry->ptr = cur->bc_levels[level].ptr; __entry->error = error; __entry->ret_ip = ret_ip; ), @@ -389,7 +389,7 @@ TRACE_EVENT(xchk_ifork_btree_op_error, __entry->type = sc->sm->sm_type; __entry->btnum = cur->bc_btnum; __entry->level = level; - __entry->ptr = cur->bc_ptrs[level]; + __entry->ptr = cur->bc_levels[level].ptr; __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno); __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); __entry->error = error; @@ -431,7 +431,7 @@ TRACE_EVENT(xchk_btree_error, __entry->level = level; __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno); __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); - __entry->ptr = cur->bc_ptrs[level]; + __entry->ptr = cur->bc_levels[level].ptr; __entry->ret_ip = ret_ip; ), TP_printk("dev %d:%d type %s btree %s level %d ptr %d agno 0x%x agbno 0x%x ret_ip %pS", @@ -471,7 +471,7 @@ TRACE_EVENT(xchk_ifork_btree_error, __entry->level = level; __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno); __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); - __entry->ptr = cur->bc_ptrs[level]; + __entry->ptr = cur->bc_levels[level].ptr; __entry->ret_ip = ret_ip; ), TP_printk("dev %d:%d ino 0x%llx fork %s type %s btree %s level %d ptr %d agno 0x%x agbno 0x%x ret_ip %pS", @@ -511,7 +511,7 @@ DECLARE_EVENT_CLASS(xchk_sbtree_class, __entry->bno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno); __entry->level = level; __entry->nlevels = cur->bc_nlevels; - __entry->ptr = cur->bc_ptrs[level]; + __entry->ptr = cur->bc_levels[level].ptr; ), TP_printk("dev %d:%d type %s btree %s agno 0x%x agbno 0x%x level %d nlevels %d ptr %d", MAJOR(__entry->dev), MINOR(__entry->dev), diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 34fc6148032a..c8c15c3c3147 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -82,6 +82,7 @@ xfs_end_ioend( struct iomap_ioend *ioend) { struct xfs_inode *ip = XFS_I(ioend->io_inode); + struct xfs_mount *mp = ip->i_mount; xfs_off_t offset = ioend->io_offset; size_t size = ioend->io_size; unsigned int nofs_flag; @@ -97,18 +98,26 @@ xfs_end_ioend( /* * Just clean up the in-memory structures if the fs has been shut down. */ - if (xfs_is_shutdown(ip->i_mount)) { + if (xfs_is_shutdown(mp)) { error = -EIO; goto done; } /* - * Clean up any COW blocks on an I/O error. + * Clean up all COW blocks and underlying data fork delalloc blocks on + * I/O error. The delalloc punch is required because this ioend was + * mapped to blocks in the COW fork and the associated pages are no + * longer dirty. If we don't remove delalloc blocks here, they become + * stale and can corrupt free space accounting on unmount. */ error = blk_status_to_errno(ioend->io_bio->bi_status); if (unlikely(error)) { - if (ioend->io_flags & IOMAP_F_SHARED) + if (ioend->io_flags & IOMAP_F_SHARED) { xfs_reflink_cancel_cow_range(ip, offset, size, true); + xfs_bmap_punch_delalloc_range(ip, + XFS_B_TO_FSBT(mp, offset), + XFS_B_TO_FSB(mp, size)); + } goto done; } diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 2b5da6218977..27265771f247 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -390,7 +390,7 @@ out_destroy_fork: /* kill the in-core attr fork before we drop the inode lock */ if (dp->i_afp) { xfs_idestroy_fork(dp->i_afp); - kmem_cache_free(xfs_ifork_zone, dp->i_afp); + kmem_cache_free(xfs_ifork_cache, dp->i_afp); dp->i_afp = NULL; } if (lock_mode) diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 03159970133f..e1f4d7d5a011 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -25,8 +25,8 @@ #include "xfs_log_priv.h" #include "xfs_log_recover.h" -kmem_zone_t *xfs_bui_zone; -kmem_zone_t *xfs_bud_zone; +struct kmem_cache *xfs_bui_cache; +struct kmem_cache *xfs_bud_cache; static const struct xfs_item_ops xfs_bui_item_ops; @@ -39,7 +39,7 @@ STATIC void xfs_bui_item_free( struct xfs_bui_log_item *buip) { - kmem_cache_free(xfs_bui_zone, buip); + kmem_cache_free(xfs_bui_cache, buip); } /* @@ -138,7 +138,7 @@ xfs_bui_init( { struct xfs_bui_log_item *buip; - buip = kmem_cache_zalloc(xfs_bui_zone, GFP_KERNEL | __GFP_NOFAIL); + buip = kmem_cache_zalloc(xfs_bui_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(mp, &buip->bui_item, XFS_LI_BUI, &xfs_bui_item_ops); buip->bui_format.bui_nextents = XFS_BUI_MAX_FAST_EXTENTS; @@ -198,7 +198,7 @@ xfs_bud_item_release( struct xfs_bud_log_item *budp = BUD_ITEM(lip); xfs_bui_release(budp->bud_buip); - kmem_cache_free(xfs_bud_zone, budp); + kmem_cache_free(xfs_bud_cache, budp); } static const struct xfs_item_ops xfs_bud_item_ops = { @@ -215,7 +215,7 @@ xfs_trans_get_bud( { struct xfs_bud_log_item *budp; - budp = kmem_cache_zalloc(xfs_bud_zone, GFP_KERNEL | __GFP_NOFAIL); + budp = kmem_cache_zalloc(xfs_bud_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(tp->t_mountp, &budp->bud_item, XFS_LI_BUD, &xfs_bud_item_ops); budp->bud_buip = buip; @@ -384,7 +384,7 @@ xfs_bmap_update_finish_item( bmap->bi_bmap.br_blockcount = count; return -EAGAIN; } - kmem_free(bmap); + kmem_cache_free(xfs_bmap_intent_cache, bmap); return error; } @@ -404,7 +404,7 @@ xfs_bmap_update_cancel_item( struct xfs_bmap_intent *bmap; bmap = container_of(item, struct xfs_bmap_intent, bi_list); - kmem_free(bmap); + kmem_cache_free(xfs_bmap_intent_cache, bmap); } const struct xfs_defer_op_type xfs_bmap_update_defer_type = { @@ -532,7 +532,7 @@ xfs_bui_item_recover( * Commit transaction, which frees the transaction and saves the inode * for later replay activities. */ - error = xfs_defer_ops_capture_and_commit(tp, ip, capture_list); + error = xfs_defer_ops_capture_and_commit(tp, capture_list); if (error) goto err_unlock; diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h index b9be62f8bd52..3fafd3881a0b 100644 --- a/fs/xfs/xfs_bmap_item.h +++ b/fs/xfs/xfs_bmap_item.h @@ -25,7 +25,7 @@ /* kernel only BUI/BUD definitions */ struct xfs_mount; -struct kmem_zone; +struct kmem_cache; /* * Max number of extents in fast allocation path. @@ -65,7 +65,7 @@ struct xfs_bud_log_item { struct xfs_bud_log_format bud_format; }; -extern struct kmem_zone *xfs_bui_zone; -extern struct kmem_zone *xfs_bud_zone; +extern struct kmem_cache *xfs_bui_cache; +extern struct kmem_cache *xfs_bud_cache; #endif /* __XFS_BMAP_ITEM_H__ */ diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 5fa6cd947dd4..631c5a61d89b 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -20,7 +20,7 @@ #include "xfs_error.h" #include "xfs_ag.h" -static kmem_zone_t *xfs_buf_zone; +static struct kmem_cache *xfs_buf_cache; /* * Locking orders @@ -220,7 +220,7 @@ _xfs_buf_alloc( int i; *bpp = NULL; - bp = kmem_cache_zalloc(xfs_buf_zone, GFP_NOFS | __GFP_NOFAIL); + bp = kmem_cache_zalloc(xfs_buf_cache, GFP_NOFS | __GFP_NOFAIL); /* * We don't want certain flags to appear in b_flags unless they are @@ -247,7 +247,7 @@ _xfs_buf_alloc( */ error = xfs_buf_get_maps(bp, nmaps); if (error) { - kmem_cache_free(xfs_buf_zone, bp); + kmem_cache_free(xfs_buf_cache, bp); return error; } @@ -307,7 +307,7 @@ xfs_buf_free( kmem_free(bp->b_addr); xfs_buf_free_maps(bp); - kmem_cache_free(xfs_buf_zone, bp); + kmem_cache_free(xfs_buf_cache, bp); } static int @@ -2258,12 +2258,12 @@ xfs_buf_delwri_pushbuf( int __init xfs_buf_init(void) { - xfs_buf_zone = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0, + xfs_buf_cache = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0, SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); - if (!xfs_buf_zone) + if (!xfs_buf_cache) goto out; return 0; @@ -2275,7 +2275,7 @@ xfs_buf_init(void) void xfs_buf_terminate(void) { - kmem_cache_destroy(xfs_buf_zone); + kmem_cache_destroy(xfs_buf_cache); } void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index b1ab100c09e1..a7a8e4528881 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -23,7 +23,7 @@ #include "xfs_log.h" -kmem_zone_t *xfs_buf_item_zone; +struct kmem_cache *xfs_buf_item_cache; static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip) { @@ -804,7 +804,7 @@ xfs_buf_item_init( return 0; } - bip = kmem_cache_zalloc(xfs_buf_item_zone, GFP_KERNEL | __GFP_NOFAIL); + bip = kmem_cache_zalloc(xfs_buf_item_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops); bip->bli_buf = bp; @@ -825,7 +825,7 @@ xfs_buf_item_init( map_size = DIV_ROUND_UP(chunks, NBWORD); if (map_size > XFS_BLF_DATAMAP_SIZE) { - kmem_cache_free(xfs_buf_item_zone, bip); + kmem_cache_free(xfs_buf_item_cache, bip); xfs_err(mp, "buffer item dirty bitmap (%u uints) too small to reflect %u bytes!", map_size, @@ -1002,7 +1002,7 @@ xfs_buf_item_free( { xfs_buf_item_free_format(bip); kmem_free(bip->bli_item.li_lv_shadow); - kmem_cache_free(xfs_buf_item_zone, bip); + kmem_cache_free(xfs_buf_item_cache, bip); } /* diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 50aa0f5ef959..e11e9ef2338f 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -71,6 +71,6 @@ static inline void xfs_buf_dquot_io_fail(struct xfs_buf *bp) void xfs_buf_iodone(struct xfs_buf *); bool xfs_buf_log_check_iovec(struct xfs_log_iovec *iovec); -extern kmem_zone_t *xfs_buf_item_zone; +extern struct kmem_cache *xfs_buf_item_cache; #endif /* __XFS_BUF_ITEM_H__ */ diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index a476c7ef5d53..70ca5751b13e 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -603,7 +603,7 @@ xlog_recover_do_inode_buffer( inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog; for (i = 0; i < inodes_per_buf; i++) { next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + - offsetof(xfs_dinode_t, di_next_unlinked); + offsetof(struct xfs_dinode, di_next_unlinked); while (next_unlinked_offset >= (reg_buf_offset + reg_buf_bytes)) { diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index c15d61d47a06..e48ae227bb11 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -38,8 +38,8 @@ * otherwise by the lowest id first, see xfs_dqlock2. */ -struct kmem_zone *xfs_qm_dqtrxzone; -static struct kmem_zone *xfs_qm_dqzone; +struct kmem_cache *xfs_dqtrx_cache; +static struct kmem_cache *xfs_dquot_cache; static struct lock_class_key xfs_dquot_group_class; static struct lock_class_key xfs_dquot_project_class; @@ -57,7 +57,7 @@ xfs_qm_dqdestroy( mutex_destroy(&dqp->q_qlock); XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot); - kmem_cache_free(xfs_qm_dqzone, dqp); + kmem_cache_free(xfs_dquot_cache, dqp); } /* @@ -458,7 +458,7 @@ xfs_dquot_alloc( { struct xfs_dquot *dqp; - dqp = kmem_cache_zalloc(xfs_qm_dqzone, GFP_KERNEL | __GFP_NOFAIL); + dqp = kmem_cache_zalloc(xfs_dquot_cache, GFP_KERNEL | __GFP_NOFAIL); dqp->q_type = type; dqp->q_id = id; @@ -471,7 +471,7 @@ xfs_dquot_alloc( * Offset of dquot in the (fixed sized) dquot chunk. */ dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * - sizeof(xfs_dqblk_t); + sizeof(struct xfs_dqblk); /* * Because we want to use a counting completion, complete @@ -1363,22 +1363,22 @@ xfs_dqlock2( int __init xfs_qm_init(void) { - xfs_qm_dqzone = kmem_cache_create("xfs_dquot", + xfs_dquot_cache = kmem_cache_create("xfs_dquot", sizeof(struct xfs_dquot), 0, 0, NULL); - if (!xfs_qm_dqzone) + if (!xfs_dquot_cache) goto out; - xfs_qm_dqtrxzone = kmem_cache_create("xfs_dqtrx", + xfs_dqtrx_cache = kmem_cache_create("xfs_dqtrx", sizeof(struct xfs_dquot_acct), 0, 0, NULL); - if (!xfs_qm_dqtrxzone) - goto out_free_dqzone; + if (!xfs_dqtrx_cache) + goto out_free_dquot_cache; return 0; -out_free_dqzone: - kmem_cache_destroy(xfs_qm_dqzone); +out_free_dquot_cache: + kmem_cache_destroy(xfs_dquot_cache); out: return -ENOMEM; } @@ -1386,8 +1386,8 @@ out: void xfs_qm_exit(void) { - kmem_cache_destroy(xfs_qm_dqtrxzone); - kmem_cache_destroy(xfs_qm_dqzone); + kmem_cache_destroy(xfs_dqtrx_cache); + kmem_cache_destroy(xfs_dquot_cache); } /* diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 3f8a0713573a..47ef9c9c5c17 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -25,8 +25,8 @@ #include "xfs_log_priv.h" #include "xfs_log_recover.h" -kmem_zone_t *xfs_efi_zone; -kmem_zone_t *xfs_efd_zone; +struct kmem_cache *xfs_efi_cache; +struct kmem_cache *xfs_efd_cache; static const struct xfs_item_ops xfs_efi_item_ops; @@ -43,7 +43,7 @@ xfs_efi_item_free( if (efip->efi_format.efi_nextents > XFS_EFI_MAX_FAST_EXTENTS) kmem_free(efip); else - kmem_cache_free(xfs_efi_zone, efip); + kmem_cache_free(xfs_efi_cache, efip); } /* @@ -161,7 +161,7 @@ xfs_efi_init( ((nextents - 1) * sizeof(xfs_extent_t))); efip = kmem_zalloc(size, 0); } else { - efip = kmem_cache_zalloc(xfs_efi_zone, + efip = kmem_cache_zalloc(xfs_efi_cache, GFP_KERNEL | __GFP_NOFAIL); } @@ -241,7 +241,7 @@ xfs_efd_item_free(struct xfs_efd_log_item *efdp) if (efdp->efd_format.efd_nextents > XFS_EFD_MAX_FAST_EXTENTS) kmem_free(efdp); else - kmem_cache_free(xfs_efd_zone, efdp); + kmem_cache_free(xfs_efd_cache, efdp); } /* @@ -333,7 +333,7 @@ xfs_trans_get_efd( (nextents - 1) * sizeof(struct xfs_extent), 0); } else { - efdp = kmem_cache_zalloc(xfs_efd_zone, + efdp = kmem_cache_zalloc(xfs_efd_cache, GFP_KERNEL | __GFP_NOFAIL); } @@ -474,15 +474,21 @@ xfs_extent_free_finish_item( struct list_head *item, struct xfs_btree_cur **state) { + struct xfs_owner_info oinfo = { }; struct xfs_extent_free_item *free; int error; free = container_of(item, struct xfs_extent_free_item, xefi_list); + oinfo.oi_owner = free->xefi_owner; + if (free->xefi_flags & XFS_EFI_ATTR_FORK) + oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK; + if (free->xefi_flags & XFS_EFI_BMBT_BLOCK) + oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK; error = xfs_trans_free_extent(tp, EFD_ITEM(done), free->xefi_startblock, free->xefi_blockcount, - &free->xefi_oinfo, free->xefi_skip_discard); - kmem_free(free); + &oinfo, free->xefi_flags & XFS_EFI_SKIP_DISCARD); + kmem_cache_free(xfs_extfree_item_cache, free); return error; } @@ -502,7 +508,7 @@ xfs_extent_free_cancel_item( struct xfs_extent_free_item *free; free = container_of(item, struct xfs_extent_free_item, xefi_list); - kmem_free(free); + kmem_cache_free(xfs_extfree_item_cache, free); } const struct xfs_defer_op_type xfs_extent_free_defer_type = { @@ -525,6 +531,7 @@ xfs_agfl_free_finish_item( struct list_head *item, struct xfs_btree_cur **state) { + struct xfs_owner_info oinfo = { }; struct xfs_mount *mp = tp->t_mountp; struct xfs_efd_log_item *efdp = EFD_ITEM(done); struct xfs_extent_free_item *free; @@ -539,13 +546,13 @@ xfs_agfl_free_finish_item( ASSERT(free->xefi_blockcount == 1); agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock); agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock); + oinfo.oi_owner = free->xefi_owner; trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount); error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); if (!error) - error = xfs_free_agfl_block(tp, agno, agbno, agbp, - &free->xefi_oinfo); + error = xfs_free_agfl_block(tp, agno, agbno, agbp, &oinfo); /* * Mark the transaction dirty, even on error. This ensures the @@ -564,7 +571,7 @@ xfs_agfl_free_finish_item( extp->ext_len = free->xefi_blockcount; efdp->efd_next_extent++; - kmem_free(free); + kmem_cache_free(xfs_extfree_item_cache, free); return error; } @@ -637,7 +644,7 @@ xfs_efi_item_recover( } - return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list); + return xfs_defer_ops_capture_and_commit(tp, capture_list); abort_error: xfs_trans_cancel(tp); diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h index cd2860c875bf..186d0f2137f1 100644 --- a/fs/xfs/xfs_extfree_item.h +++ b/fs/xfs/xfs_extfree_item.h @@ -9,7 +9,7 @@ /* kernel only EFI/EFD definitions */ struct xfs_mount; -struct kmem_zone; +struct kmem_cache; /* * Max number of extents in fast allocation path. @@ -69,7 +69,7 @@ struct xfs_efd_log_item { */ #define XFS_EFD_MAX_FAST_EXTENTS 16 -extern struct kmem_zone *xfs_efi_zone; -extern struct kmem_zone *xfs_efd_zone; +extern struct kmem_cache *xfs_efi_cache; +extern struct kmem_cache *xfs_efd_cache; #endif /* __XFS_EXTFREE_ITEM_H__ */ diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index f2210d927481..e1472004170e 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -77,10 +77,10 @@ xfs_inode_alloc( * XXX: If this didn't occur in transactions, we could drop GFP_NOFAIL * and return NULL here on ENOMEM. */ - ip = kmem_cache_alloc(xfs_inode_zone, GFP_KERNEL | __GFP_NOFAIL); + ip = kmem_cache_alloc(xfs_inode_cache, GFP_KERNEL | __GFP_NOFAIL); if (inode_init_always(mp->m_super, VFS_I(ip))) { - kmem_cache_free(xfs_inode_zone, ip); + kmem_cache_free(xfs_inode_cache, ip); return NULL; } @@ -130,11 +130,11 @@ xfs_inode_free_callback( if (ip->i_afp) { xfs_idestroy_fork(ip->i_afp); - kmem_cache_free(xfs_ifork_zone, ip->i_afp); + kmem_cache_free(xfs_ifork_cache, ip->i_afp); } if (ip->i_cowfp) { xfs_idestroy_fork(ip->i_cowfp); - kmem_cache_free(xfs_ifork_zone, ip->i_cowfp); + kmem_cache_free(xfs_ifork_cache, ip->i_cowfp); } if (ip->i_itemp) { ASSERT(!test_bit(XFS_LI_IN_AIL, @@ -143,7 +143,7 @@ xfs_inode_free_callback( ip->i_itemp = NULL; } - kmem_cache_free(xfs_inode_zone, ip); + kmem_cache_free(xfs_inode_cache, ip); } static void diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index 017904a34c02..508e184e3b8f 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c @@ -20,7 +20,7 @@ #include "xfs_ialloc.h" #include "xfs_trace.h" -kmem_zone_t *xfs_icreate_zone; /* inode create item zone */ +struct kmem_cache *xfs_icreate_cache; /* inode create item */ static inline struct xfs_icreate_item *ICR_ITEM(struct xfs_log_item *lip) { @@ -63,7 +63,7 @@ STATIC void xfs_icreate_item_release( struct xfs_log_item *lip) { - kmem_cache_free(xfs_icreate_zone, ICR_ITEM(lip)); + kmem_cache_free(xfs_icreate_cache, ICR_ITEM(lip)); } static const struct xfs_item_ops xfs_icreate_item_ops = { @@ -97,7 +97,7 @@ xfs_icreate_log( { struct xfs_icreate_item *icp; - icp = kmem_cache_zalloc(xfs_icreate_zone, GFP_KERNEL | __GFP_NOFAIL); + icp = kmem_cache_zalloc(xfs_icreate_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(tp->t_mountp, &icp->ic_item, XFS_LI_ICREATE, &xfs_icreate_item_ops); diff --git a/fs/xfs/xfs_icreate_item.h b/fs/xfs/xfs_icreate_item.h index a50d0b01e15a..64992823108a 100644 --- a/fs/xfs/xfs_icreate_item.h +++ b/fs/xfs/xfs_icreate_item.h @@ -12,7 +12,7 @@ struct xfs_icreate_item { struct xfs_icreate_log ic_format; }; -extern kmem_zone_t *xfs_icreate_zone; /* inode create item zone */ +extern struct kmem_cache *xfs_icreate_cache; /* inode create item */ void xfs_icreate_log(struct xfs_trans *tp, xfs_agnumber_t agno, xfs_agblock_t agbno, unsigned int count, diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index a4f6f034fb81..64b9bf334806 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -36,7 +36,7 @@ #include "xfs_reflink.h" #include "xfs_ag.h" -kmem_zone_t *xfs_inode_zone; +struct kmem_cache *xfs_inode_cache; /* * Used in xfs_itruncate_extents(). This is the maximum number of extents @@ -564,8 +564,6 @@ xfs_lock_two_inodes( struct xfs_inode *ip1, uint ip1_mode) { - struct xfs_inode *temp; - uint mode_temp; int attempts = 0; struct xfs_log_item *lp; @@ -578,12 +576,8 @@ xfs_lock_two_inodes( ASSERT(ip0->i_ino != ip1->i_ino); if (ip0->i_ino > ip1->i_ino) { - temp = ip0; - ip0 = ip1; - ip1 = temp; - mode_temp = ip0_mode; - ip0_mode = ip1_mode; - ip1_mode = mode_temp; + swap(ip0, ip1); + swap(ip0_mode, ip1_mode); } again: diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index b21b177832d1..e635a3d64cba 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -504,7 +504,7 @@ static inline void xfs_setup_existing_inode(struct xfs_inode *ip) void xfs_irele(struct xfs_inode *ip); -extern struct kmem_zone *xfs_inode_zone; +extern struct kmem_cache *xfs_inode_cache; /* The default CoW extent size hint. */ #define XFS_DEFAULT_COWEXTSZ_HINT 32 diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 0659d19c211e..90d8e591baf8 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -21,7 +21,7 @@ #include <linux/iversion.h> -kmem_zone_t *xfs_ili_zone; /* inode log item zone */ +struct kmem_cache *xfs_ili_cache; /* inode log item */ static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip) { @@ -672,7 +672,7 @@ xfs_inode_item_init( struct xfs_inode_log_item *iip; ASSERT(ip->i_itemp == NULL); - iip = ip->i_itemp = kmem_cache_zalloc(xfs_ili_zone, + iip = ip->i_itemp = kmem_cache_zalloc(xfs_ili_cache, GFP_KERNEL | __GFP_NOFAIL); iip->ili_inode = ip; @@ -694,7 +694,7 @@ xfs_inode_item_destroy( ip->i_itemp = NULL; kmem_free(iip->ili_item.li_lv_shadow); - kmem_cache_free(xfs_ili_zone, iip); + kmem_cache_free(xfs_ili_cache, iip); } diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 403b45ab9aa2..1a302000d604 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -47,6 +47,6 @@ extern void xfs_iflush_abort(struct xfs_inode *); extern int xfs_inode_item_format_convert(xfs_log_iovec_t *, struct xfs_inode_log_format *); -extern struct kmem_zone *xfs_ili_zone; +extern struct kmem_cache *xfs_ili_cache; #endif /* __XFS_INODE_ITEM_H__ */ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index f6cd2d4aa770..89fec9a18c34 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -21,7 +21,7 @@ #include "xfs_sb.h" #include "xfs_health.h" -kmem_zone_t *xfs_log_ticket_zone; +struct kmem_cache *xfs_log_ticket_cache; /* Local miscellaneous function prototypes */ STATIC struct xlog * @@ -3487,7 +3487,7 @@ xfs_log_ticket_put( { ASSERT(atomic_read(&ticket->t_ref) > 0); if (atomic_dec_and_test(&ticket->t_ref)) - kmem_cache_free(xfs_log_ticket_zone, ticket); + kmem_cache_free(xfs_log_ticket_cache, ticket); } xlog_ticket_t * @@ -3611,7 +3611,7 @@ xlog_ticket_alloc( struct xlog_ticket *tic; int unit_res; - tic = kmem_cache_zalloc(xfs_log_ticket_zone, GFP_NOFS | __GFP_NOFAIL); + tic = kmem_cache_zalloc(xfs_log_ticket_cache, GFP_NOFS | __GFP_NOFAIL); unit_res = xlog_calc_unit_res(log, unit_bytes); diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 844fbeec3545..23103d68423c 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -497,7 +497,7 @@ xlog_recover_cancel(struct xlog *); extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead, char *dp, int size); -extern kmem_zone_t *xfs_log_ticket_zone; +extern struct kmem_cache *xfs_log_ticket_cache; struct xlog_ticket * xlog_ticket_alloc( struct xlog *log, diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 10562ecbd9ea..53366cc0bc9e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2466,11 +2466,11 @@ xlog_finish_defer_ops( { struct xfs_defer_capture *dfc, *next; struct xfs_trans *tp; - struct xfs_inode *ip; int error = 0; list_for_each_entry_safe(dfc, next, capture_list, dfc_list) { struct xfs_trans_res resv; + struct xfs_defer_resources dres; /* * Create a new transaction reservation from the captured @@ -2494,13 +2494,9 @@ xlog_finish_defer_ops( * from recovering a single intent item. */ list_del_init(&dfc->dfc_list); - xfs_defer_ops_continue(dfc, tp, &ip); - + xfs_defer_ops_continue(dfc, tp, &dres); error = xfs_trans_commit(tp); - if (ip) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_irele(ip); - } + xfs_defer_resources_rele(&dres); if (error) return error; } @@ -2520,7 +2516,7 @@ xlog_abort_defer_ops( list_for_each_entry_safe(dfc, next, capture_list, dfc_list) { list_del_init(&dfc->dfc_list); - xfs_defer_ops_release(mp, dfc); + xfs_defer_ops_capture_free(mp, dfc); } } /* diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 06dac09eddbd..359109b6f0d3 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -567,6 +567,18 @@ xfs_mount_setup_inode_geom( xfs_ialloc_setup_geometry(mp); } +/* Compute maximum possible height for per-AG btree types for this fs. */ +static inline void +xfs_agbtree_compute_maxlevels( + struct xfs_mount *mp) +{ + unsigned int levels; + + levels = max(mp->m_alloc_maxlevels, M_IGEO(mp)->inobt_maxlevels); + levels = max(levels, mp->m_rmap_maxlevels); + mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels); +} + /* * This function does the following on an initial mount of a file system: * - reads the superblock from disk and init the mount struct @@ -638,6 +650,8 @@ xfs_mountfs( xfs_rmapbt_compute_maxlevels(mp); xfs_refcountbt_compute_maxlevels(mp); + xfs_agbtree_compute_maxlevels(mp); + /* * Check if sb_agblocks is aligned at stripe boundary. If sb_agblocks * is NOT aligned turn off m_dalign since allocator alignment is within diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index e091f3b3fa15..00720a02e761 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -128,10 +128,11 @@ typedef struct xfs_mount { uint m_rmap_mnr[2]; /* min rmap btree records */ uint m_refc_mxr[2]; /* max refc btree records */ uint m_refc_mnr[2]; /* min refc btree records */ - uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ - uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ + uint m_alloc_maxlevels; /* max alloc btree levels */ + uint m_bm_maxlevels[2]; /* max bmap btree levels */ uint m_rmap_maxlevels; /* max rmap btree levels */ uint m_refc_maxlevels; /* max refcount btree level */ + unsigned int m_agbtree_maxlevels; /* max level of all AG btrees */ xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */ uint m_alloc_set_aside; /* space we can't use */ uint m_ag_max_usable; /* max space per AG */ diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 34c3b16f834f..f85e3b07ab44 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -219,7 +219,7 @@ _xfs_mru_cache_list_insert( * When destroying or reaping, all the elements that were migrated to the reap * list need to be deleted. For each element this involves removing it from the * data store, removing it from the reap list, calling the client's free - * function and deleting the element from the element zone. + * function and deleting the element from the element cache. * * We get called holding the mru->lock, which we drop and then reacquire. * Sparse need special help with this to tell it we know what we are doing. diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 5608066d6e53..32ac8d9c8940 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -850,7 +850,7 @@ xfs_qm_reset_dqcounts( */ #ifdef DEBUG j = (int)XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) / - sizeof(xfs_dqblk_t); + sizeof(struct xfs_dqblk); ASSERT(mp->m_quotainfo->qi_dqperchunk == j); #endif dqb = bp->b_addr; diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 442a0f97a9d4..5bb12717ea28 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -11,7 +11,7 @@ struct xfs_inode; -extern struct kmem_zone *xfs_qm_dqtrxzone; +extern struct kmem_cache *xfs_dqtrx_cache; /* * Number of bmaps that we ask from bmapi when doing a quotacheck. diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 46904b793bd4..d3da67772d57 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -21,8 +21,8 @@ #include "xfs_log_priv.h" #include "xfs_log_recover.h" -kmem_zone_t *xfs_cui_zone; -kmem_zone_t *xfs_cud_zone; +struct kmem_cache *xfs_cui_cache; +struct kmem_cache *xfs_cud_cache; static const struct xfs_item_ops xfs_cui_item_ops; @@ -38,7 +38,7 @@ xfs_cui_item_free( if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS) kmem_free(cuip); else - kmem_cache_free(xfs_cui_zone, cuip); + kmem_cache_free(xfs_cui_cache, cuip); } /* @@ -143,7 +143,7 @@ xfs_cui_init( cuip = kmem_zalloc(xfs_cui_log_item_sizeof(nextents), 0); else - cuip = kmem_cache_zalloc(xfs_cui_zone, + cuip = kmem_cache_zalloc(xfs_cui_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops); @@ -204,7 +204,7 @@ xfs_cud_item_release( struct xfs_cud_log_item *cudp = CUD_ITEM(lip); xfs_cui_release(cudp->cud_cuip); - kmem_cache_free(xfs_cud_zone, cudp); + kmem_cache_free(xfs_cud_cache, cudp); } static const struct xfs_item_ops xfs_cud_item_ops = { @@ -221,7 +221,7 @@ xfs_trans_get_cud( { struct xfs_cud_log_item *cudp; - cudp = kmem_cache_zalloc(xfs_cud_zone, GFP_KERNEL | __GFP_NOFAIL); + cudp = kmem_cache_zalloc(xfs_cud_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD, &xfs_cud_item_ops); cudp->cud_cuip = cuip; @@ -384,7 +384,7 @@ xfs_refcount_update_finish_item( refc->ri_blockcount = new_aglen; return -EAGAIN; } - kmem_free(refc); + kmem_cache_free(xfs_refcount_intent_cache, refc); return error; } @@ -404,7 +404,7 @@ xfs_refcount_update_cancel_item( struct xfs_refcount_intent *refc; refc = container_of(item, struct xfs_refcount_intent, ri_list); - kmem_free(refc); + kmem_cache_free(xfs_refcount_intent_cache, refc); } const struct xfs_defer_op_type xfs_refcount_update_defer_type = { @@ -557,7 +557,7 @@ xfs_cui_item_recover( } xfs_refcount_finish_one_cleanup(tp, rcur, error); - return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list); + return xfs_defer_ops_capture_and_commit(tp, capture_list); abort_error: xfs_refcount_finish_one_cleanup(tp, rcur, error); diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h index f4f2e836540b..eb0ab13682d0 100644 --- a/fs/xfs/xfs_refcount_item.h +++ b/fs/xfs/xfs_refcount_item.h @@ -25,7 +25,7 @@ /* kernel only CUI/CUD definitions */ struct xfs_mount; -struct kmem_zone; +struct kmem_cache; /* * Max number of extents in fast allocation path. @@ -68,7 +68,7 @@ struct xfs_cud_log_item { struct xfs_cud_log_format cud_format; }; -extern struct kmem_zone *xfs_cui_zone; -extern struct kmem_zone *xfs_cud_zone; +extern struct kmem_cache *xfs_cui_cache; +extern struct kmem_cache *xfs_cud_cache; #endif /* __XFS_REFCOUNT_ITEM_H__ */ diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 76355f293488..cb0edb1d68ef 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -484,7 +484,7 @@ xfs_reflink_cancel_cow_blocks( xfs_refcount_free_cow_extent(*tpp, del.br_startblock, del.br_blockcount); - xfs_bmap_add_free(*tpp, del.br_startblock, + xfs_free_extent_later(*tpp, del.br_startblock, del.br_blockcount, NULL); /* Roll the transaction */ diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 5f0695980467..c3966b4c58ef 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -21,8 +21,8 @@ #include "xfs_log_priv.h" #include "xfs_log_recover.h" -kmem_zone_t *xfs_rui_zone; -kmem_zone_t *xfs_rud_zone; +struct kmem_cache *xfs_rui_cache; +struct kmem_cache *xfs_rud_cache; static const struct xfs_item_ops xfs_rui_item_ops; @@ -38,7 +38,7 @@ xfs_rui_item_free( if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS) kmem_free(ruip); else - kmem_cache_free(xfs_rui_zone, ruip); + kmem_cache_free(xfs_rui_cache, ruip); } /* @@ -141,7 +141,7 @@ xfs_rui_init( if (nextents > XFS_RUI_MAX_FAST_EXTENTS) ruip = kmem_zalloc(xfs_rui_log_item_sizeof(nextents), 0); else - ruip = kmem_cache_zalloc(xfs_rui_zone, + ruip = kmem_cache_zalloc(xfs_rui_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops); @@ -227,7 +227,7 @@ xfs_rud_item_release( struct xfs_rud_log_item *rudp = RUD_ITEM(lip); xfs_rui_release(rudp->rud_ruip); - kmem_cache_free(xfs_rud_zone, rudp); + kmem_cache_free(xfs_rud_cache, rudp); } static const struct xfs_item_ops xfs_rud_item_ops = { @@ -244,7 +244,7 @@ xfs_trans_get_rud( { struct xfs_rud_log_item *rudp; - rudp = kmem_cache_zalloc(xfs_rud_zone, GFP_KERNEL | __GFP_NOFAIL); + rudp = kmem_cache_zalloc(xfs_rud_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(tp->t_mountp, &rudp->rud_item, XFS_LI_RUD, &xfs_rud_item_ops); rudp->rud_ruip = ruip; @@ -427,7 +427,7 @@ xfs_rmap_update_finish_item( rmap->ri_bmap.br_startoff, rmap->ri_bmap.br_startblock, rmap->ri_bmap.br_blockcount, rmap->ri_bmap.br_state, state); - kmem_free(rmap); + kmem_cache_free(xfs_rmap_intent_cache, rmap); return error; } @@ -447,7 +447,7 @@ xfs_rmap_update_cancel_item( struct xfs_rmap_intent *rmap; rmap = container_of(item, struct xfs_rmap_intent, ri_list); - kmem_free(rmap); + kmem_cache_free(xfs_rmap_intent_cache, rmap); } const struct xfs_defer_op_type xfs_rmap_update_defer_type = { @@ -587,7 +587,7 @@ xfs_rui_item_recover( } xfs_rmap_finish_one_cleanup(tp, rcur, error); - return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list); + return xfs_defer_ops_capture_and_commit(tp, capture_list); abort_error: xfs_rmap_finish_one_cleanup(tp, rcur, error); diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h index 31e6cdfff71f..802e5119eaca 100644 --- a/fs/xfs/xfs_rmap_item.h +++ b/fs/xfs/xfs_rmap_item.h @@ -28,7 +28,7 @@ /* kernel only RUI/RUD definitions */ struct xfs_mount; -struct kmem_zone; +struct kmem_cache; /* * Max number of extents in fast allocation path. @@ -68,7 +68,7 @@ struct xfs_rud_log_item { struct xfs_rud_log_format rud_format; }; -extern struct kmem_zone *xfs_rui_zone; -extern struct kmem_zone *xfs_rud_zone; +extern struct kmem_cache *xfs_rui_cache; +extern struct kmem_cache *xfs_rud_cache; #endif /* __XFS_RMAP_ITEM_H__ */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index c4e0cd1c1c8c..e21459f9923a 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -37,6 +37,7 @@ #include "xfs_reflink.h" #include "xfs_pwork.h" #include "xfs_ag.h" +#include "xfs_defer.h" #include <linux/magic.h> #include <linux/fs_context.h> @@ -1951,196 +1952,194 @@ static struct file_system_type xfs_fs_type = { MODULE_ALIAS_FS("xfs"); STATIC int __init -xfs_init_zones(void) +xfs_init_caches(void) { - xfs_log_ticket_zone = kmem_cache_create("xfs_log_ticket", + int error; + + xfs_log_ticket_cache = kmem_cache_create("xfs_log_ticket", sizeof(struct xlog_ticket), 0, 0, NULL); - if (!xfs_log_ticket_zone) + if (!xfs_log_ticket_cache) goto out; - xfs_bmap_free_item_zone = kmem_cache_create("xfs_bmap_free_item", - sizeof(struct xfs_extent_free_item), - 0, 0, NULL); - if (!xfs_bmap_free_item_zone) - goto out_destroy_log_ticket_zone; + error = xfs_btree_init_cur_caches(); + if (error) + goto out_destroy_log_ticket_cache; - xfs_btree_cur_zone = kmem_cache_create("xfs_btree_cur", - sizeof(struct xfs_btree_cur), - 0, 0, NULL); - if (!xfs_btree_cur_zone) - goto out_destroy_bmap_free_item_zone; + error = xfs_defer_init_item_caches(); + if (error) + goto out_destroy_btree_cur_cache; - xfs_da_state_zone = kmem_cache_create("xfs_da_state", + xfs_da_state_cache = kmem_cache_create("xfs_da_state", sizeof(struct xfs_da_state), 0, 0, NULL); - if (!xfs_da_state_zone) - goto out_destroy_btree_cur_zone; + if (!xfs_da_state_cache) + goto out_destroy_defer_item_cache; - xfs_ifork_zone = kmem_cache_create("xfs_ifork", + xfs_ifork_cache = kmem_cache_create("xfs_ifork", sizeof(struct xfs_ifork), 0, 0, NULL); - if (!xfs_ifork_zone) - goto out_destroy_da_state_zone; + if (!xfs_ifork_cache) + goto out_destroy_da_state_cache; - xfs_trans_zone = kmem_cache_create("xfs_trans", + xfs_trans_cache = kmem_cache_create("xfs_trans", sizeof(struct xfs_trans), 0, 0, NULL); - if (!xfs_trans_zone) - goto out_destroy_ifork_zone; + if (!xfs_trans_cache) + goto out_destroy_ifork_cache; /* - * The size of the zone allocated buf log item is the maximum + * The size of the cache-allocated buf log item is the maximum * size possible under XFS. This wastes a little bit of memory, * but it is much faster. */ - xfs_buf_item_zone = kmem_cache_create("xfs_buf_item", + xfs_buf_item_cache = kmem_cache_create("xfs_buf_item", sizeof(struct xfs_buf_log_item), 0, 0, NULL); - if (!xfs_buf_item_zone) - goto out_destroy_trans_zone; + if (!xfs_buf_item_cache) + goto out_destroy_trans_cache; - xfs_efd_zone = kmem_cache_create("xfs_efd_item", + xfs_efd_cache = kmem_cache_create("xfs_efd_item", (sizeof(struct xfs_efd_log_item) + (XFS_EFD_MAX_FAST_EXTENTS - 1) * sizeof(struct xfs_extent)), 0, 0, NULL); - if (!xfs_efd_zone) - goto out_destroy_buf_item_zone; + if (!xfs_efd_cache) + goto out_destroy_buf_item_cache; - xfs_efi_zone = kmem_cache_create("xfs_efi_item", + xfs_efi_cache = kmem_cache_create("xfs_efi_item", (sizeof(struct xfs_efi_log_item) + (XFS_EFI_MAX_FAST_EXTENTS - 1) * sizeof(struct xfs_extent)), 0, 0, NULL); - if (!xfs_efi_zone) - goto out_destroy_efd_zone; + if (!xfs_efi_cache) + goto out_destroy_efd_cache; - xfs_inode_zone = kmem_cache_create("xfs_inode", + xfs_inode_cache = kmem_cache_create("xfs_inode", sizeof(struct xfs_inode), 0, (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT), xfs_fs_inode_init_once); - if (!xfs_inode_zone) - goto out_destroy_efi_zone; + if (!xfs_inode_cache) + goto out_destroy_efi_cache; - xfs_ili_zone = kmem_cache_create("xfs_ili", + xfs_ili_cache = kmem_cache_create("xfs_ili", sizeof(struct xfs_inode_log_item), 0, SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); - if (!xfs_ili_zone) - goto out_destroy_inode_zone; + if (!xfs_ili_cache) + goto out_destroy_inode_cache; - xfs_icreate_zone = kmem_cache_create("xfs_icr", + xfs_icreate_cache = kmem_cache_create("xfs_icr", sizeof(struct xfs_icreate_item), 0, 0, NULL); - if (!xfs_icreate_zone) - goto out_destroy_ili_zone; + if (!xfs_icreate_cache) + goto out_destroy_ili_cache; - xfs_rud_zone = kmem_cache_create("xfs_rud_item", + xfs_rud_cache = kmem_cache_create("xfs_rud_item", sizeof(struct xfs_rud_log_item), 0, 0, NULL); - if (!xfs_rud_zone) - goto out_destroy_icreate_zone; + if (!xfs_rud_cache) + goto out_destroy_icreate_cache; - xfs_rui_zone = kmem_cache_create("xfs_rui_item", + xfs_rui_cache = kmem_cache_create("xfs_rui_item", xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS), 0, 0, NULL); - if (!xfs_rui_zone) - goto out_destroy_rud_zone; + if (!xfs_rui_cache) + goto out_destroy_rud_cache; - xfs_cud_zone = kmem_cache_create("xfs_cud_item", + xfs_cud_cache = kmem_cache_create("xfs_cud_item", sizeof(struct xfs_cud_log_item), 0, 0, NULL); - if (!xfs_cud_zone) - goto out_destroy_rui_zone; + if (!xfs_cud_cache) + goto out_destroy_rui_cache; - xfs_cui_zone = kmem_cache_create("xfs_cui_item", + xfs_cui_cache = kmem_cache_create("xfs_cui_item", xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS), 0, 0, NULL); - if (!xfs_cui_zone) - goto out_destroy_cud_zone; + if (!xfs_cui_cache) + goto out_destroy_cud_cache; - xfs_bud_zone = kmem_cache_create("xfs_bud_item", + xfs_bud_cache = kmem_cache_create("xfs_bud_item", sizeof(struct xfs_bud_log_item), 0, 0, NULL); - if (!xfs_bud_zone) - goto out_destroy_cui_zone; + if (!xfs_bud_cache) + goto out_destroy_cui_cache; - xfs_bui_zone = kmem_cache_create("xfs_bui_item", + xfs_bui_cache = kmem_cache_create("xfs_bui_item", xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS), 0, 0, NULL); - if (!xfs_bui_zone) - goto out_destroy_bud_zone; + if (!xfs_bui_cache) + goto out_destroy_bud_cache; return 0; - out_destroy_bud_zone: - kmem_cache_destroy(xfs_bud_zone); - out_destroy_cui_zone: - kmem_cache_destroy(xfs_cui_zone); - out_destroy_cud_zone: - kmem_cache_destroy(xfs_cud_zone); - out_destroy_rui_zone: - kmem_cache_destroy(xfs_rui_zone); - out_destroy_rud_zone: - kmem_cache_destroy(xfs_rud_zone); - out_destroy_icreate_zone: - kmem_cache_destroy(xfs_icreate_zone); - out_destroy_ili_zone: - kmem_cache_destroy(xfs_ili_zone); - out_destroy_inode_zone: - kmem_cache_destroy(xfs_inode_zone); - out_destroy_efi_zone: - kmem_cache_destroy(xfs_efi_zone); - out_destroy_efd_zone: - kmem_cache_destroy(xfs_efd_zone); - out_destroy_buf_item_zone: - kmem_cache_destroy(xfs_buf_item_zone); - out_destroy_trans_zone: - kmem_cache_destroy(xfs_trans_zone); - out_destroy_ifork_zone: - kmem_cache_destroy(xfs_ifork_zone); - out_destroy_da_state_zone: - kmem_cache_destroy(xfs_da_state_zone); - out_destroy_btree_cur_zone: - kmem_cache_destroy(xfs_btree_cur_zone); - out_destroy_bmap_free_item_zone: - kmem_cache_destroy(xfs_bmap_free_item_zone); - out_destroy_log_ticket_zone: - kmem_cache_destroy(xfs_log_ticket_zone); + out_destroy_bud_cache: + kmem_cache_destroy(xfs_bud_cache); + out_destroy_cui_cache: + kmem_cache_destroy(xfs_cui_cache); + out_destroy_cud_cache: + kmem_cache_destroy(xfs_cud_cache); + out_destroy_rui_cache: + kmem_cache_destroy(xfs_rui_cache); + out_destroy_rud_cache: + kmem_cache_destroy(xfs_rud_cache); + out_destroy_icreate_cache: + kmem_cache_destroy(xfs_icreate_cache); + out_destroy_ili_cache: + kmem_cache_destroy(xfs_ili_cache); + out_destroy_inode_cache: + kmem_cache_destroy(xfs_inode_cache); + out_destroy_efi_cache: + kmem_cache_destroy(xfs_efi_cache); + out_destroy_efd_cache: + kmem_cache_destroy(xfs_efd_cache); + out_destroy_buf_item_cache: + kmem_cache_destroy(xfs_buf_item_cache); + out_destroy_trans_cache: + kmem_cache_destroy(xfs_trans_cache); + out_destroy_ifork_cache: + kmem_cache_destroy(xfs_ifork_cache); + out_destroy_da_state_cache: + kmem_cache_destroy(xfs_da_state_cache); + out_destroy_defer_item_cache: + xfs_defer_destroy_item_caches(); + out_destroy_btree_cur_cache: + xfs_btree_destroy_cur_caches(); + out_destroy_log_ticket_cache: + kmem_cache_destroy(xfs_log_ticket_cache); out: return -ENOMEM; } STATIC void -xfs_destroy_zones(void) +xfs_destroy_caches(void) { /* * Make sure all delayed rcu free are flushed before we * destroy caches. */ rcu_barrier(); - kmem_cache_destroy(xfs_bui_zone); - kmem_cache_destroy(xfs_bud_zone); - kmem_cache_destroy(xfs_cui_zone); - kmem_cache_destroy(xfs_cud_zone); - kmem_cache_destroy(xfs_rui_zone); - kmem_cache_destroy(xfs_rud_zone); - kmem_cache_destroy(xfs_icreate_zone); - kmem_cache_destroy(xfs_ili_zone); - kmem_cache_destroy(xfs_inode_zone); - kmem_cache_destroy(xfs_efi_zone); - kmem_cache_destroy(xfs_efd_zone); - kmem_cache_destroy(xfs_buf_item_zone); - kmem_cache_destroy(xfs_trans_zone); - kmem_cache_destroy(xfs_ifork_zone); - kmem_cache_destroy(xfs_da_state_zone); - kmem_cache_destroy(xfs_btree_cur_zone); - kmem_cache_destroy(xfs_bmap_free_item_zone); - kmem_cache_destroy(xfs_log_ticket_zone); + kmem_cache_destroy(xfs_bui_cache); + kmem_cache_destroy(xfs_bud_cache); + kmem_cache_destroy(xfs_cui_cache); + kmem_cache_destroy(xfs_cud_cache); + kmem_cache_destroy(xfs_rui_cache); + kmem_cache_destroy(xfs_rud_cache); + kmem_cache_destroy(xfs_icreate_cache); + kmem_cache_destroy(xfs_ili_cache); + kmem_cache_destroy(xfs_inode_cache); + kmem_cache_destroy(xfs_efi_cache); + kmem_cache_destroy(xfs_efd_cache); + kmem_cache_destroy(xfs_buf_item_cache); + kmem_cache_destroy(xfs_trans_cache); + kmem_cache_destroy(xfs_ifork_cache); + kmem_cache_destroy(xfs_da_state_cache); + xfs_defer_destroy_item_caches(); + xfs_btree_destroy_cur_caches(); + kmem_cache_destroy(xfs_log_ticket_cache); } STATIC int __init @@ -2233,13 +2232,13 @@ init_xfs_fs(void) if (error) goto out; - error = xfs_init_zones(); + error = xfs_init_caches(); if (error) goto out_destroy_hp; error = xfs_init_workqueues(); if (error) - goto out_destroy_zones; + goto out_destroy_caches; error = xfs_mru_cache_init(); if (error) @@ -2314,8 +2313,8 @@ init_xfs_fs(void) xfs_mru_cache_uninit(); out_destroy_wq: xfs_destroy_workqueues(); - out_destroy_zones: - xfs_destroy_zones(); + out_destroy_caches: + xfs_destroy_caches(); out_destroy_hp: xfs_cpu_hotplug_destroy(); out: @@ -2338,7 +2337,7 @@ exit_xfs_fs(void) xfs_buf_terminate(); xfs_mru_cache_uninit(); xfs_destroy_workqueues(); - xfs_destroy_zones(); + xfs_destroy_caches(); xfs_uuid_table_free(); xfs_cpu_hotplug_destroy(); } diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 18dc5eca6c04..8608f804388f 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -105,7 +105,7 @@ bug_on_assert_show( struct kobject *kobject, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.bug_on_assert ? 1 : 0); + return sysfs_emit(buf, "%d\n", xfs_globals.bug_on_assert); } XFS_SYSFS_ATTR_RW(bug_on_assert); @@ -135,7 +135,7 @@ log_recovery_delay_show( struct kobject *kobject, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.log_recovery_delay); + return sysfs_emit(buf, "%d\n", xfs_globals.log_recovery_delay); } XFS_SYSFS_ATTR_RW(log_recovery_delay); @@ -165,7 +165,7 @@ mount_delay_show( struct kobject *kobject, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.mount_delay); + return sysfs_emit(buf, "%d\n", xfs_globals.mount_delay); } XFS_SYSFS_ATTR_RW(mount_delay); @@ -188,7 +188,7 @@ always_cow_show( struct kobject *kobject, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.always_cow); + return sysfs_emit(buf, "%d\n", xfs_globals.always_cow); } XFS_SYSFS_ATTR_RW(always_cow); @@ -224,7 +224,7 @@ pwork_threads_show( struct kobject *kobject, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.pwork_threads); + return sysfs_emit(buf, "%d\n", xfs_globals.pwork_threads); } XFS_SYSFS_ATTR_RW(pwork_threads); #endif /* DEBUG */ @@ -327,7 +327,7 @@ log_head_lsn_show( block = log->l_curr_block; spin_unlock(&log->l_icloglock); - return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block); + return sysfs_emit(buf, "%d:%d\n", cycle, block); } XFS_SYSFS_ATTR_RO(log_head_lsn); @@ -341,7 +341,7 @@ log_tail_lsn_show( struct xlog *log = to_xlog(kobject); xlog_crack_atomic_lsn(&log->l_tail_lsn, &cycle, &block); - return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block); + return sysfs_emit(buf, "%d:%d\n", cycle, block); } XFS_SYSFS_ATTR_RO(log_tail_lsn); @@ -356,7 +356,7 @@ reserve_grant_head_show( struct xlog *log = to_xlog(kobject); xlog_crack_grant_head(&log->l_reserve_head.grant, &cycle, &bytes); - return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes); + return sysfs_emit(buf, "%d:%d\n", cycle, bytes); } XFS_SYSFS_ATTR_RO(reserve_grant_head); @@ -370,7 +370,7 @@ write_grant_head_show( struct xlog *log = to_xlog(kobject); xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &bytes); - return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes); + return sysfs_emit(buf, "%d:%d\n", cycle, bytes); } XFS_SYSFS_ATTR_RO(write_grant_head); @@ -425,7 +425,7 @@ max_retries_show( else retries = cfg->max_retries; - return snprintf(buf, PAGE_SIZE, "%d\n", retries); + return sysfs_emit(buf, "%d\n", retries); } static ssize_t @@ -466,7 +466,7 @@ retry_timeout_seconds_show( else timeout = jiffies_to_msecs(cfg->retry_timeout) / MSEC_PER_SEC; - return snprintf(buf, PAGE_SIZE, "%d\n", timeout); + return sysfs_emit(buf, "%d\n", timeout); } static ssize_t @@ -504,7 +504,7 @@ fail_at_unmount_show( { struct xfs_mount *mp = err_to_mp(kobject); - return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_fail_unmount); + return sysfs_emit(buf, "%d\n", mp->m_fail_unmount); } static ssize_t diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 1033a95fbf8e..4a8076ef8cb4 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -2476,7 +2476,7 @@ DECLARE_EVENT_CLASS(xfs_btree_cur_class, __entry->btnum = cur->bc_btnum; __entry->level = level; __entry->nlevels = cur->bc_nlevels; - __entry->ptr = cur->bc_ptrs[level]; + __entry->ptr = cur->bc_levels[level].ptr; __entry->daddr = bp ? xfs_buf_daddr(bp) : -1; ), TP_printk("dev %d:%d btree %s level %d/%d ptr %d daddr 0x%llx", diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 67dec11e34c7..234a9d9c2f43 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -25,7 +25,7 @@ #include "xfs_dquot.h" #include "xfs_icache.h" -kmem_zone_t *xfs_trans_zone; +struct kmem_cache *xfs_trans_cache; #if defined(CONFIG_TRACEPOINTS) static void @@ -76,7 +76,7 @@ xfs_trans_free( if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT)) sb_end_intwrite(tp->t_mountp->m_super); xfs_trans_free_dqinfo(tp); - kmem_cache_free(xfs_trans_zone, tp); + kmem_cache_free(xfs_trans_cache, tp); } /* @@ -95,7 +95,7 @@ xfs_trans_dup( trace_xfs_trans_dup(tp, _RET_IP_); - ntp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL); + ntp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL); /* * Initialize the new transaction structure. @@ -263,7 +263,7 @@ xfs_trans_alloc( * by doing GFP_KERNEL allocations inside sb_start_intwrite(). */ retry: - tp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL); + tp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL); if (!(flags & XFS_TRANS_NO_WRITECOUNT)) sb_start_intwrite(mp->m_super); xfs_trans_set_context(tp); @@ -477,7 +477,7 @@ STATIC void xfs_trans_apply_sb_deltas( xfs_trans_t *tp) { - xfs_dsb_t *sbp; + struct xfs_dsb *sbp; struct xfs_buf *bp; int whole = 0; @@ -541,14 +541,14 @@ xfs_trans_apply_sb_deltas( /* * Log the whole thing, the fields are noncontiguous. */ - xfs_trans_log_buf(tp, bp, 0, sizeof(xfs_dsb_t) - 1); + xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb) - 1); else /* * Since all the modifiable fields are contiguous, we * can get away with this. */ - xfs_trans_log_buf(tp, bp, offsetof(xfs_dsb_t, sb_icount), - offsetof(xfs_dsb_t, sb_frextents) + + xfs_trans_log_buf(tp, bp, offsetof(struct xfs_dsb, sb_icount), + offsetof(struct xfs_dsb, sb_frextents) + sizeof(sbp->sb_frextents) - 1); } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 50da47f23a07..a487b264a9eb 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -113,12 +113,6 @@ void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item, #define XFS_ITEM_FLUSHING 3 /* - * Deferred operation item relogging limits. - */ -#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */ -#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */ - -/* * This is the structure maintained for every active transaction. */ typedef struct xfs_trans { @@ -243,7 +237,7 @@ void xfs_trans_buf_set_type(struct xfs_trans *, struct xfs_buf *, void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp, struct xfs_buf *src_bp); -extern kmem_zone_t *xfs_trans_zone; +extern struct kmem_cache *xfs_trans_cache; static inline struct xfs_log_item * xfs_trans_item_relog( diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 3872ce671411..9ba7e6b9bed3 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -846,7 +846,7 @@ STATIC void xfs_trans_alloc_dqinfo( xfs_trans_t *tp) { - tp->t_dqinfo = kmem_cache_zalloc(xfs_qm_dqtrxzone, + tp->t_dqinfo = kmem_cache_zalloc(xfs_dqtrx_cache, GFP_KERNEL | __GFP_NOFAIL); } @@ -856,6 +856,6 @@ xfs_trans_free_dqinfo( { if (!tp->t_dqinfo) return; - kmem_cache_free(xfs_qm_dqtrxzone, tp->t_dqinfo); + kmem_cache_free(xfs_dqtrx_cache, tp->t_dqinfo); tp->t_dqinfo = NULL; } |