diff options
Diffstat (limited to 'fs/nfs')
68 files changed, 3814 insertions, 1572 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index f7e32d76e34d..07932ce9246c 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -2,8 +2,10 @@ config NFS_FS tristate "NFS client support" depends on INET && FILE_LOCKING && MULTIUSER + select CRC32 select LOCKD select SUNRPC + select NFS_COMMON select NFS_ACL_SUPPORT if NFS_V3_ACL help Choose Y here if you want to access files residing on other @@ -33,12 +35,12 @@ config NFS_FS config NFS_V2 tristate "NFS client support for NFS version 2" depends on NFS_FS - default y + default n help This option enables support for version 2 of the NFS protocol (RFC 1094) in the kernel's NFS client. - If unsure, say Y. + If unsure, say N. config NFS_V3 tristate "NFS client support for NFS version 3" @@ -169,7 +171,8 @@ config ROOT_NFS config NFS_FSCACHE bool "Provide NFS client caching support" - depends on NFS_FS=m && NETFS_SUPPORT || NFS_FS=y && NETFS_SUPPORT=y + depends on NFS_FS + select NETFS_SUPPORT select FSCACHE help Say Y here if you want NFS data to be cached locally on disc through @@ -194,7 +197,6 @@ config NFS_USE_KERNEL_DNS config NFS_DEBUG bool depends on NFS_FS && SUNRPC_DEBUG - select CRC32 default y config NFS_DISABLE_UDP_SUPPORT diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 5f6db37f461e..9fb2f2cac87e 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -13,6 +13,7 @@ nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ nfs-$(CONFIG_ROOT_NFS) += nfsroot.o nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-$(CONFIG_NFS_FSCACHE) += fscache.o +nfs-$(CONFIG_NFS_LOCALIO) += localio.o obj-$(CONFIG_NFS_V2) += nfsv2.o nfsv2-y := nfs2super.o proc.o nfs2xdr.o diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 6be13e0ec170..47189476b553 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -564,25 +564,45 @@ bl_find_get_deviceid(struct nfs_server *server, gfp_t gfp_mask) { struct nfs4_deviceid_node *node; - unsigned long start, end; + int err = -ENODEV; retry: node = nfs4_find_get_deviceid(server, id, cred, gfp_mask); if (!node) return ERR_PTR(-ENODEV); - if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0) - return node; + /* + * Devices that are marked unavailable are left in the cache with a + * timeout to avoid sending GETDEVINFO after every LAYOUTGET, or + * constantly attempting to register the device. Once marked as + * unavailable they must be deleted and never reused. + */ + if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) { + unsigned long end = jiffies; + unsigned long start = end - PNFS_DEVICE_RETRY_TIMEOUT; + + if (!time_in_range(node->timestamp_unavailable, start, end)) { + /* Uncork subsequent GETDEVINFO operations for this device */ + nfs4_delete_deviceid(node->ld, node->nfs_client, id); + goto retry; + } + goto out_put; + } - end = jiffies; - start = end - PNFS_DEVICE_RETRY_TIMEOUT; - if (!time_in_range(node->timestamp_unavailable, start, end)) { - nfs4_delete_deviceid(node->ld, node->nfs_client, id); - goto retry; + if (!bl_register_dev(container_of(node, struct pnfs_block_dev, node))) { + /* + * If we cannot register, treat this device as transient: + * Make a negative cache entry for the device + */ + nfs4_mark_deviceid_unavailable(node); + goto out_put; } + return node; + +out_put: nfs4_put_deviceid_node(node); - return ERR_PTR(-ENODEV); + return ERR_PTR(err); } static int diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index f1eeb4914199..6da40ca19570 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -104,20 +104,26 @@ struct pnfs_block_dev { u64 start; u64 len; + enum pnfs_block_volume_type type; u32 nr_children; struct pnfs_block_dev *children; u64 chunk_size; struct file *bdev_file; u64 disk_offset; + unsigned long flags; u64 pr_key; - bool pr_registered; bool (*map)(struct pnfs_block_dev *dev, u64 offset, struct pnfs_block_dev_map *map); }; +/* pnfs_block_dev flag bits */ +enum { + PNFS_BDEV_REGISTERED = 0, +}; + /* sector_t fields are all in 512-byte sectors */ struct pnfs_block_extent { union { @@ -172,6 +178,7 @@ struct bl_msg_hdr { #define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */ /* dev.c */ +bool bl_register_dev(struct pnfs_block_dev *d); struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, gfp_t gfp_mask); void bl_free_deviceid_node(struct nfs4_deviceid_node *d); diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index 93ef7f864980..cab8809f0e0f 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -10,12 +10,81 @@ #include <linux/pr.h> #include "blocklayout.h" +#include "../nfs4trace.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD +static void bl_unregister_scsi(struct pnfs_block_dev *dev) +{ + struct block_device *bdev = file_bdev(dev->bdev_file); + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + int status; + + status = ops->pr_register(bdev, dev->pr_key, 0, false); + if (status) + trace_bl_pr_key_unreg_err(bdev, dev->pr_key, status); + else + trace_bl_pr_key_unreg(bdev, dev->pr_key); +} + +static bool bl_register_scsi(struct pnfs_block_dev *dev) +{ + struct block_device *bdev = file_bdev(dev->bdev_file); + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + int status; + + if (test_and_set_bit(PNFS_BDEV_REGISTERED, &dev->flags)) + return true; + + status = ops->pr_register(bdev, 0, dev->pr_key, true); + if (status) { + trace_bl_pr_key_reg_err(bdev, dev->pr_key, status); + return false; + } + trace_bl_pr_key_reg(bdev, dev->pr_key); + return true; +} + +static void bl_unregister_dev(struct pnfs_block_dev *dev) +{ + u32 i; + + if (dev->nr_children) { + for (i = 0; i < dev->nr_children; i++) + bl_unregister_dev(&dev->children[i]); + return; + } + + if (dev->type == PNFS_BLOCK_VOLUME_SCSI && + test_and_clear_bit(PNFS_BDEV_REGISTERED, &dev->flags)) + bl_unregister_scsi(dev); +} + +bool bl_register_dev(struct pnfs_block_dev *dev) +{ + u32 i; + + if (dev->nr_children) { + for (i = 0; i < dev->nr_children; i++) { + if (!bl_register_dev(&dev->children[i])) { + while (i > 0) + bl_unregister_dev(&dev->children[--i]); + return false; + } + } + return true; + } + + if (dev->type == PNFS_BLOCK_VOLUME_SCSI) + return bl_register_scsi(dev); + return true; +} + static void bl_free_device(struct pnfs_block_dev *dev) { + bl_unregister_dev(dev); + if (dev->nr_children) { int i; @@ -23,17 +92,6 @@ bl_free_device(struct pnfs_block_dev *dev) bl_free_device(&dev->children[i]); kfree(dev->children); } else { - if (dev->pr_registered) { - const struct pr_ops *ops = - file_bdev(dev->bdev_file)->bd_disk->fops->pr_ops; - int error; - - error = ops->pr_register(file_bdev(dev->bdev_file), - dev->pr_key, 0, false); - if (error) - pr_err("failed to unregister PR key.\n"); - } - if (dev->bdev_file) fput(dev->bdev_file); } @@ -314,7 +372,7 @@ bl_open_path(struct pnfs_block_volume *v, const char *prefix) bdev_file = bdev_file_open_by_path(devname, BLK_OPEN_READ | BLK_OPEN_WRITE, NULL, NULL); if (IS_ERR(bdev_file)) { - pr_warn("pNFS: failed to open device %s (%ld)\n", + dprintk("failed to open device %s (%ld)\n", devname, PTR_ERR(bdev_file)); } @@ -327,8 +385,9 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) { struct pnfs_block_volume *v = &volumes[idx]; - struct file *bdev_file; + struct block_device *bdev; const struct pr_ops *ops; + struct file *bdev_file; int error; if (!bl_validate_designator(v)) @@ -344,35 +403,30 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, if (IS_ERR(bdev_file)) bdev_file = bl_open_path(v, "wwn-0x"); if (IS_ERR(bdev_file)) + bdev_file = bl_open_path(v, "nvme-eui."); + if (IS_ERR(bdev_file)) { + pr_warn("pNFS: no device found for volume %*phN\n", + v->scsi.designator_len, v->scsi.designator); return PTR_ERR(bdev_file); + } d->bdev_file = bdev_file; + bdev = file_bdev(bdev_file); - d->len = bdev_nr_bytes(file_bdev(d->bdev_file)); + d->len = bdev_nr_bytes(bdev); d->map = bl_map_simple; d->pr_key = v->scsi.pr_key; if (d->len == 0) return -ENODEV; - pr_info("pNFS: using block device %s (reservation key 0x%llx)\n", - file_bdev(d->bdev_file)->bd_disk->disk_name, d->pr_key); - - ops = file_bdev(d->bdev_file)->bd_disk->fops->pr_ops; + ops = bdev->bd_disk->fops->pr_ops; if (!ops) { pr_err("pNFS: block device %s does not support reservations.", - file_bdev(d->bdev_file)->bd_disk->disk_name); + bdev->bd_disk->disk_name); error = -EINVAL; goto out_blkdev_put; } - error = ops->pr_register(file_bdev(d->bdev_file), 0, d->pr_key, true); - if (error) { - pr_err("pNFS: failed to register key for block device %s.", - file_bdev(d->bdev_file)->bd_disk->disk_name); - goto out_blkdev_put; - } - - d->pr_registered = true; return 0; out_blkdev_put: @@ -458,7 +512,9 @@ static int bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) { - switch (volumes[idx].type) { + d->type = volumes[idx].type; + + switch (d->type) { case PNFS_BLOCK_VOLUME_SIMPLE: return bl_parse_simple(server, d, volumes, idx, gfp_mask); case PNFS_BLOCK_VOLUME_SLICE: @@ -470,7 +526,7 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, case PNFS_BLOCK_VOLUME_SCSI: return bl_parse_scsi(server, d, volumes, idx, gfp_mask); default: - dprintk("unsupported volume type: %d\n", volumes[idx].type); + dprintk("unsupported volume type: %d\n", d->type); return -EIO; } } diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 8adfcd4c8c1a..86bdc7d23fb9 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -76,6 +76,8 @@ nfs4_callback_svc(void *vrqstp) { struct svc_rqst *rqstp = vrqstp; + svc_thread_init_status(rqstp, 0); + set_freezable(); while (!svc_thread_should_stop(rqstp)) @@ -209,10 +211,6 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion) return ERR_PTR(-ENOMEM); } cb_info->serv = serv; - /* As there is only one thread we need to over-ride the - * default maximum of 80 connections - */ - serv->sv_maxconn = 1024; dprintk("nfs_callback_create_svc: service created\n"); return serv; } diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 650758ee0d5f..154a6ed1299f 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -46,14 +46,15 @@ struct cb_compound_hdr_res { struct cb_getattrargs { struct nfs_fh fh; - uint32_t bitmap[2]; + uint32_t bitmap[3]; }; struct cb_getattrres { __be32 status; - uint32_t bitmap[2]; + uint32_t bitmap[3]; uint64_t size; uint64_t change_attr; + struct timespec64 atime; struct timespec64 ctime; struct timespec64 mtime; }; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 76cea34477ae..8397c43358bd 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -37,7 +37,7 @@ __be32 nfs4_callback_getattr(void *argp, void *resp, if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */ goto out; - res->bitmap[0] = res->bitmap[1] = 0; + memset(res->bitmap, 0, sizeof(res->bitmap)); res->status = htonl(NFS4ERR_BADHANDLE); dprintk_rcu("NFS: GETATTR callback request from %s\n", @@ -59,12 +59,16 @@ __be32 nfs4_callback_getattr(void *argp, void *resp, res->change_attr = delegation->change_attr; if (nfs_have_writebacks(inode)) res->change_attr++; + res->atime = inode_get_atime(inode); res->ctime = inode_get_ctime(inode); res->mtime = inode_get_mtime(inode); - res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) & - args->bitmap[0]; - res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) & - args->bitmap[1]; + res->bitmap[0] = (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE) & + args->bitmap[0]; + res->bitmap[1] = (FATTR4_WORD1_TIME_ACCESS | + FATTR4_WORD1_TIME_METADATA | + FATTR4_WORD1_TIME_MODIFY) & args->bitmap[1]; + res->bitmap[2] = (FATTR4_WORD2_TIME_DELEG_ACCESS | + FATTR4_WORD2_TIME_DELEG_MODIFY) & args->bitmap[2]; res->status = 0; out_iput: rcu_read_unlock(); @@ -319,9 +323,10 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, int stat; if (args->cbl_recall_type == RETURN_FSID) - stat = pnfs_destroy_layouts_byfsid(clp, &args->cbl_fsid, true); + stat = pnfs_layout_destroy_byfsid(clp, &args->cbl_fsid, + PNFS_LAYOUT_BULK_RETURN); else - stat = pnfs_destroy_layouts_byclid(clp, true); + stat = pnfs_layout_destroy_byclid(clp, PNFS_LAYOUT_BULK_RETURN); if (stat != 0) return NFS4ERR_DELAY; return NFS4ERR_NOMATCHING_LAYOUT; @@ -713,7 +718,7 @@ __be32 nfs4_callback_offload(void *data, void *dummy, copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_KERNEL); if (!copy) - return htonl(NFS4ERR_SERVERFAULT); + return cpu_to_be32(NFS4ERR_DELAY); spin_lock(&cps->clp->cl_lock); rcu_read_lock(); diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 9369488f2ed4..4254ba3ee7c5 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -25,8 +25,9 @@ #define CB_OP_GETATTR_BITMAP_MAXSZ (4 * 4) // bitmap length, 3 bitmaps #define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ CB_OP_GETATTR_BITMAP_MAXSZ + \ - /* change, size, ctime, mtime */\ - (2 + 2 + 3 + 3) * 4) + /* change, size, atime, ctime, + * mtime, deleg_atime, deleg_mtime */\ + (2 + 2 + 3 + 3 + 3 + 3 + 3) * 4) #define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #if defined(CONFIG_NFS_V4_1) @@ -117,7 +118,9 @@ static __be32 decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) if (likely(attrlen > 0)) bitmap[0] = ntohl(*p++); if (attrlen > 1) - bitmap[1] = ntohl(*p); + bitmap[1] = ntohl(*p++); + if (attrlen > 2) + bitmap[2] = ntohl(*p); return 0; } @@ -372,6 +375,8 @@ static __be32 decode_rc_list(struct xdr_stream *xdr, rc_list->rcl_nrefcalls = ntohl(*p++); if (rc_list->rcl_nrefcalls) { + if (unlikely(rc_list->rcl_nrefcalls > xdr->buf->len)) + goto out; p = xdr_inline_decode(xdr, rc_list->rcl_nrefcalls * 2 * sizeof(uint32_t)); if (unlikely(p == NULL)) @@ -445,7 +450,7 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp, void *argp) { struct cb_recallanyargs *args = argp; - uint32_t bitmap[2]; + uint32_t bitmap[3]; __be32 *p, status; p = xdr_inline_decode(xdr, 4); @@ -635,6 +640,13 @@ static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec64 * return 0; } +static __be32 encode_attr_atime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time) +{ + if (!(bitmap[1] & FATTR4_WORD1_TIME_ACCESS)) + return 0; + return encode_attr_time(xdr,time); +} + static __be32 encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time) { if (!(bitmap[1] & FATTR4_WORD1_TIME_METADATA)) @@ -649,6 +661,24 @@ static __be32 encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, return encode_attr_time(xdr,time); } +static __be32 encode_attr_delegatime(struct xdr_stream *xdr, + const uint32_t *bitmap, + const struct timespec64 *time) +{ + if (!(bitmap[2] & FATTR4_WORD2_TIME_DELEG_ACCESS)) + return 0; + return encode_attr_time(xdr,time); +} + +static __be32 encode_attr_delegmtime(struct xdr_stream *xdr, + const uint32_t *bitmap, + const struct timespec64 *time) +{ + if (!(bitmap[2] & FATTR4_WORD2_TIME_DELEG_MODIFY)) + return 0; + return encode_attr_time(xdr,time); +} + static __be32 encode_compound_hdr_res(struct xdr_stream *xdr, struct cb_compound_hdr_res *hdr) { __be32 status; @@ -699,10 +729,19 @@ static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, status = encode_attr_size(xdr, res->bitmap, res->size); if (unlikely(status != 0)) goto out; + status = encode_attr_atime(xdr, res->bitmap, &res->atime); + if (unlikely(status != 0)) + goto out; status = encode_attr_ctime(xdr, res->bitmap, &res->ctime); if (unlikely(status != 0)) goto out; status = encode_attr_mtime(xdr, res->bitmap, &res->mtime); + if (unlikely(status != 0)) + goto out; + status = encode_attr_delegatime(xdr, res->bitmap, &res->atime); + if (unlikely(status != 0)) + goto out; + status = encode_attr_delegmtime(xdr, res->bitmap, &res->mtime); *savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1))); out: return status; @@ -945,6 +984,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp) nfs_put_client(cps.clp); goto out_invalidcred; } + svc_xprt_set_valid(rqstp->rq_xprt); } cps.minorversion = hdr_arg.minorversion; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index de77848ae654..6d63b958c4bb 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -38,7 +38,7 @@ #include <linux/sunrpc/bc_xprt.h> #include <linux/nsproxy.h> #include <linux/pid_namespace.h> - +#include <linux/nfslocalio.h> #include "nfs4_fs.h" #include "callback.h" @@ -55,9 +55,13 @@ #define NFSDBG_FACILITY NFSDBG_CLIENT static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); -static DEFINE_SPINLOCK(nfs_version_lock); -static DEFINE_MUTEX(nfs_version_mutex); -static LIST_HEAD(nfs_versions); +static DEFINE_RWLOCK(nfs_version_lock); + +static struct nfs_subversion *nfs_version_mods[5] = { + [2] = NULL, + [3] = NULL, + [4] = NULL, +}; /* * RPC cruft for NFS @@ -76,38 +80,38 @@ const struct rpc_program nfs_program = { .pipe_dir_name = NFS_PIPE_DIRNAME, }; -static struct nfs_subversion *find_nfs_version(unsigned int version) +static struct nfs_subversion *__find_nfs_version(unsigned int version) { struct nfs_subversion *nfs; - spin_lock(&nfs_version_lock); - list_for_each_entry(nfs, &nfs_versions, list) { - if (nfs->rpc_ops->version == version) { - spin_unlock(&nfs_version_lock); - return nfs; - } - } - - spin_unlock(&nfs_version_lock); - return ERR_PTR(-EPROTONOSUPPORT); + read_lock(&nfs_version_lock); + nfs = nfs_version_mods[version]; + read_unlock(&nfs_version_lock); + return nfs; } -struct nfs_subversion *get_nfs_version(unsigned int version) +struct nfs_subversion *find_nfs_version(unsigned int version) { - struct nfs_subversion *nfs = find_nfs_version(version); + struct nfs_subversion *nfs = __find_nfs_version(version); - if (IS_ERR(nfs)) { - mutex_lock(&nfs_version_mutex); - request_module("nfsv%d", version); - nfs = find_nfs_version(version); - mutex_unlock(&nfs_version_mutex); - } + if (!nfs && request_module("nfsv%d", version) == 0) + nfs = __find_nfs_version(version); + + if (!nfs) + return ERR_PTR(-EPROTONOSUPPORT); - if (!IS_ERR(nfs) && !try_module_get(nfs->owner)) + if (!get_nfs_version(nfs)) return ERR_PTR(-EAGAIN); + return nfs; } +int get_nfs_version(struct nfs_subversion *nfs) +{ + return try_module_get(nfs->owner); +} +EXPORT_SYMBOL_GPL(get_nfs_version); + void put_nfs_version(struct nfs_subversion *nfs) { module_put(nfs->owner); @@ -115,23 +119,23 @@ void put_nfs_version(struct nfs_subversion *nfs) void register_nfs_version(struct nfs_subversion *nfs) { - spin_lock(&nfs_version_lock); + write_lock(&nfs_version_lock); - list_add(&nfs->list, &nfs_versions); + nfs_version_mods[nfs->rpc_ops->version] = nfs; nfs_version[nfs->rpc_ops->version] = nfs->rpc_vers; - spin_unlock(&nfs_version_lock); + write_unlock(&nfs_version_lock); } EXPORT_SYMBOL_GPL(register_nfs_version); void unregister_nfs_version(struct nfs_subversion *nfs) { - spin_lock(&nfs_version_lock); + write_lock(&nfs_version_lock); nfs_version[nfs->rpc_ops->version] = NULL; - list_del(&nfs->list); + nfs_version_mods[nfs->rpc_ops->version] = NULL; - spin_unlock(&nfs_version_lock); + write_unlock(&nfs_version_lock); } EXPORT_SYMBOL_GPL(unregister_nfs_version); @@ -151,7 +155,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) clp->cl_minorversion = cl_init->minorversion; clp->cl_nfs_mod = cl_init->nfs_mod; - if (!try_module_get(clp->cl_nfs_mod->owner)) + if (!get_nfs_version(clp->cl_nfs_mod)) goto error_dealloc; clp->rpc_ops = clp->cl_nfs_mod->rpc_ops; @@ -178,6 +182,13 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) clp->cl_max_connect = cl_init->max_connect ? cl_init->max_connect : 1; clp->cl_net = get_net(cl_init->net); +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + seqlock_init(&clp->cl_boot_lock); + ktime_get_real_ts64(&clp->cl_nfssvc_boot); + nfs_uuid_init(&clp->cl_uuid); + INIT_WORK(&clp->cl_local_probe_work, nfs_local_probe_async_work); +#endif /* CONFIG_NFS_LOCALIO */ + clp->cl_principal = "*"; clp->cl_xprtsec = cl_init->xprtsec; return clp; @@ -233,6 +244,8 @@ static void pnfs_init_server(struct nfs_server *server) */ void nfs_free_client(struct nfs_client *clp) { + nfs_localio_disable_client(clp); + /* -EIO all pending I/O */ if (!IS_ERR(clp->cl_rpcclient)) rpc_shutdown_client(clp->cl_rpcclient); @@ -424,7 +437,10 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) list_add_tail(&new->cl_share_link, &nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); - return rpc_ops->init_client(new, cl_init); + new = rpc_ops->init_client(new, cl_init); + if (!IS_ERR(new)) + nfs_local_probe(new); + return new; } spin_unlock(&nn->nfs_client_lock); @@ -530,6 +546,8 @@ int nfs_create_rpc_client(struct nfs_client *clp, args.flags |= RPC_CLNT_CREATE_NOPING; if (test_bit(NFS_CS_REUSEPORT, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_REUSEPORT; + if (test_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags)) + args.flags |= RPC_CLNT_CREATE_NETUNREACH_FATAL; if (!IS_ERR(clp->cl_rpcclient)) return 0; @@ -693,6 +711,9 @@ static int nfs_init_server(struct nfs_server *server, if (ctx->flags & NFS_MOUNT_NORESVPORT) set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + if (ctx->flags & NFS_MOUNT_NETUNREACH_FATAL) + __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags); + /* Allocate or find a client reference we can use */ clp = nfs_get_client(&cl_init); if (IS_ERR(clp)) @@ -983,6 +1004,7 @@ struct nfs_server *nfs_alloc_server(void) INIT_LIST_HEAD(&server->layouts); INIT_LIST_HEAD(&server->state_owners_lru); INIT_LIST_HEAD(&server->ss_copies); + INIT_LIST_HEAD(&server->ss_src_copies); atomic_set(&server->active, 0); @@ -994,8 +1016,11 @@ struct nfs_server *nfs_alloc_server(void) server->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED; - ida_init(&server->openowner_id); - ida_init(&server->lockowner_id); + init_waitqueue_head(&server->write_congestion_wait); + atomic_long_set(&server->writeback, 0); + + atomic64_set(&server->owner_ctr, 0); + pnfs_init_server(server); rpc_init_wait_queue(&server->uoc_rpcwaitq, "NFS UOC"); @@ -1034,8 +1059,6 @@ void nfs_free_server(struct nfs_server *server) } ida_free(&s_sysfs_ids, server->s_sysfs_id); - ida_destroy(&server->lockowner_id); - ida_destroy(&server->openowner_id); put_cred(server->cred); nfs_release_automount_timer(); call_rcu(&server->rcu, delayed_free); @@ -1082,6 +1105,8 @@ struct nfs_server *nfs_create_server(struct fs_context *fc) if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) server->namelen = NFS2_MAXNAMLEN; } + /* Linux 'subtree_check' borkenness mandates this setting */ + server->fh_expire_type = NFS_FH_VOL_RENAME; if (!(fattr->valid & NFS_ATTR_FATTR)) { error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh, @@ -1177,6 +1202,10 @@ void nfs_clients_init(struct net *net) #if IS_ENABLED(CONFIG_NFS_V4) idr_init(&nn->cb_ident_idr); #endif +#if IS_ENABLED(CONFIG_NFS_V4_1) + INIT_LIST_HEAD(&nn->nfs4_data_server_cache); + spin_lock_init(&nn->nfs4_data_server_lock); +#endif spin_lock_init(&nn->nfs_client_lock); nn->boot_time = ktime_get_real(); memset(&nn->rpcstats, 0, sizeof(nn->rpcstats)); @@ -1193,6 +1222,9 @@ void nfs_clients_exit(struct net *net) nfs_cleanup_cb_ident_idr(net); WARN_ON_ONCE(!list_empty(&nn->nfs_client_list)); WARN_ON_ONCE(!list_empty(&nn->nfs_volume_list)); +#if IS_ENABLED(CONFIG_NFS_V4_1) + WARN_ON_ONCE(!list_empty(&nn->nfs4_data_server_cache)); +#endif } #ifdef CONFIG_PROC_FS diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 6bace5fece04..8bdbc4dca89c 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -79,14 +79,14 @@ static void nfs_mark_return_delegation(struct nfs_server *server, struct nfs_delegation *delegation) { set_bit(NFS_DELEGATION_RETURN, &delegation->flags); + set_bit(NFS4SERV_DELEGRETURN, &server->delegation_flags); set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); } -static bool -nfs4_is_valid_delegation(const struct nfs_delegation *delegation, - fmode_t flags) +static bool nfs4_is_valid_delegation(const struct nfs_delegation *delegation, + fmode_t type) { - if (delegation != NULL && (delegation->type & flags) == flags && + if (delegation != NULL && (delegation->type & type) == type && !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) && !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) return true; @@ -103,19 +103,22 @@ struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode) return NULL; } -static int -nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark) +static int nfs4_do_check_delegation(struct inode *inode, fmode_t type, + int flags, bool mark) { struct nfs_delegation *delegation; int ret = 0; - flags &= FMODE_READ|FMODE_WRITE; + type &= FMODE_READ|FMODE_WRITE; rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); - if (nfs4_is_valid_delegation(delegation, flags)) { + if (nfs4_is_valid_delegation(delegation, type)) { if (mark) nfs_mark_delegation_referenced(delegation); ret = 1; + if ((flags & NFS_DELEGATION_FLAG_TIME) && + !test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags)) + ret = 0; } rcu_read_unlock(); return ret; @@ -124,22 +127,23 @@ nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark) * nfs4_have_delegation - check if inode has a delegation, mark it * NFS_DELEGATION_REFERENCED if there is one. * @inode: inode to check - * @flags: delegation types to check for + * @type: delegation types to check for + * @flags: various modifiers * * Returns one if inode has the indicated delegation, otherwise zero. */ -int nfs4_have_delegation(struct inode *inode, fmode_t flags) +int nfs4_have_delegation(struct inode *inode, fmode_t type, int flags) { - return nfs4_do_check_delegation(inode, flags, true); + return nfs4_do_check_delegation(inode, type, flags, true); } /* * nfs4_check_delegation - check if inode has a delegation, do not mark * NFS_DELEGATION_REFERENCED if it has one. */ -int nfs4_check_delegation(struct inode *inode, fmode_t flags) +int nfs4_check_delegation(struct inode *inode, fmode_t type) { - return nfs4_do_check_delegation(inode, flags, false); + return nfs4_do_check_delegation(inode, type, 0, false); } static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid) @@ -221,11 +225,12 @@ again: * @type: delegation type * @stateid: delegation stateid * @pagemod_limit: write delegation "space_limit" + * @deleg_type: raw delegation type * */ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, fmode_t type, const nfs4_stateid *stateid, - unsigned long pagemod_limit) + unsigned long pagemod_limit, u32 deleg_type) { struct nfs_delegation *delegation; const struct cred *oldcred = NULL; @@ -239,6 +244,14 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, delegation->pagemod_limit = pagemod_limit; oldcred = delegation->cred; delegation->cred = get_cred(cred); + switch (deleg_type) { + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: + set_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags); + break; + default: + clear_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags); + } clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); if (test_and_clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) @@ -250,11 +263,13 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, } else { rcu_read_unlock(); nfs_inode_set_delegation(inode, cred, type, stateid, - pagemod_limit); + pagemod_limit, deleg_type); } } -static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) +static int nfs_do_return_delegation(struct inode *inode, + struct nfs_delegation *delegation, + int issync) { const struct cred *cred; int res = 0; @@ -263,9 +278,8 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation * spin_lock(&delegation->lock); cred = get_cred(delegation->cred); spin_unlock(&delegation->lock); - res = nfs4_proc_delegreturn(inode, cred, - &delegation->stateid, - issync); + res = nfs4_proc_delegreturn(inode, cred, &delegation->stateid, + delegation, issync); put_cred(cred); } return res; @@ -293,7 +307,8 @@ nfs_start_delegation_return_locked(struct nfs_inode *nfsi) if (delegation == NULL) goto out; spin_lock(&delegation->lock); - if (!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { + if (delegation->inode && + !test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { clear_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags); /* Refcount matched in nfs_end_delegation_return() */ ret = nfs_get_delegation(delegation); @@ -317,14 +332,16 @@ nfs_start_delegation_return(struct nfs_inode *nfsi) } static void nfs_abort_delegation_return(struct nfs_delegation *delegation, - struct nfs_client *clp, int err) + struct nfs_server *server, int err) { - spin_lock(&delegation->lock); clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags); if (err == -EAGAIN) { set_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags); - set_bit(NFS4CLNT_DELEGRETURN_DELAYED, &clp->cl_state); + set_bit(NFS4SERV_DELEGRETURN_DELAYED, + &server->delegation_flags); + set_bit(NFS4CLNT_DELEGRETURN_DELAYED, + &server->nfs_client->cl_state); } spin_unlock(&delegation->lock); } @@ -418,13 +435,13 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation, * @type: delegation type * @stateid: delegation stateid * @pagemod_limit: write delegation "space_limit" + * @deleg_type: raw delegation type * * Returns zero on success, or a negative errno value. */ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, - fmode_t type, - const nfs4_stateid *stateid, - unsigned long pagemod_limit) + fmode_t type, const nfs4_stateid *stateid, + unsigned long pagemod_limit, u32 deleg_type) { struct nfs_server *server = NFS_SERVER(inode); struct nfs_client *clp = server->nfs_client; @@ -444,6 +461,11 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, delegation->cred = get_cred(cred); delegation->inode = inode; delegation->flags = 1<<NFS_DELEGATION_REFERENCED; + switch (deleg_type) { + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: + delegation->flags |= BIT(NFS_DELEGATION_DELEGTIME); + } delegation->test_gen = 0; spin_lock_init(&delegation->lock); @@ -508,6 +530,11 @@ add_new: atomic_long_inc(&nfs_active_delegations); trace_nfs4_set_delegation(inode, type); + + /* If we hold writebacks and have delegated mtime then update */ + if (deleg_type == NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG && + nfs_have_writebacks(inode)) + nfs_update_delegated_mtime(inode); out: spin_unlock(&clp->cl_lock); if (delegation != NULL) @@ -524,7 +551,7 @@ out: */ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation *delegation, int issync) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_server *server = NFS_SERVER(inode); unsigned int mode = O_WRONLY | O_RDWR; int err = 0; @@ -546,11 +573,11 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation /* * Guard against state recovery */ - err = nfs4_wait_clnt_recover(clp); + err = nfs4_wait_clnt_recover(server->nfs_client); } if (err) { - nfs_abort_delegation_return(delegation, clp, err); + nfs_abort_delegation_return(delegation, server, err); goto out; } @@ -567,17 +594,6 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation) if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags)) ret = true; - else if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags)) { - struct inode *inode; - - spin_lock(&delegation->lock); - inode = delegation->inode; - if (inode && list_empty(&NFS_I(inode)->open_files)) - ret = true; - spin_unlock(&delegation->lock); - } - if (ret) - clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) || test_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags) || test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) @@ -596,6 +612,9 @@ static int nfs_server_return_marked_delegations(struct nfs_server *server, struct nfs_delegation *place_holder_deleg = NULL; int err = 0; + if (!test_and_clear_bit(NFS4SERV_DELEGRETURN, + &server->delegation_flags)) + return 0; restart: /* * To avoid quadratic looping we hold a reference @@ -624,6 +643,9 @@ restart: prev = delegation; continue; } + inode = nfs_delegation_grab_inode(delegation); + if (inode == NULL) + continue; if (prev) { struct inode *tmp = nfs_delegation_grab_inode(prev); @@ -634,12 +656,6 @@ restart: } } - inode = nfs_delegation_grab_inode(delegation); - if (inode == NULL) { - rcu_read_unlock(); - iput(to_put); - goto restart; - } delegation = nfs_start_delegation_return_locked(NFS_I(inode)); rcu_read_unlock(); @@ -650,6 +666,7 @@ restart: cond_resched(); if (!err) goto restart; + set_bit(NFS4SERV_DELEGRETURN, &server->delegation_flags); set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); goto out; } @@ -664,6 +681,9 @@ static bool nfs_server_clear_delayed_delegations(struct nfs_server *server) struct nfs_delegation *d; bool ret = false; + if (!test_and_clear_bit(NFS4SERV_DELEGRETURN_DELAYED, + &server->delegation_flags)) + goto out; list_for_each_entry_rcu (d, &server->delegations, super_list) { if (!test_bit(NFS_DELEGATION_RETURN_DELAYED, &d->flags)) continue; @@ -671,6 +691,7 @@ static bool nfs_server_clear_delayed_delegations(struct nfs_server *server) clear_bit(NFS_DELEGATION_RETURN_DELAYED, &d->flags); ret = true; } +out: return ret; } @@ -761,6 +782,43 @@ int nfs4_inode_return_delegation(struct inode *inode) } /** + * nfs4_inode_set_return_delegation_on_close - asynchronously return a delegation + * @inode: inode to process + * + * This routine is called to request that the delegation be returned as soon + * as the file is closed. If the file is already closed, the delegation is + * immediately returned. + */ +void nfs4_inode_set_return_delegation_on_close(struct inode *inode) +{ + struct nfs_delegation *delegation; + struct nfs_delegation *ret = NULL; + + if (!inode) + return; + rcu_read_lock(); + delegation = nfs4_get_valid_delegation(inode); + if (!delegation) + goto out; + spin_lock(&delegation->lock); + if (!delegation->inode) + goto out_unlock; + if (list_empty(&NFS_I(inode)->open_files) && + !test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { + /* Refcount matched in nfs_end_delegation_return() */ + ret = nfs_get_delegation(delegation); + } else + set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); +out_unlock: + spin_unlock(&delegation->lock); + if (ret) + nfs_clear_verifier_delegated(inode); +out: + rcu_read_unlock(); + nfs_end_delegation_return(inode, ret, 0); +} + +/** * nfs4_inode_return_delegation_on_close - asynchronously return a delegation * @inode: inode to process * @@ -821,11 +879,25 @@ int nfs4_inode_make_writeable(struct inode *inode) return nfs4_inode_return_delegation(inode); } -static void nfs_mark_return_if_closed_delegation(struct nfs_server *server, - struct nfs_delegation *delegation) +static void +nfs_mark_return_if_closed_delegation(struct nfs_server *server, + struct nfs_delegation *delegation) { - set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); - set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); + struct inode *inode; + + if (test_bit(NFS_DELEGATION_RETURN, &delegation->flags) || + test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags)) + return; + spin_lock(&delegation->lock); + inode = delegation->inode; + if (!inode) + goto out; + if (list_empty(&NFS_I(inode)->open_files)) + nfs_mark_return_delegation(server, delegation); + else + set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); +out: + spin_unlock(&delegation->lock); } static bool nfs_server_mark_return_all_delegations(struct nfs_server *server) @@ -981,6 +1053,11 @@ void nfs_delegation_mark_returned(struct inode *inode, } nfs_mark_delegation_revoked(delegation); + clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags); + spin_unlock(&delegation->lock); + if (nfs_detach_delegation(NFS_I(inode), delegation, NFS_SERVER(inode))) + nfs_put_delegation(delegation); + goto out_rcu_unlock; out_clear_returning: clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags); @@ -1161,7 +1238,6 @@ static int nfs_server_reap_unclaimed_delegations(struct nfs_server *server, struct inode *inode; restart: rcu_read_lock(); -restart_locked: list_for_each_entry_rcu(delegation, &server->delegations, super_list) { if (test_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags) || @@ -1172,7 +1248,7 @@ restart_locked: continue; inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) - goto restart_locked; + continue; delegation = nfs_start_delegation_return_locked(NFS_I(inode)); rcu_read_unlock(); if (delegation != NULL) { @@ -1215,6 +1291,7 @@ static void nfs_mark_test_expired_delegation(struct nfs_server *server, return; clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); set_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags); + set_bit(NFS4SERV_DELEGATION_EXPIRED, &server->delegation_flags); set_bit(NFS4CLNT_DELEGATION_EXPIRED, &server->nfs_client->cl_state); } @@ -1293,9 +1370,11 @@ static int nfs_server_reap_expired_delegations(struct nfs_server *server, nfs4_stateid stateid; unsigned long gen = ++server->delegation_gen; + if (!test_and_clear_bit(NFS4SERV_DELEGATION_EXPIRED, + &server->delegation_flags)) + return 0; restart: rcu_read_lock(); -restart_locked: list_for_each_entry_rcu(delegation, &server->delegations, super_list) { if (test_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags) || @@ -1307,7 +1386,7 @@ restart_locked: continue; inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) - goto restart_locked; + continue; spin_lock(&delegation->lock); cred = get_cred_rcu(delegation->cred); nfs4_stateid_copy(&stateid, &delegation->stateid); @@ -1323,6 +1402,9 @@ restart_locked: goto restart; } nfs_inode_mark_test_expired_delegation(server,inode); + set_bit(NFS4SERV_DELEGATION_EXPIRED, &server->delegation_flags); + set_bit(NFS4CLNT_DELEGATION_EXPIRED, + &server->nfs_client->cl_state); iput(inode); return -EAGAIN; } diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index a6f495d012cf..8ff5ab9c5c25 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -38,14 +38,18 @@ enum { NFS_DELEGATION_TEST_EXPIRED, NFS_DELEGATION_INODE_FREEING, NFS_DELEGATION_RETURN_DELAYED, + NFS_DELEGATION_DELEGTIME, }; int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, - fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit); + fmode_t type, const nfs4_stateid *stateid, + unsigned long pagemod_limit, u32 deleg_type); void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, - fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit); + fmode_t type, const nfs4_stateid *stateid, + unsigned long pagemod_limit, u32 deleg_type); int nfs4_inode_return_delegation(struct inode *inode); void nfs4_inode_return_delegation_on_close(struct inode *inode); +void nfs4_inode_set_return_delegation_on_close(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); void nfs_inode_evict_delegation(struct inode *inode); @@ -67,7 +71,9 @@ void nfs_test_expired_all_delegations(struct nfs_client *clp); void nfs_reap_expired_delegations(struct nfs_client *clp); /* NFSv4 delegation-related procedures */ -int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync); +int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, + const nfs4_stateid *stateid, + struct nfs_delegation *delegation, int issync); int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid); bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, const struct cred **cred); @@ -75,8 +81,8 @@ bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode); struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode); void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); -int nfs4_have_delegation(struct inode *inode, fmode_t flags); -int nfs4_check_delegation(struct inode *inode, fmode_t flags); +int nfs4_have_delegation(struct inode *inode, fmode_t type, int flags); +int nfs4_check_delegation(struct inode *inode, fmode_t type); bool nfs4_delegation_flush_on_close(const struct inode *inode); void nfs_inode_find_delegation_state_and_recover(struct inode *inode, const nfs4_stateid *stateid); @@ -84,9 +90,37 @@ int nfs4_inode_make_writeable(struct inode *inode); #endif +#define NFS_DELEGATION_FLAG_TIME BIT(1) + +void nfs_update_delegated_atime(struct inode *inode); +void nfs_update_delegated_mtime(struct inode *inode); +void nfs_update_delegated_mtime_locked(struct inode *inode); + +static inline int nfs_have_read_or_write_delegation(struct inode *inode) +{ + return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0); +} + +static inline int nfs_have_write_delegation(struct inode *inode) +{ + return NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE, 0); +} + static inline int nfs_have_delegated_attributes(struct inode *inode) { - return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ); + return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0); +} + +static inline int nfs_have_delegated_atime(struct inode *inode) +{ + return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, + NFS_DELEGATION_FLAG_TIME); +} + +static inline int nfs_have_delegated_mtime(struct inode *inode) +{ + return NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE, + NFS_DELEGATION_FLAG_TIME); } #endif diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ac505671efbd..d0e0b435a843 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -56,6 +56,8 @@ static int nfs_readdir(struct file *, struct dir_context *); static int nfs_fsync_dir(struct file *, loff_t, loff_t, int); static loff_t nfs_llseek_dir(struct file *, loff_t, int); static void nfs_readdir_clear_array(struct folio *); +static int nfs_do_create(struct inode *dir, struct dentry *dentry, + umode_t mode, int open_flags); const struct file_operations nfs_dir_operations = { .llseek = nfs_llseek_dir, @@ -149,7 +151,7 @@ struct nfs_cache_array { unsigned char folio_full : 1, folio_is_eof : 1, cookies_are_ordered : 1; - struct nfs_cache_array_entry array[]; + struct nfs_cache_array_entry array[] __counted_by(size); }; struct nfs_readdir_descriptor { @@ -326,7 +328,8 @@ static int nfs_readdir_folio_array_append(struct folio *folio, goto out; } - cache_entry = &array->array[array->size]; + array->size++; + cache_entry = &array->array[array->size - 1]; cache_entry->cookie = array->last_cookie; cache_entry->ino = entry->ino; cache_entry->d_type = entry->d_type; @@ -335,7 +338,6 @@ static int nfs_readdir_folio_array_append(struct folio *folio, array->last_cookie = entry->cookie; if (array->last_cookie <= cache_entry->cookie) array->cookies_are_ordered = 0; - array->size++; if (entry->eof != 0) nfs_readdir_array_set_eof(array); out: @@ -664,6 +666,8 @@ static bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx, { if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) return false; + if (NFS_SERVER(dir)->flags & NFS_MOUNT_FORCE_RDIRPLUS) + return true; if (ctx->pos == 0 || cache_hits + cache_misses > NFS_READDIR_CACHE_USAGE_THRESHOLD) return true; @@ -1435,7 +1439,7 @@ static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf) if (!dir || !nfs_verify_change_attribute(dir, verf)) return; - if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0)) nfs_set_verifier_delegated(&verf); dentry->d_time = verf; } @@ -1530,7 +1534,8 @@ static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags) { if (NFS_PROTO(dir)->version == 2) return 0; - return flags & LOOKUP_EXCL; + return (flags & (LOOKUP_CREATE | LOOKUP_EXCL)) == + (LOOKUP_CREATE | LOOKUP_EXCL); } /* @@ -1625,7 +1630,16 @@ nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry, switch (error) { case 1: break; - case 0: + case -ETIMEDOUT: + if (inode && (IS_ROOT(dentry) || + NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)) + error = 1; + break; + case -ESTALE: + case -ENOENT: + error = 0; + fallthrough; + default: /* * We can't d_drop the root of a disconnected tree: * its d_hash is on the s_anon list and d_drop() would hide @@ -1661,7 +1675,7 @@ nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry, return nfs_lookup_revalidate_done(dir, dentry, inode, 1); } -static int nfs_lookup_revalidate_dentry(struct inode *dir, +static int nfs_lookup_revalidate_dentry(struct inode *dir, const struct qstr *name, struct dentry *dentry, struct inode *inode, unsigned int flags) { @@ -1679,19 +1693,9 @@ static int nfs_lookup_revalidate_dentry(struct inode *dir, goto out; dir_verifier = nfs_save_change_attribute(dir); - ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr); - if (ret < 0) { - switch (ret) { - case -ESTALE: - case -ENOENT: - ret = 0; - break; - case -ETIMEDOUT: - if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL) - ret = 1; - } + ret = NFS_PROTO(dir)->lookup(dir, dentry, name, fhandle, fattr); + if (ret < 0) goto out; - } /* Request help from readdirplus */ nfs_lookup_advise_force_readdirplus(dir, flags); @@ -1731,11 +1735,11 @@ out: * cached dentry and do a new lookup. */ static int -nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, - unsigned int flags) +nfs_do_lookup_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct inode *inode; - int error; + int error = 0; nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); inode = d_inode(dentry); @@ -1774,47 +1778,59 @@ nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, if (NFS_STALE(inode)) goto out_bad; - return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags); + return nfs_lookup_revalidate_dentry(dir, name, dentry, inode, flags); out_valid: return nfs_lookup_revalidate_done(dir, dentry, inode, 1); out_bad: if (flags & LOOKUP_RCU) return -ECHILD; - return nfs_lookup_revalidate_done(dir, dentry, inode, 0); + return nfs_lookup_revalidate_done(dir, dentry, inode, error); } static int -__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags, - int (*reval)(struct inode *, struct dentry *, unsigned int)) +__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) { - struct dentry *parent; - struct inode *dir; - int ret; - if (flags & LOOKUP_RCU) { if (dentry->d_fsdata == NFS_FSDATA_BLOCKED) return -ECHILD; - parent = READ_ONCE(dentry->d_parent); - dir = d_inode_rcu(parent); - if (!dir) - return -ECHILD; - ret = reval(dir, dentry, flags); - if (parent != READ_ONCE(dentry->d_parent)) - return -ECHILD; } else { - /* Wait for unlink to complete */ + /* Wait for unlink to complete - see unblock_revalidate() */ wait_var_event(&dentry->d_fsdata, - dentry->d_fsdata != NFS_FSDATA_BLOCKED); - parent = dget_parent(dentry); - ret = reval(d_inode(parent), dentry, flags); - dput(parent); + smp_load_acquire(&dentry->d_fsdata) + != NFS_FSDATA_BLOCKED); } - return ret; + return 0; } -static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) +static int nfs_lookup_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { - return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate); + if (__nfs_lookup_revalidate(dentry, flags)) + return -ECHILD; + return nfs_do_lookup_revalidate(dir, name, dentry, flags); +} + +static void block_revalidate(struct dentry *dentry) +{ + /* old devname - just in case */ + kfree(dentry->d_fsdata); + + /* Any new reference that could lead to an open + * will take ->d_lock in lookup_open() -> d_lookup(). + * Holding this lock ensures we cannot race with + * __nfs_lookup_revalidate() and removes and need + * for further barriers. + */ + lockdep_assert_held(&dentry->d_lock); + + dentry->d_fsdata = NFS_FSDATA_BLOCKED; +} + +static void unblock_revalidate(struct dentry *dentry) +{ + /* store_release ensures wait_var_event() sees the update */ + smp_store_release(&dentry->d_fsdata, NULL); + wake_up_var(&dentry->d_fsdata); } /* @@ -1957,7 +1973,8 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in dir_verifier = nfs_save_change_attribute(dir); trace_nfs_lookup_enter(dir, dentry, flags); - error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr); + error = NFS_PROTO(dir)->lookup(dir, dentry, &dentry->d_name, + fhandle, fattr); if (error == -ENOENT) { if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE)) dir_verifier = inode_peek_iversion_raw(dir); @@ -2000,7 +2017,8 @@ void nfs_d_prune_case_insensitive_aliases(struct inode *inode) EXPORT_SYMBOL_GPL(nfs_d_prune_case_insensitive_aliases); #if IS_ENABLED(CONFIG_NFS_V4) -static int nfs4_lookup_revalidate(struct dentry *, unsigned int); +static int nfs4_lookup_revalidate(struct inode *, const struct qstr *, + struct dentry *, unsigned int); const struct dentry_operations nfs4_dentry_operations = { .d_revalidate = nfs4_lookup_revalidate, @@ -2189,11 +2207,14 @@ no_open: EXPORT_SYMBOL_GPL(nfs_atomic_open); static int -nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, - unsigned int flags) +nfs4_lookup_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct inode *inode; + if (__nfs_lookup_revalidate(dentry, flags)) + return -ECHILD; + trace_nfs_lookup_revalidate_enter(dir, dentry, flags); if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY)) @@ -2229,19 +2250,51 @@ nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, reval_dentry: if (flags & LOOKUP_RCU) return -ECHILD; - return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags); + return nfs_lookup_revalidate_dentry(dir, name, dentry, inode, flags); full_reval: - return nfs_do_lookup_revalidate(dir, dentry, flags); + return nfs_do_lookup_revalidate(dir, name, dentry, flags); } -static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) +#endif /* CONFIG_NFSV4 */ + +int nfs_atomic_open_v23(struct inode *dir, struct dentry *dentry, + struct file *file, unsigned int open_flags, + umode_t mode) { - return __nfs_lookup_revalidate(dentry, flags, - nfs4_do_lookup_revalidate); -} -#endif /* CONFIG_NFSV4 */ + /* Same as look+open from lookup_open(), but with different O_TRUNC + * handling. + */ + int error = 0; + + if (dentry->d_name.len > NFS_SERVER(dir)->namelen) + return -ENAMETOOLONG; + + if (open_flags & O_CREAT) { + file->f_mode |= FMODE_CREATED; + error = nfs_do_create(dir, dentry, mode, open_flags); + if (error) + return error; + return finish_open(file, dentry, NULL); + } else if (d_in_lookup(dentry)) { + /* The only flags nfs_lookup considers are + * LOOKUP_EXCL and LOOKUP_RENAME_TARGET, and + * we want those to be zero so the lookup isn't skipped. + */ + struct dentry *res = nfs_lookup(dir, dentry, 0); + + d_lookup_done(dentry); + if (unlikely(res)) { + if (IS_ERR(res)) + return PTR_ERR(res); + return finish_no_open(file, res); + } + } + return finish_no_open(file, NULL); + +} +EXPORT_SYMBOL_GPL(nfs_atomic_open_v23); struct dentry * nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle, @@ -2256,7 +2309,8 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle, d_drop(dentry); if (fhandle->size == 0) { - error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr); + error = NFS_PROTO(dir)->lookup(dir, dentry, &dentry->d_name, + fhandle, fattr); if (error) goto out_error; } @@ -2303,18 +2357,23 @@ EXPORT_SYMBOL_GPL(nfs_instantiate); * that the operation succeeded on the server, but an error in the * reply path made it appear to have failed. */ -int nfs_create(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode, bool excl) +static int nfs_do_create(struct inode *dir, struct dentry *dentry, + umode_t mode, int open_flags) { struct iattr attr; - int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT; int error; + open_flags |= O_CREAT; + dfprintk(VFS, "NFS: create(%s/%lu), %pd\n", dir->i_sb->s_id, dir->i_ino, dentry); attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; + if (open_flags & O_TRUNC) { + attr.ia_size = 0; + attr.ia_valid |= ATTR_SIZE; + } trace_nfs_create_enter(dir, dentry, open_flags); error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); @@ -2326,6 +2385,12 @@ out_err: d_drop(dentry); return error; } + +int nfs_create(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode, bool excl) +{ + return nfs_do_create(dir, dentry, mode, excl ? O_EXCL : 0); +} EXPORT_SYMBOL_GPL(nfs_create); /* @@ -2359,11 +2424,11 @@ EXPORT_SYMBOL_GPL(nfs_mknod); /* * See comments for nfs_proc_create regarding failed operations. */ -int nfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode) +struct dentry *nfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode) { struct iattr attr; - int error; + struct dentry *ret; dfprintk(VFS, "NFS: mkdir(%s/%lu), %pd\n", dir->i_sb->s_id, dir->i_ino, dentry); @@ -2372,14 +2437,9 @@ int nfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, attr.ia_mode = mode | S_IFDIR; trace_nfs_mkdir_enter(dir, dentry); - error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr); - trace_nfs_mkdir_exit(dir, dentry, error); - if (error != 0) - goto out_err; - return 0; -out_err: - d_drop(dentry); - return error; + ret = NFS_PROTO(dir)->mkdir(dir, dentry, &attr); + trace_nfs_mkdir_exit(dir, dentry, PTR_ERR_OR_ZERO(ret)); + return ret; } EXPORT_SYMBOL_GPL(nfs_mkdir); @@ -2501,15 +2561,12 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) spin_unlock(&dentry->d_lock); goto out; } - /* old devname */ - kfree(dentry->d_fsdata); - dentry->d_fsdata = NFS_FSDATA_BLOCKED; + block_revalidate(dentry); spin_unlock(&dentry->d_lock); error = nfs_safe_remove(dentry); nfs_dentry_remove_handle_error(dir, dentry, error); - dentry->d_fsdata = NULL; - wake_up_var(&dentry->d_fsdata); + unblock_revalidate(dentry); out: trace_nfs_unlink_exit(dir, dentry, error); return error; @@ -2616,8 +2673,19 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data) { struct dentry *new_dentry = data->new_dentry; - new_dentry->d_fsdata = NULL; - wake_up_var(&new_dentry->d_fsdata); + unblock_revalidate(new_dentry); +} + +static bool nfs_rename_is_unsafe_cross_dir(struct dentry *old_dentry, + struct dentry *new_dentry) +{ + struct nfs_server *server = NFS_SB(old_dentry->d_sb); + + if (old_dentry->d_parent != new_dentry->d_parent) + return false; + if (server->fh_expire_type & NFS_FH_RENAME_UNSAFE) + return !(server->fh_expire_type & NFS_FH_NOEXPIRE_WITH_OPEN); + return true; } /* @@ -2679,11 +2747,6 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (WARN_ON(new_dentry->d_flags & DCACHE_NFSFS_RENAMED) || WARN_ON(new_dentry->d_fsdata == NFS_FSDATA_BLOCKED)) goto out; - if (new_dentry->d_fsdata) { - /* old devname */ - kfree(new_dentry->d_fsdata); - new_dentry->d_fsdata = NULL; - } spin_lock(&new_dentry->d_lock); if (d_count(new_dentry) > 2) { @@ -2705,18 +2768,21 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, new_dentry = dentry; new_inode = NULL; } else { - new_dentry->d_fsdata = NFS_FSDATA_BLOCKED; + block_revalidate(new_dentry); must_unblock = true; spin_unlock(&new_dentry->d_lock); } } - if (S_ISREG(old_inode->i_mode)) + if (S_ISREG(old_inode->i_mode) && + nfs_rename_is_unsafe_cross_dir(old_dentry, new_dentry)) nfs_sync_inode(old_inode); task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, must_unblock ? nfs_unblock_rename : NULL); if (IS_ERR(task)) { + if (must_unblock) + unblock_revalidate(new_dentry); error = PTR_ERR(task); goto out; } diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index bb2f583eb28b..48d89716193a 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -56,6 +56,7 @@ #include <linux/uaccess.h> #include <linux/atomic.h> +#include "delegation.h" #include "internal.h" #include "iostat.h" #include "pnfs.h" @@ -130,6 +131,20 @@ static void nfs_direct_truncate_request(struct nfs_direct_req *dreq, dreq->count = req_start; } +static void nfs_direct_file_adjust_size_locked(struct inode *inode, + loff_t offset, size_t count) +{ + loff_t newsize = offset + (loff_t)count; + loff_t oldsize = i_size_read(inode); + + if (newsize > oldsize) { + i_size_write(inode, newsize); + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; + trace_nfs_size_grow(inode, newsize); + nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); + } +} + /** * nfs_swap_rw - NFS address space operation for swap I/O * @iocb: target I/O control block @@ -141,8 +156,6 @@ int nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter) { ssize_t ret; - VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); - if (iov_iter_rw(iter) == READ) ret = nfs_file_direct_read(iocb, iter, true); else @@ -274,6 +287,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) nfs_direct_count_bytes(dreq, hdr); spin_unlock(&dreq->lock); + nfs_update_delegated_atime(dreq->inode); + while (!list_empty(&hdr->pages)) { struct nfs_page *req = nfs_list_entry(hdr->pages.next); struct page *page = req->wb_page; @@ -305,6 +320,7 @@ static void nfs_read_sync_pgio_error(struct list_head *head, int error) static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr) { get_dreq(hdr->dreq); + set_bit(NFS_IOHDR_ODIRECT, &hdr->flags); } static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = { @@ -456,8 +472,16 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, if (user_backed_iter(iter)) dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; - if (!swap) - nfs_start_io_direct(inode); + if (!swap) { + result = nfs_start_io_direct(inode); + if (result) { + /* release the reference that would usually be + * consumed by nfs_direct_read_schedule_iovec() + */ + nfs_direct_req_release(dreq); + goto out_release; + } + } NFS_I(inode)->read_io += count; requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); @@ -733,7 +757,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) { struct nfs_direct_req *dreq = hdr->dreq; struct nfs_commit_info cinfo; - struct nfs_page *req = nfs_list_entry(hdr->pages.next); + struct inode *inode = dreq->inode; int flags = NFS_ODIRECT_DONE; trace_nfs_direct_write_completion(dreq); @@ -755,7 +779,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) } spin_unlock(&dreq->lock); + spin_lock(&inode->i_lock); + nfs_direct_file_adjust_size_locked(inode, dreq->io_start, dreq->count); + nfs_update_delegated_mtime_locked(dreq->inode); + spin_unlock(&inode->i_lock); + while (!list_empty(&hdr->pages)) { + struct nfs_page *req; req = nfs_list_entry(hdr->pages.next); nfs_list_remove_request(req); @@ -1009,7 +1039,14 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, FLUSH_STABLE); } else { - nfs_start_io_direct(inode); + result = nfs_start_io_direct(inode); + if (result) { + /* release the reference that would usually be + * consumed by nfs_direct_write_schedule_iovec() + */ + nfs_direct_req_release(dreq); + goto out_release; + } requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, FLUSH_COND_STABLE); diff --git a/fs/nfs/export.c b/fs/nfs/export.c index be686b8e0c54..e9c233b6fd20 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -154,5 +154,6 @@ const struct export_operations nfs_export_ops = { EXPORT_OP_CLOSE_BEFORE_UNLINK | EXPORT_OP_REMOTE_FS | EXPORT_OP_NOATOMIC_ATTR | - EXPORT_OP_FLUSH_ON_CLOSE, + EXPORT_OP_FLUSH_ON_CLOSE | + EXPORT_OP_NOLOCKS, }; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 407c6e15afe2..033feeab8c34 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -29,6 +29,7 @@ #include <linux/pagemap.h> #include <linux/gfp.h> #include <linux/swap.h> +#include <linux/compaction.h> #include <linux/uaccess.h> #include <linux/filelock.h> @@ -166,7 +167,10 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) iocb->ki_filp, iov_iter_count(to), (unsigned long) iocb->ki_pos); - nfs_start_io_read(inode); + result = nfs_start_io_read(inode); + if (result) + return result; + result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); if (!result) { result = generic_file_read_iter(iocb, to); @@ -187,7 +191,10 @@ nfs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe dprintk("NFS: splice_read(%pD2, %zu@%llu)\n", in, len, *ppos); - nfs_start_io_read(inode); + result = nfs_start_io_read(inode); + if (result) + return result; + result = nfs_revalidate_mapping(inode, in->f_mapping); if (!result) { result = filemap_splice_read(in, ppos, pipe, len, flags); @@ -336,9 +343,10 @@ static bool nfs_want_read_modify_write(struct file *file, struct folio *folio, * increment the page use counts until he is done with the page. */ static int nfs_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, struct page **pagep, + loff_t pos, unsigned len, struct folio **foliop, void **fsdata) { + fgf_t fgp = FGP_WRITEBEGIN; struct folio *folio; int once_thru = 0; int ret; @@ -346,12 +354,13 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n", file, mapping->host->i_ino, len, (long long) pos); + fgp |= fgf_set_order(len); start: - folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, FGP_WRITEBEGIN, + folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp, mapping_gfp_mask(mapping)); if (IS_ERR(folio)) return PTR_ERR(folio); - *pagep = &folio->page; + *foliop = folio; ret = nfs_flush_incompatible(file, folio); if (ret) { @@ -370,10 +379,9 @@ start: static int nfs_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct folio *folio, void *fsdata) { struct nfs_open_context *ctx = nfs_file_open_context(file); - struct folio *folio = page_folio(page); unsigned offset = offset_in_folio(folio, pos); int status; @@ -425,7 +433,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, static void nfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; dfprintk(PAGECACHE, "NFS: invalidate_folio(%lu, %zu, %zu)\n", folio->index, offset, length); @@ -433,8 +441,8 @@ static void nfs_invalidate_folio(struct folio *folio, size_t offset, return; /* Cancel any unstarted writes on this page */ nfs_wb_folio_cancel(inode, folio); - folio_wait_fscache(folio); - trace_nfs_invalidate_folio(inode, folio); + folio_wait_private_2(folio); /* [DEPRECATED] */ + trace_nfs_invalidate_folio(inode, folio_pos(folio) + offset, length); } /* @@ -450,9 +458,9 @@ static bool nfs_release_folio(struct folio *folio, gfp_t gfp) /* If the private flag is set, then the folio is not freeable */ if (folio_test_private(folio)) { if ((current_gfp_context(gfp) & GFP_KERNEL) != GFP_KERNEL || - current_is_kswapd()) + current_is_kswapd() || current_is_kcompactd()) return false; - if (nfs_wb_folio(folio_file_mapping(folio)->host, folio) < 0) + if (nfs_wb_folio(folio->mapping->host, folio) < 0) return false; } return nfs_fscache_release_folio(folio, gfp); @@ -500,9 +508,10 @@ static int nfs_launder_folio(struct folio *folio) dfprintk(PAGECACHE, "NFS: launder_folio(%ld, %llu)\n", inode->i_ino, folio_pos(folio)); - folio_wait_fscache(folio); + folio_wait_private_2(folio); /* [DEPRECATED] */ ret = nfs_wb_folio(inode, folio); - trace_nfs_launder_folio_done(inode, folio, ret); + trace_nfs_launder_folio_done(inode, folio_pos(folio), + folio_size(folio), ret); return ret; } @@ -588,13 +597,13 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf) dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n", filp, filp->f_mapping->host->i_ino, - (long long)folio_file_pos(folio)); + (long long)folio_pos(folio)); sb_start_pagefault(inode->i_sb); /* make sure the cache has finished storing the page */ - if (folio_test_fscache(folio) && - folio_wait_fscache_killable(folio) < 0) { + if (folio_test_private_2(folio) && /* [DEPRECATED] */ + folio_wait_private_2_killable(folio) < 0) { ret = VM_FAULT_RETRY; goto out; } @@ -604,7 +613,7 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf) TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); folio_lock(folio); - mapping = folio_file_mapping(folio); + mapping = folio->mapping; if (mapping != inode->i_mapping) goto out_unlock; @@ -666,7 +675,9 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) nfs_clear_invalid_mapping(file->f_mapping); since = filemap_sample_wb_err(file->f_mapping); - nfs_start_io_write(inode); + error = nfs_start_io_write(inode); + if (error) + return error; result = generic_write_checks(iocb, from); if (result > 0) result = generic_perform_write(iocb, from); @@ -730,7 +741,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) } fl->c.flc_type = saved_type; - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (nfs_have_read_or_write_delegation(inode)) goto out_noconflict; if (is_local) @@ -813,7 +824,7 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) * This makes locking act as a cache coherency point. */ nfs_sync_mapping(filp->f_mapping); - if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) { + if (!nfs_have_read_or_write_delegation(inode)) { nfs_zap_caches(inode); if (mapping_mapped(filp->f_mapping)) nfs_revalidate_mapping(inode, filp->f_mapping); diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index ce8f8934bca5..d39a1f58e18d 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -488,7 +488,7 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr) /* Perform an asynchronous read to ds */ nfs_initiate_pgio(ds_clnt, hdr, hdr->cred, NFS_PROTO(hdr->inode), &filelayout_read_call_ops, - 0, RPC_TASK_SOFTCONN); + 0, RPC_TASK_SOFTCONN, NULL); return PNFS_ATTEMPTED; } @@ -530,7 +530,7 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync) /* Perform an asynchronous write */ nfs_initiate_pgio(ds_clnt, hdr, hdr->cred, NFS_PROTO(hdr->inode), &filelayout_write_call_ops, - sync, RPC_TASK_SOFTCONN); + sync, RPC_TASK_SOFTCONN, NULL); return PNFS_ATTEMPTED; } @@ -605,14 +605,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); - /* FIXME: remove this check when layout segment support is added */ - if (lgr->range.offset != 0 || - lgr->range.length != NFS4_MAX_UINT64) { - dprintk("%s Only whole file layouts supported. Use MDS i/o\n", - __func__); - goto out; - } - if (fl->pattern_offset > lgr->range.offset) { dprintk("%s pattern_offset %lld too large\n", __func__, fl->pattern_offset); @@ -875,15 +867,15 @@ static void filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { - pnfs_generic_pg_check_layout(pgio); + pnfs_generic_pg_check_layout(pgio, req); if (!pgio->pg_lseg) { pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), - 0, - NFS4_MAX_UINT64, + req_offset(req), + req->wb_bytes, IOMODE_READ, false, - GFP_KERNEL); + nfs_io_gfp_mask()); if (IS_ERR(pgio->pg_lseg)) { pgio->pg_error = PTR_ERR(pgio->pg_lseg); pgio->pg_lseg = NULL; @@ -899,15 +891,15 @@ static void filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { - pnfs_generic_pg_check_layout(pgio); + pnfs_generic_pg_check_layout(pgio, req); if (!pgio->pg_lseg) { pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), - 0, - NFS4_MAX_UINT64, + req_offset(req), + req->wb_bytes, IOMODE_RW, false, - GFP_NOFS); + nfs_io_gfp_mask()); if (IS_ERR(pgio->pg_lseg)) { pgio->pg_error = PTR_ERR(pgio->pg_lseg); pgio->pg_lseg = NULL; @@ -1019,7 +1011,7 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) data->args.fh = fh; return nfs_initiate_commit(ds_clnt, data, NFS_PROTO(data->inode), &filelayout_commit_call_ops, how, - RPC_TASK_SOFTCONN); + RPC_TASK_SOFTCONN, NULL); out_err: pnfs_generic_prepare_to_resend_writes(data); pnfs_generic_commit_release(data); @@ -1118,7 +1110,6 @@ static const struct pnfs_commit_ops filelayout_commit_ops = { .clear_request_commit = pnfs_generic_clear_request_commit, .scan_commit_lists = pnfs_generic_scan_commit_lists, .recover_commit_reqs = pnfs_generic_recover_commit_reqs, - .search_commit_reqs = pnfs_generic_search_commit_reqs, .commit_pagelist = filelayout_commit_pagelist, }; diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 4fa304fa5bc4..29d9234d5c08 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -76,6 +76,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, struct page *scratch; struct list_head dsaddrs; struct nfs4_pnfs_ds_addr *da; + struct net *net = server->nfs_client->cl_net; /* set up xdr stream */ scratch = alloc_page(gfp_flags); @@ -159,8 +160,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, mp_count = be32_to_cpup(p); /* multipath count */ for (j = 0; j < mp_count; j++) { - da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, - &stream, gfp_flags); + da = nfs4_decode_mp_ds_addr(net, &stream, gfp_flags); if (da) list_add_tail(&da->da_node, &dsaddrs); } @@ -170,7 +170,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, goto out_err_free_deviceid; } - dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); + dsaddr->ds_list[i] = nfs4_pnfs_ds_add(net, &dsaddrs, gfp_flags); if (!dsaddr->ds_list[i]) goto out_err_drain_dsaddrs; trace_fl_getdevinfo(server, &pdev->dev_id, dsaddr->ds_list[i]->ds_remotestr); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 3e724cb7ef01..e6909cafab68 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -11,6 +11,7 @@ #include <linux/nfs_mount.h> #include <linux/nfs_page.h> #include <linux/module.h> +#include <linux/file.h> #include <linux/sched/mm.h> #include <linux/sunrpc/metrics.h> @@ -162,6 +163,20 @@ decode_name(struct xdr_stream *xdr, u32 *id) return 0; } +static struct nfsd_file * +ff_local_open_fh(struct pnfs_layout_segment *lseg, u32 ds_idx, + struct nfs_client *clp, const struct cred *cred, + struct nfs_fh *fh, fmode_t mode) +{ +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); + + return nfs_local_open_fh(clp, cred, fh, &mirror->nfl, mode); +#else + return NULL; +#endif +} + static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1, const struct nfs4_ff_layout_mirror *m2) { @@ -231,16 +246,18 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags) spin_lock_init(&mirror->lock); refcount_set(&mirror->ref, 1); INIT_LIST_HEAD(&mirror->mirrors); + nfs_localio_file_init(&mirror->nfl); } return mirror; } static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror) { - const struct cred *cred; + const struct cred *cred; ff_layout_remove_mirror(mirror); kfree(mirror->fh_versions); + nfs_close_local_fh(&mirror->nfl); cred = rcu_access_pointer(mirror->ro_cred); put_cred(cred); cred = rcu_access_pointer(mirror->rw_cred); @@ -823,14 +840,6 @@ ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio, } static void -ff_layout_pg_check_layout(struct nfs_pageio_descriptor *pgio, - struct nfs_page *req) -{ - pnfs_generic_pg_check_layout(pgio); - pnfs_generic_pg_check_range(pgio, req); -} - -static void ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { @@ -839,8 +848,11 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs4_pnfs_ds *ds; u32 ds_idx; + if (NFS_SERVER(pgio->pg_inode)->flags & + (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR)) + pgio->pg_maxretrans = io_maxretrans; retry: - ff_layout_pg_check_layout(pgio, req); + pnfs_generic_pg_check_layout(pgio, req); /* Use full layout for now */ if (!pgio->pg_lseg) { ff_layout_pg_get_read(pgio, req, false); @@ -852,6 +864,8 @@ retry: if (!pgio->pg_lseg) goto out_nolseg; } + /* Reset wb_nio, since getting layout segment was successful */ + req->wb_nio = 0; ds = ff_layout_get_ds_for_read(pgio, &ds_idx); if (!ds) { @@ -868,14 +882,24 @@ retry: pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; pgio->pg_mirror_idx = ds_idx; - - if (NFS_SERVER(pgio->pg_inode)->flags & - (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR)) - pgio->pg_maxretrans = io_maxretrans; return; out_nolseg: - if (pgio->pg_error < 0) - return; + if (pgio->pg_error < 0) { + if (pgio->pg_error != -EAGAIN) + return; + /* Retry getting layout segment if lower layer returned -EAGAIN */ + if (pgio->pg_maxretrans && req->wb_nio++ > pgio->pg_maxretrans) { + if (NFS_SERVER(pgio->pg_inode)->flags & NFS_MOUNT_SOFTERR) + pgio->pg_error = -ETIMEDOUT; + else + pgio->pg_error = -EIO; + return; + } + pgio->pg_error = 0; + /* Sleep for 1 second before retrying */ + ssleep(1); + goto retry; + } out_mds: trace_pnfs_mds_fallback_pg_init_read(pgio->pg_inode, 0, NFS4_MAX_UINT64, IOMODE_READ, @@ -895,7 +919,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, u32 i; retry: - ff_layout_pg_check_layout(pgio, req); + pnfs_generic_pg_check_layout(pgio, req); if (!pgio->pg_lseg) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), @@ -1130,10 +1154,14 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, rpc_wake_up(&tbl->slot_tbl_waitq); goto reset; /* RPC connection errors */ + case -ENETDOWN: + case -ENETUNREACH: + if (test_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags)) + return -NFS4ERR_FATAL_IOERROR; + fallthrough; case -ECONNREFUSED: case -EHOSTDOWN: case -EHOSTUNREACH: - case -ENETUNREACH: case -EIO: case -ETIMEDOUT: case -EPIPE: @@ -1159,6 +1187,7 @@ reset: /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */ static int ff_layout_async_handle_error_v3(struct rpc_task *task, + struct nfs_client *clp, struct pnfs_layout_segment *lseg, u32 idx) { @@ -1176,6 +1205,11 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, case -EJUKEBOX: nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); goto out_retry; + case -ENETDOWN: + case -ENETUNREACH: + if (test_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags)) + return -NFS4ERR_FATAL_IOERROR; + fallthrough; default: dprintk("%s DS connection error %d\n", __func__, task->tk_status); @@ -1210,7 +1244,7 @@ static int ff_layout_async_handle_error(struct rpc_task *task, switch (vers) { case 3: - return ff_layout_async_handle_error_v3(task, lseg, idx); + return ff_layout_async_handle_error_v3(task, clp, lseg, idx); case 4: return ff_layout_async_handle_error_v4(task, state, clp, lseg, idx); @@ -1240,6 +1274,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, case -ECONNRESET: case -EHOSTDOWN: case -EHOSTUNREACH: + case -ENETDOWN: case -ENETUNREACH: case -EADDRINUSE: case -ENOBUFS: @@ -1294,7 +1329,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task, hdr->args.offset, hdr->args.count, &hdr->res.op_status, OP_READ, task->tk_status); - trace_ff_layout_read_error(hdr); + trace_ff_layout_read_error(hdr, task->tk_status); } err = ff_layout_async_handle_error(task, hdr->args.context->state, @@ -1313,6 +1348,9 @@ static int ff_layout_read_done_cb(struct rpc_task *task, return task->tk_status; case -EAGAIN: goto out_eagain; + case -NFS4ERR_FATAL_IOERROR: + task->tk_status = -EIO; + return 0; } return 0; @@ -1464,7 +1502,7 @@ static int ff_layout_write_done_cb(struct rpc_task *task, hdr->args.offset, hdr->args.count, &hdr->res.op_status, OP_WRITE, task->tk_status); - trace_ff_layout_write_error(hdr); + trace_ff_layout_write_error(hdr, task->tk_status); } err = ff_layout_async_handle_error(task, hdr->args.context->state, @@ -1483,6 +1521,9 @@ static int ff_layout_write_done_cb(struct rpc_task *task, return task->tk_status; case -EAGAIN: return -EAGAIN; + case -NFS4ERR_FATAL_IOERROR: + task->tk_status = -EIO; + return 0; } if (hdr->res.verf->committed == NFS_FILE_SYNC || @@ -1510,7 +1551,7 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, data->args.offset, data->args.count, &data->res.op_status, OP_COMMIT, task->tk_status); - trace_ff_layout_commit_error(data); + trace_ff_layout_commit_error(data, task->tk_status); } err = ff_layout_async_handle_error(task, NULL, data->ds_clp, @@ -1527,6 +1568,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, case -EAGAIN: rpc_restart_call_prepare(task); return -EAGAIN; + case -NFS4ERR_FATAL_IOERROR: + task->tk_status = -EIO; + return 0; } ff_layout_set_layoutcommit(data->inode, data->lseg, data->lwb); @@ -1764,6 +1808,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; + struct nfsd_file *localio; struct nfs4_ff_layout_mirror *mirror; const struct cred *ds_cred; loff_t offset = hdr->args.offset; @@ -1810,11 +1855,18 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) hdr->args.offset = offset; hdr->mds_offset = offset; + /* Start IO accounting for local read */ + localio = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh, FMODE_READ); + if (localio) { + hdr->task.tk_start = ktime_get(); + ff_layout_read_record_layoutstats_start(&hdr->task, hdr); + } + /* Perform an asynchronous read to ds */ nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops, vers == 3 ? &ff_layout_read_call_ops_v3 : &ff_layout_read_call_ops_v4, - 0, RPC_TASK_SOFTCONN); + 0, RPC_TASK_SOFTCONN, localio); put_cred(ds_cred); return PNFS_ATTEMPTED; @@ -1834,6 +1886,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; + struct nfsd_file *localio; struct nfs4_ff_layout_mirror *mirror; const struct cred *ds_cred; loff_t offset = hdr->args.offset; @@ -1878,11 +1931,19 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) */ hdr->args.offset = offset; + /* Start IO accounting for local write */ + localio = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh, + FMODE_READ|FMODE_WRITE); + if (localio) { + hdr->task.tk_start = ktime_get(); + ff_layout_write_record_layoutstats_start(&hdr->task, hdr); + } + /* Perform an asynchronous write */ nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops, vers == 3 ? &ff_layout_write_call_ops_v3 : &ff_layout_write_call_ops_v4, - sync, RPC_TASK_SOFTCONN); + sync, RPC_TASK_SOFTCONN, localio); put_cred(ds_cred); return PNFS_ATTEMPTED; @@ -1916,6 +1977,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) struct pnfs_layout_segment *lseg = data->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; + struct nfsd_file *localio; struct nfs4_ff_layout_mirror *mirror; const struct cred *ds_cred; u32 idx; @@ -1954,10 +2016,18 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) if (fh) data->args.fh = fh; + /* Start IO accounting for local commit */ + localio = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh, + FMODE_READ|FMODE_WRITE); + if (localio) { + data->task.tk_start = ktime_get(); + ff_layout_commit_record_layoutstats_start(&data->task, data); + } + ret = nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops, vers == 3 ? &ff_layout_commit_call_ops_v3 : &ff_layout_commit_call_ops_v4, - how, RPC_TASK_SOFTCONN); + how, RPC_TASK_SOFTCONN, localio); put_cred(ds_cred); return ret; out_err: @@ -2095,12 +2165,6 @@ static int ff_layout_encode_ioerr(struct xdr_stream *xdr, } static void -encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len) -{ - WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0); -} - -static void ff_layout_encode_ff_iostat_head(struct xdr_stream *xdr, const nfs4_stateid *stateid, const struct nfs42_layoutstat_devinfo *devinfo) @@ -2556,7 +2620,7 @@ ff_layout_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *dummy) { #if IS_ENABLED(CONFIG_NFS_V4_2) - server->caps |= NFS_CAP_LAYOUTSTATS; + server->caps |= NFS_CAP_LAYOUTSTATS | NFS_CAP_REBOOT_LAYOUTRETURN; #endif return 0; } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index f84b3fb0dddd..095df09017a5 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -83,6 +83,7 @@ struct nfs4_ff_layout_mirror { nfs4_stateid stateid; const struct cred __rcu *ro_cred; const struct cred __rcu *rw_cred; + struct nfs_file_localio nfl; refcount_t ref; spinlock_t lock; unsigned long flags; diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index e028f5a0ef5f..4a304cf17c4b 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -49,6 +49,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, struct nfs4_pnfs_ds_addr *da; struct nfs4_ff_layout_ds *new_ds = NULL; struct nfs4_ff_ds_version *ds_versions = NULL; + struct net *net = server->nfs_client->cl_net; u32 mp_count; u32 version_count; __be32 *p; @@ -80,8 +81,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, for (i = 0; i < mp_count; i++) { /* multipath ds */ - da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, - &stream, gfp_flags); + da = nfs4_decode_mp_ds_addr(net, &stream, gfp_flags); if (da) list_add_tail(&da->da_node, &dsaddrs); } @@ -149,7 +149,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, new_ds->ds_versions = ds_versions; new_ds->ds_versions_cnt = version_count; - new_ds->ds = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); + new_ds->ds = nfs4_pnfs_ds_add(net, &dsaddrs, gfp_flags); if (!new_ds->ds) goto out_err_drain_dsaddrs; @@ -395,6 +395,12 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, /* connect success, check rsize/wsize limit */ if (!status) { + /* + * ds_clp is put in destroy_ds(). + * keep ds_clp even if DS is local, so that if local IO cannot + * proceed somehow, we can fall back to NFS whenever we want. + */ + nfs_local_probe(ds->ds_clp); max_payload = nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient), NULL); diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index d0a0956f8a13..13f71ca8c974 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -49,6 +49,8 @@ enum nfs_param { Opt_bsize, Opt_clientaddr, Opt_cto, + Opt_alignwrite, + Opt_fatal_neterrors, Opt_fg, Opt_fscache, Opt_fscache_flag, @@ -71,6 +73,8 @@ enum nfs_param { Opt_posix, Opt_proto, Opt_rdirplus, + Opt_rdirplus_none, + Opt_rdirplus_force, Opt_rdma, Opt_resvport, Opt_retrans, @@ -95,6 +99,20 @@ enum nfs_param { }; enum { + Opt_fatal_neterrors_default, + Opt_fatal_neterrors_enetunreach, + Opt_fatal_neterrors_none, +}; + +static const struct constant_table nfs_param_enums_fatal_neterrors[] = { + { "default", Opt_fatal_neterrors_default }, + { "ENETDOWN:ENETUNREACH", Opt_fatal_neterrors_enetunreach }, + { "ENETUNREACH:ENETDOWN", Opt_fatal_neterrors_enetunreach }, + { "none", Opt_fatal_neterrors_none }, + {} +}; + +enum { Opt_local_lock_all, Opt_local_lock_flock, Opt_local_lock_none, @@ -149,6 +167,9 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = { fsparam_u32 ("bsize", Opt_bsize), fsparam_string("clientaddr", Opt_clientaddr), fsparam_flag_no("cto", Opt_cto), + fsparam_flag_no("alignwrite", Opt_alignwrite), + fsparam_enum("fatal_neterrors", Opt_fatal_neterrors, + nfs_param_enums_fatal_neterrors), fsparam_flag ("fg", Opt_fg), fsparam_flag_no("fsc", Opt_fscache_flag), fsparam_string("fsc", Opt_fscache), @@ -172,7 +193,8 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = { fsparam_u32 ("port", Opt_port), fsparam_flag_no("posix", Opt_posix), fsparam_string("proto", Opt_proto), - fsparam_flag_no("rdirplus", Opt_rdirplus), + fsparam_flag_no("rdirplus", Opt_rdirplus), // rdirplus|nordirplus + fsparam_string("rdirplus", Opt_rdirplus), // rdirplus=... fsparam_flag ("rdma", Opt_rdma), fsparam_flag_no("resvport", Opt_resvport), fsparam_u32 ("retrans", Opt_retrans), @@ -286,6 +308,12 @@ static const struct constant_table nfs_xprtsec_policies[] = { {} }; +static const struct constant_table nfs_rdirplus_tokens[] = { + { "none", Opt_rdirplus_none }, + { "force", Opt_rdirplus_force }, + {} +}; + /* * Sanity-check a server address provided by the mount command. * @@ -592,6 +620,12 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, else ctx->flags |= NFS_MOUNT_TRUNK_DISCOVERY; break; + case Opt_alignwrite: + if (result.negated) + ctx->flags |= NFS_MOUNT_NO_ALIGNWRITE; + else + ctx->flags &= ~NFS_MOUNT_NO_ALIGNWRITE; + break; case Opt_ac: if (result.negated) ctx->flags |= NFS_MOUNT_NOAC; @@ -600,9 +634,11 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, break; case Opt_lock: if (result.negated) { + ctx->lock_status = NFS_LOCK_NOLOCK; ctx->flags |= NFS_MOUNT_NONLM; ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); } else { + ctx->lock_status = NFS_LOCK_LOCK; ctx->flags &= ~NFS_MOUNT_NONLM; ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); } @@ -626,10 +662,25 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, ctx->flags &= ~NFS_MOUNT_NOACL; break; case Opt_rdirplus: - if (result.negated) + if (result.negated) { + ctx->flags &= ~NFS_MOUNT_FORCE_RDIRPLUS; ctx->flags |= NFS_MOUNT_NORDIRPLUS; - else - ctx->flags &= ~NFS_MOUNT_NORDIRPLUS; + } else if (!param->string) { + ctx->flags &= ~(NFS_MOUNT_NORDIRPLUS | NFS_MOUNT_FORCE_RDIRPLUS); + } else { + switch (lookup_constant(nfs_rdirplus_tokens, param->string, -1)) { + case Opt_rdirplus_none: + ctx->flags &= ~NFS_MOUNT_FORCE_RDIRPLUS; + ctx->flags |= NFS_MOUNT_NORDIRPLUS; + break; + case Opt_rdirplus_force: + ctx->flags &= ~NFS_MOUNT_NORDIRPLUS; + ctx->flags |= NFS_MOUNT_FORCE_RDIRPLUS; + break; + default: + goto out_invalid_value; + } + } break; case Opt_sharecache: if (result.negated) @@ -862,6 +913,25 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, goto out_of_bounds; ctx->nfs_server.max_connect = result.uint_32; break; + case Opt_fatal_neterrors: + trace_nfs_mount_assign(param->key, param->string); + switch (result.uint_32) { + case Opt_fatal_neterrors_default: + if (fc->net_ns != &init_net) + ctx->flags |= NFS_MOUNT_NETUNREACH_FATAL; + else + ctx->flags &= ~NFS_MOUNT_NETUNREACH_FATAL; + break; + case Opt_fatal_neterrors_enetunreach: + ctx->flags |= NFS_MOUNT_NETUNREACH_FATAL; + break; + case Opt_fatal_neterrors_none: + ctx->flags &= ~NFS_MOUNT_NETUNREACH_FATAL; + break; + default: + goto out_invalid_value; + } + break; case Opt_lookupcache: trace_nfs_mount_assign(param->key, param->string); switch (result.uint_32) { @@ -1112,9 +1182,12 @@ static int nfs23_parse_monolithic(struct fs_context *fc, ctx->acdirmax = data->acdirmax; ctx->need_mount = false; - memcpy(sap, &data->addr, sizeof(data->addr)); - ctx->nfs_server.addrlen = sizeof(data->addr); - ctx->nfs_server.port = ntohs(data->addr.sin_port); + if (!is_remount_fc(fc)) { + memcpy(sap, &data->addr, sizeof(data->addr)); + ctx->nfs_server.addrlen = sizeof(data->addr); + ctx->nfs_server.port = ntohs(data->addr.sin_port); + } + if (sap->ss_family != AF_INET || !nfs_verify_server_address(sap)) goto out_no_address; @@ -1454,7 +1527,7 @@ static int nfs_fs_context_validate(struct fs_context *fc) /* Load the NFS protocol module if we haven't done so yet */ if (!ctx->nfs_mod) { - nfs_mod = get_nfs_version(ctx->version); + nfs_mod = find_nfs_version(ctx->version); if (IS_ERR(nfs_mod)) { ret = PTR_ERR(nfs_mod); goto out_version_unavailable; @@ -1528,7 +1601,7 @@ static int nfs_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) } nfs_copy_fh(ctx->mntfh, src->mntfh); - __module_get(ctx->nfs_mod->owner); + get_nfs_version(ctx->nfs_mod); ctx->client_address = NULL; ctx->mount_server.hostname = NULL; ctx->nfs_server.export_path = NULL; @@ -1620,7 +1693,7 @@ static int nfs_init_fs_context(struct fs_context *fc) } ctx->nfs_mod = nfss->nfs_client->cl_nfs_mod; - __module_get(ctx->nfs_mod->owner); + get_nfs_version(ctx->nfs_mod); } else { /* defaults */ ctx->timeo = NFS_UNSPEC_TIMEO; @@ -1638,6 +1711,9 @@ static int nfs_init_fs_context(struct fs_context *fc) ctx->xprtsec.cert_serial = TLS_NO_CERT; ctx->xprtsec.privkey_serial = TLS_NO_PRIVKEY; + if (fc->net_ns != &init_net) + ctx->flags |= NFS_MOUNT_NETUNREACH_FATAL; + fc->s_iflags |= SB_I_STABLE_WRITES; } fc->fs_private = ctx; diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index ddc1ee031955..e278a1ad1ca3 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -263,15 +263,25 @@ int nfs_netfs_readahead(struct readahead_control *ractl) static atomic_t nfs_netfs_debug_id; static int nfs_netfs_init_request(struct netfs_io_request *rreq, struct file *file) { + if (!file) { + if (WARN_ON_ONCE(rreq->origin != NETFS_PGPRIV2_COPY_TO_CACHE)) + return -EIO; + return 0; + } + rreq->netfs_priv = get_nfs_open_context(nfs_file_open_context(file)); rreq->debug_id = atomic_inc_return(&nfs_netfs_debug_id); + /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ + __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags); + rreq->io_streams[0].sreq_max_len = NFS_SB(rreq->inode->i_sb)->rsize; return 0; } static void nfs_netfs_free_request(struct netfs_io_request *rreq) { - put_nfs_open_context(rreq->netfs_priv); + if (rreq->netfs_priv) + put_nfs_open_context(rreq->netfs_priv); } static struct nfs_netfs_io_data *nfs_netfs_alloc(struct netfs_io_subrequest *sreq) @@ -286,14 +296,6 @@ static struct nfs_netfs_io_data *nfs_netfs_alloc(struct netfs_io_subrequest *sre return netfs; } -static bool nfs_netfs_clamp_length(struct netfs_io_subrequest *sreq) -{ - size_t rsize = NFS_SB(sreq->rreq->inode->i_sb)->rsize; - - sreq->len = min(sreq->len, rsize); - return true; -} - static void nfs_netfs_issue_read(struct netfs_io_subrequest *sreq) { struct nfs_netfs_io_data *netfs; @@ -302,17 +304,20 @@ static void nfs_netfs_issue_read(struct netfs_io_subrequest *sreq) struct nfs_open_context *ctx = sreq->rreq->netfs_priv; struct page *page; unsigned long idx; + pgoff_t start, last; int err; - pgoff_t start = (sreq->start + sreq->transferred) >> PAGE_SHIFT; - pgoff_t last = ((sreq->start + sreq->len - - sreq->transferred - 1) >> PAGE_SHIFT); + + start = (sreq->start + sreq->transferred) >> PAGE_SHIFT; + last = ((sreq->start + sreq->len - sreq->transferred - 1) >> PAGE_SHIFT); nfs_pageio_init_read(&pgio, inode, false, &nfs_async_read_completion_ops); netfs = nfs_netfs_alloc(sreq); - if (!netfs) - return netfs_subreq_terminated(sreq, -ENOMEM, false); + if (!netfs) { + sreq->error = -ENOMEM; + return netfs_read_subreq_terminated(sreq); + } pgio.pg_netfs = netfs; /* used in completion */ @@ -341,7 +346,7 @@ void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr) int nfs_netfs_folio_unlock(struct folio *folio) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; /* * If fscache is enabled, netfs will unlock pages. @@ -361,7 +366,8 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr) return; sreq = netfs->sreq; - if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) + if (test_bit(NFS_IOHDR_EOF, &hdr->flags) && + sreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &sreq->flags); if (hdr->error) @@ -377,5 +383,4 @@ const struct netfs_request_ops nfs_netfs_ops = { .init_request = nfs_netfs_init_request, .free_request = nfs_netfs_free_request, .issue_read = nfs_netfs_issue_read, - .clamp_length = nfs_netfs_clamp_length }; diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index e3cb4923316b..9d86868f4998 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -60,8 +60,6 @@ static inline void nfs_netfs_get(struct nfs_netfs_io_data *netfs) static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs) { - ssize_t final_len; - /* Only the last RPC completion should call netfs_subreq_terminated() */ if (!refcount_dec_and_test(&netfs->refcount)) return; @@ -74,8 +72,10 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs) * Correct the final length here to be no larger than the netfs subrequest * length, and thus avoid netfs's "Subreq overread" warning message. */ - final_len = min_t(s64, netfs->sreq->len, atomic64_read(&netfs->transferred)); - netfs_subreq_terminated(netfs->sreq, netfs->error ?: final_len, false); + netfs->sreq->transferred = min_t(s64, netfs->sreq->len, + atomic64_read(&netfs->transferred)); + netfs->sreq->error = netfs->error; + netfs_read_subreq_terminated(netfs->sreq); kfree(netfs); } static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi) @@ -101,10 +101,10 @@ extern int nfs_netfs_read_folio(struct file *file, struct folio *folio); static inline bool nfs_fscache_release_folio(struct folio *folio, gfp_t gfp) { - if (folio_test_fscache(folio)) { + if (folio_test_private_2(folio)) { /* [DEPRECATED] */ if (current_is_kswapd() || !(gfp & __GFP_FS)) return false; - folio_wait_fscache(folio); + folio_wait_private_2(folio); } fscache_note_page_release(netfs_i_cookie(netfs_inode(folio->mapping->host))); return true; diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 11ff2b2e060f..f13d25d95b85 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -62,7 +62,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i } /* - * get an NFS2/NFS3 root dentry from the root filehandle + * get a root dentry from the root filehandle */ int nfs_get_root(struct super_block *s, struct fs_context *fc) { diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index acef52ecb1bb..119e447758b9 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -74,6 +74,8 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) int nfs_wait_bit_killable(struct wait_bit_key *key, int mode) { + if (unlikely(nfs_current_task_exiting())) + return -EINTR; schedule(); if (signal_pending_state(mode, current)) return -ERESTARTSYS; @@ -190,9 +192,8 @@ static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi) void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) { struct nfs_inode *nfsi = NFS_I(inode); - bool have_delegation = NFS_PROTO(inode)->have_delegation(inode, FMODE_READ); - if (have_delegation) { + if (nfs_have_delegated_attributes(inode)) { if (!(flags & NFS_INO_REVAL_FORCED)) flags &= ~(NFS_INO_INVALID_MODE | NFS_INO_INVALID_OTHER | @@ -206,12 +207,15 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) nfs_fscache_invalidate(inode, 0); flags &= ~NFS_INO_REVAL_FORCED; - nfsi->cache_validity |= flags; + flags |= nfsi->cache_validity; + if (inode->i_mapping->nrpages == 0) + flags &= ~NFS_INO_INVALID_DATA; - if (inode->i_mapping->nrpages == 0) { - nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; - nfs_ooo_clear(nfsi); - } else if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { + /* pairs with nfs_clear_invalid_mapping()'s smp_load_acquire() */ + smp_store_release(&nfsi->cache_validity, flags); + + if (inode->i_mapping->nrpages == 0 || + nfsi->cache_validity & NFS_INO_INVALID_DATA) { nfs_ooo_clear(nfsi); } trace_nfs_set_cache_invalid(inode, 0); @@ -276,6 +280,8 @@ EXPORT_SYMBOL_GPL(nfs_zap_acl_cache); void nfs_invalidate_atime(struct inode *inode) { + if (nfs_have_delegated_atime(inode)) + return; spin_lock(&inode->i_lock); nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); spin_unlock(&inode->i_lock); @@ -491,6 +497,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops; inode->i_data.a_ops = &nfs_file_aops; nfs_inode_init_regular(nfsi); + mapping_set_large_folios(inode->i_mapping); } else if (S_ISDIR(inode->i_mode)) { inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; @@ -604,6 +611,67 @@ out_no_inode: } EXPORT_SYMBOL_GPL(nfs_fhget); +static void +nfs_fattr_fixup_delegated(struct inode *inode, struct nfs_fattr *fattr) +{ + unsigned long cache_validity = NFS_I(inode)->cache_validity; + + if (nfs_have_delegated_mtime(inode)) { + if (!(cache_validity & NFS_INO_INVALID_CTIME)) + fattr->valid &= ~(NFS_ATTR_FATTR_PRECTIME | + NFS_ATTR_FATTR_CTIME); + + if (!(cache_validity & NFS_INO_INVALID_MTIME)) + fattr->valid &= ~(NFS_ATTR_FATTR_PREMTIME | + NFS_ATTR_FATTR_MTIME); + + if (!(cache_validity & NFS_INO_INVALID_ATIME)) + fattr->valid &= ~NFS_ATTR_FATTR_ATIME; + } else if (nfs_have_delegated_atime(inode)) { + if (!(cache_validity & NFS_INO_INVALID_ATIME)) + fattr->valid &= ~NFS_ATTR_FATTR_ATIME; + } +} + +static void nfs_update_timestamps(struct inode *inode, unsigned int ia_valid) +{ + enum file_time_flags time_flags = 0; + unsigned int cache_flags = 0; + + if (ia_valid & ATTR_MTIME) { + time_flags |= S_MTIME | S_CTIME; + cache_flags |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME; + } + if (ia_valid & ATTR_ATIME) { + time_flags |= S_ATIME; + cache_flags |= NFS_INO_INVALID_ATIME; + } + inode_update_timestamps(inode, time_flags); + NFS_I(inode)->cache_validity &= ~cache_flags; +} + +void nfs_update_delegated_atime(struct inode *inode) +{ + spin_lock(&inode->i_lock); + if (nfs_have_delegated_atime(inode)) + nfs_update_timestamps(inode, ATTR_ATIME); + spin_unlock(&inode->i_lock); +} + +void nfs_update_delegated_mtime_locked(struct inode *inode) +{ + if (nfs_have_delegated_mtime(inode)) + nfs_update_timestamps(inode, ATTR_MTIME); +} + +void nfs_update_delegated_mtime(struct inode *inode) +{ + spin_lock(&inode->i_lock); + nfs_update_delegated_mtime_locked(inode); + spin_unlock(&inode->i_lock); +} +EXPORT_SYMBOL_GPL(nfs_update_delegated_mtime); + #define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN) int @@ -631,6 +699,18 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, attr->ia_valid &= ~ATTR_SIZE; } + if (nfs_have_delegated_mtime(inode) && attr->ia_valid & ATTR_MTIME) { + spin_lock(&inode->i_lock); + nfs_update_timestamps(inode, attr->ia_valid); + spin_unlock(&inode->i_lock); + attr->ia_valid &= ~(ATTR_MTIME | ATTR_ATIME); + } else if (nfs_have_delegated_atime(inode) && + attr->ia_valid & ATTR_ATIME && + !(attr->ia_valid & ATTR_MTIME)) { + nfs_update_delegated_atime(inode); + attr->ia_valid &= ~ATTR_ATIME; + } + /* Optimization: if the end result is no change, don't RPC */ if (((attr->ia_valid & NFS_VALID_ATTRS) & ~(ATTR_FILE|ATTR_OPEN)) == 0) return 0; @@ -686,6 +766,7 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset) spin_unlock(&inode->i_lock); truncate_pagecache(inode, offset); + nfs_update_delegated_mtime_locked(inode); spin_lock(&inode->i_lock); out: return err; @@ -709,8 +790,9 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, spin_lock(&inode->i_lock); NFS_I(inode)->attr_gencount = fattr->gencount; if ((attr->ia_valid & ATTR_SIZE) != 0) { - nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME | - NFS_INO_INVALID_BLOCKS); + if (!nfs_have_delegated_mtime(inode)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS); nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); nfs_vmtruncate(inode, attr->ia_size); } @@ -856,8 +938,12 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path, /* Flush out writes to the server in order to update c/mtime/version. */ if ((request_mask & (STATX_CTIME | STATX_MTIME | STATX_CHANGE_COOKIE)) && - S_ISREG(inode->i_mode)) - filemap_write_and_wait(inode->i_mapping); + S_ISREG(inode->i_mode)) { + if (nfs_have_delegated_mtime(inode)) + filemap_fdatawrite(inode->i_mapping); + else + filemap_write_and_wait(inode->i_mapping); + } /* * We may force a getattr if the user cares about atime. @@ -1012,7 +1098,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) if (!is_sync) return; inode = d_inode(ctx->dentry); - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (nfs_have_read_or_write_delegation(inode)) return; nfsi = NFS_I(inode); if (inode->i_mapping->nrpages == 0) @@ -1053,6 +1139,8 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, ctx->lock_context.open_context = ctx; INIT_LIST_HEAD(&ctx->list); ctx->mdsthreshold = NULL; + nfs_localio_file_init(&ctx->nfl); + return ctx; } EXPORT_SYMBOL_GPL(alloc_nfs_open_context); @@ -1084,6 +1172,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) nfs_sb_deactive(sb); put_rpccred(rcu_dereference_protected(ctx->ll_cred, 1)); kfree(ctx->mdsthreshold); + nfs_close_local_fh(&ctx->nfl); kfree_rcu(ctx, rcu_head); } @@ -1340,6 +1429,13 @@ int nfs_clear_invalid_mapping(struct address_space *mapping) TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); if (ret) goto out; + smp_rmb(); /* pairs with smp_wmb() below */ + if (test_bit(NFS_INO_INVALIDATING, bitlock)) + continue; + /* pairs with nfs_set_cache_invalid()'s smp_store_release() */ + if (!(smp_load_acquire(&nfsi->cache_validity) & NFS_INO_INVALID_DATA)) + goto out; + /* Slow-path that double-checks with spinlock held */ spin_lock(&inode->i_lock); if (test_bit(NFS_INO_INVALIDATING, bitlock)) { spin_unlock(&inode->i_lock); @@ -1482,7 +1578,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat unsigned long invalid = 0; struct timespec64 ts; - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (nfs_have_delegated_attributes(inode)) return 0; if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) { @@ -1565,6 +1661,7 @@ void nfs_fattr_init(struct nfs_fattr *fattr) fattr->gencount = nfs_inc_attr_generation_counter(); fattr->owner_name = NULL; fattr->group_name = NULL; + fattr->mdsthreshold = NULL; } EXPORT_SYMBOL_GPL(nfs_fattr_init); @@ -2118,6 +2215,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) */ nfsi->read_cache_jiffies = fattr->time_start; + /* Fix up any delegated attributes in the struct nfs_fattr */ + nfs_fattr_fixup_delegated(inode, fattr); + save_cache_validity = nfsi->cache_validity; nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME @@ -2390,35 +2490,54 @@ static void nfs_destroy_inodecache(void) kmem_cache_destroy(nfs_inode_cachep); } +struct workqueue_struct *nfslocaliod_workqueue; struct workqueue_struct *nfsiod_workqueue; EXPORT_SYMBOL_GPL(nfsiod_workqueue); /* - * start up the nfsiod workqueue + * Destroy the nfsiod workqueues */ -static int nfsiod_start(void) +static void nfsiod_stop(void) { struct workqueue_struct *wq; - dprintk("RPC: creating workqueue nfsiod\n"); - wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); - if (wq == NULL) - return -ENOMEM; - nfsiod_workqueue = wq; - return 0; + + wq = nfsiod_workqueue; + if (wq != NULL) { + nfsiod_workqueue = NULL; + destroy_workqueue(wq); + } +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + wq = nfslocaliod_workqueue; + if (wq != NULL) { + nfslocaliod_workqueue = NULL; + destroy_workqueue(wq); + } +#endif /* CONFIG_NFS_LOCALIO */ } /* - * Destroy the nfsiod workqueue + * Start the nfsiod workqueues */ -static void nfsiod_stop(void) +static int nfsiod_start(void) { - struct workqueue_struct *wq; - - wq = nfsiod_workqueue; - if (wq == NULL) - return; - nfsiod_workqueue = NULL; - destroy_workqueue(wq); + dprintk("RPC: creating workqueue nfsiod\n"); + nfsiod_workqueue = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); + if (nfsiod_workqueue == NULL) + return -ENOMEM; +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + /* + * localio writes need to use a normal (non-memreclaim) workqueue. + * When we start getting low on space, XFS goes and calls flush_work() on + * a non-memreclaim work queue, which causes a priority inversion problem. + */ + dprintk("RPC: creating workqueue nfslocaliod\n"); + nfslocaliod_workqueue = alloc_workqueue("nfslocaliod", WQ_UNBOUND, 0); + if (unlikely(nfslocaliod_workqueue == NULL)) { + nfsiod_stop(); + return -ENOMEM; + } +#endif /* CONFIG_NFS_LOCALIO */ + return 0; } unsigned int nfs_net_id; @@ -2538,6 +2657,7 @@ static void __exit exit_nfs_fs(void) /* Not quite true; I just maintain it */ MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); +MODULE_DESCRIPTION("NFS client support"); MODULE_LICENSE("GPL"); module_param(enable_ino64, bool, 0644); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 06253695fe53..6655e5f32ec6 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -6,12 +6,14 @@ #include "nfs4_fs.h" #include <linux/fs_context.h> #include <linux/security.h> +#include <linux/compiler_attributes.h> #include <linux/crc32.h> #include <linux/sunrpc/addr.h> #include <linux/nfs_page.h> +#include <linux/nfslocalio.h> #include <linux/wait_bit.h> -#define NFS_SB_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS) +#define NFS_SB_MASK (SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS) extern const struct export_operations nfs_export_ops; @@ -112,6 +114,7 @@ struct nfs_fs_context { unsigned short protofamily; unsigned short mountfamily; bool has_sec_mnt_opts; + int lock_status; struct { union { @@ -153,6 +156,12 @@ struct nfs_fs_context { } clone_data; }; +enum nfs_lock_status { + NFS_LOCK_NOT_SET = 0, + NFS_LOCK_LOCK = 1, + NFS_LOCK_NOLOCK = 2, +}; + #define nfs_errorf(fc, fmt, ...) ((fc)->log.log ? \ errorf(fc, fmt, ## __VA_ARGS__) : \ ({ dprintk(fmt "\n", ## __VA_ARGS__); })) @@ -301,7 +310,8 @@ void nfs_pgio_header_free(struct nfs_pgio_header *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, const struct cred *cred, const struct nfs_rpc_ops *rpc_ops, - const struct rpc_call_ops *call_ops, int how, int flags); + const struct rpc_call_ops *call_ops, int how, int flags, + struct nfsd_file *localio); void nfs_free_request(struct nfs_page *req); struct nfs_pgio_mirror * nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc); @@ -390,8 +400,8 @@ struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); void nfs_d_prune_case_insensitive_aliases(struct inode *inode); int nfs_create(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, bool); -int nfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *, - umode_t); +struct dentry *nfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *, + umode_t); int nfs_rmdir(struct inode *, struct dentry *); int nfs_unlink(struct inode *, struct dentry *); int nfs_symlink(struct mnt_idmap *, struct inode *, struct dentry *, @@ -431,6 +441,7 @@ int nfs_check_flags(int); /* inode.c */ extern struct workqueue_struct *nfsiod_workqueue; +extern struct workqueue_struct *nfslocaliod_workqueue; extern struct inode *nfs_alloc_inode(struct super_block *sb); extern void nfs_free_inode(struct inode *); extern int nfs_write_inode(struct inode *, struct writeback_control *); @@ -442,6 +453,54 @@ extern void nfs_set_cache_invalid(struct inode *inode, unsigned long flags); extern bool nfs_check_cache_invalid(struct inode *, unsigned long); extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode); +#if IS_ENABLED(CONFIG_NFS_LOCALIO) +/* localio.c */ +extern void nfs_local_probe(struct nfs_client *); +extern void nfs_local_probe_async(struct nfs_client *); +extern void nfs_local_probe_async_work(struct work_struct *); +extern struct nfsd_file *nfs_local_open_fh(struct nfs_client *, + const struct cred *, + struct nfs_fh *, + struct nfs_file_localio *, + const fmode_t); +extern int nfs_local_doio(struct nfs_client *, + struct nfsd_file *, + struct nfs_pgio_header *, + const struct rpc_call_ops *); +extern int nfs_local_commit(struct nfsd_file *, + struct nfs_commit_data *, + const struct rpc_call_ops *, int); +extern bool nfs_server_is_local(const struct nfs_client *clp); + +#else /* CONFIG_NFS_LOCALIO */ +static inline void nfs_local_probe(struct nfs_client *clp) {} +static inline void nfs_local_probe_async(struct nfs_client *clp) {} +static inline struct nfsd_file * +nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, + struct nfs_fh *fh, struct nfs_file_localio *nfl, + const fmode_t mode) +{ + return NULL; +} +static inline int nfs_local_doio(struct nfs_client *clp, + struct nfsd_file *localio, + struct nfs_pgio_header *hdr, + const struct rpc_call_ops *call_ops) +{ + return -EINVAL; +} +static inline int nfs_local_commit(struct nfsd_file *localio, + struct nfs_commit_data *data, + const struct rpc_call_ops *call_ops, int how) +{ + return -EINVAL; +} +static inline bool nfs_server_is_local(const struct nfs_client *clp) +{ + return false; +} +#endif /* CONFIG_NFS_LOCALIO */ + /* super.c */ extern const struct super_operations nfs_sops; bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t); @@ -461,11 +520,11 @@ extern const struct netfs_request_ops nfs_netfs_ops; #endif /* io.c */ -extern void nfs_start_io_read(struct inode *inode); +extern __must_check int nfs_start_io_read(struct inode *inode); extern void nfs_end_io_read(struct inode *inode); -extern void nfs_start_io_write(struct inode *inode); +extern __must_check int nfs_start_io_write(struct inode *inode); extern void nfs_end_io_write(struct inode *inode); -extern void nfs_start_io_direct(struct inode *inode); +extern __must_check int nfs_start_io_direct(struct inode *inode); extern void nfs_end_io_direct(struct inode *inode); static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi) @@ -498,7 +557,6 @@ extern int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio, struct nfs_open_context *ctx, struct folio *folio); extern void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio); -extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); /* super.c */ @@ -521,7 +579,8 @@ extern int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, const struct nfs_rpc_ops *nfs_ops, const struct rpc_call_ops *call_ops, - int how, int flags); + int how, int flags, + struct nfsd_file *localio); extern void nfs_init_commit(struct nfs_commit_data *data, struct list_head *head, struct pnfs_layout_segment *lseg, @@ -710,9 +769,9 @@ unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp) if ((bsize & (bsize - 1)) || nrbitsp) { unsigned char nrbits; - for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--) + for (nrbits = 31; nrbits && !(bsize & (1UL << nrbits)); nrbits--) ; - bsize = 1 << nrbits; + bsize = 1UL << nrbits; if (nrbitsp) *nrbitsp = nrbits; } @@ -778,7 +837,7 @@ static inline void nfs_folio_mark_unstable(struct folio *folio, struct nfs_commit_info *cinfo) { if (folio && !cinfo->dreq) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; long nr = folio_nr_pages(folio); /* This page is really still in write-back - just that the @@ -793,31 +852,12 @@ static inline void nfs_folio_mark_unstable(struct folio *folio, /* * Determine the number of bytes of data the page contains */ -static inline -unsigned int nfs_page_length(struct page *page) -{ - loff_t i_size = i_size_read(page_file_mapping(page)->host); - - if (i_size > 0) { - pgoff_t index = page_index(page); - pgoff_t end_index = (i_size - 1) >> PAGE_SHIFT; - if (index < end_index) - return PAGE_SIZE; - if (index == end_index) - return ((i_size - 1) & ~PAGE_MASK) + 1; - } - return 0; -} - -/* - * Determine the number of bytes of data the page contains - */ static inline size_t nfs_folio_length(struct folio *folio) { - loff_t i_size = i_size_read(folio_file_mapping(folio)->host); + loff_t i_size = i_size_read(folio->mapping->host); if (i_size > 0) { - pgoff_t index = folio_index(folio) >> folio_order(folio); + pgoff_t index = folio->index >> folio_order(folio); pgoff_t end_index = (i_size - 1) >> folio_shift(folio); if (index < end_index) return folio_size(folio); @@ -859,18 +899,16 @@ u64 nfs_timespec_to_change_attr(const struct timespec64 *ts) return ((u64)ts->tv_sec << 30) + ts->tv_nsec; } -#ifdef CONFIG_CRC32 static inline u32 nfs_stateid_hash(const nfs4_stateid *stateid) { return ~crc32_le(0xFFFFFFFF, &stateid->other[0], NFS4_STATEID_OTHER_SIZE); } -#else -static inline u32 nfs_stateid_hash(nfs4_stateid *stateid) + +static inline bool nfs_current_task_exiting(void) { - return 0; + return (current->flags & PF_EXITING) != 0; } -#endif static inline bool nfs_error_is_fatal(int err) { diff --git a/fs/nfs/io.c b/fs/nfs/io.c index b5551ed8f648..3388faf2acb9 100644 --- a/fs/nfs/io.c +++ b/fs/nfs/io.c @@ -39,19 +39,28 @@ static void nfs_block_o_direct(struct nfs_inode *nfsi, struct inode *inode) * Note that buffered writes and truncates both take a write lock on * inode->i_rwsem, meaning that those are serialised w.r.t. the reads. */ -void +int nfs_start_io_read(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); + int err; + /* Be an optimist! */ - down_read(&inode->i_rwsem); + err = down_read_killable(&inode->i_rwsem); + if (err) + return err; if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) == 0) - return; + return 0; up_read(&inode->i_rwsem); + /* Slow path.... */ - down_write(&inode->i_rwsem); + err = down_write_killable(&inode->i_rwsem); + if (err) + return err; nfs_block_o_direct(nfsi, inode); downgrade_write(&inode->i_rwsem); + + return 0; } /** @@ -74,11 +83,15 @@ nfs_end_io_read(struct inode *inode) * Declare that a buffered read operation is about to start, and ensure * that we block all direct I/O. */ -void +int nfs_start_io_write(struct inode *inode) { - down_write(&inode->i_rwsem); - nfs_block_o_direct(NFS_I(inode), inode); + int err; + + err = down_write_killable(&inode->i_rwsem); + if (!err) + nfs_block_o_direct(NFS_I(inode), inode); + return err; } /** @@ -119,19 +132,28 @@ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode) * Note that buffered writes and truncates both take a write lock on * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT. */ -void +int nfs_start_io_direct(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); + int err; + /* Be an optimist! */ - down_read(&inode->i_rwsem); + err = down_read_killable(&inode->i_rwsem); + if (err) + return err; if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) != 0) - return; + return 0; up_read(&inode->i_rwsem); + /* Slow path.... */ - down_write(&inode->i_rwsem); + err = down_write_killable(&inode->i_rwsem); + if (err) + return err; nfs_block_buffered(nfsi, inode); downgrade_write(&inode->i_rwsem); + + return 0; } /** diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index 5aa776b5a3e7..49862c95b224 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -46,10 +46,11 @@ static inline void nfs_add_stats(const struct inode *inode, nfs_add_server_stats(NFS_SERVER(inode), stat, addend); } -static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void) -{ - return alloc_percpu(struct nfs_iostats); -} +/* + * This specialized allocator has to be a macro for its allocations to be + * accounted separately (to have a separate alloc_tag). + */ +#define nfs_alloc_iostats() alloc_percpu(struct nfs_iostats) static inline void nfs_free_iostats(struct nfs_iostats __percpu *stats) { diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c new file mode 100644 index 000000000000..4ec952f9f47d --- /dev/null +++ b/fs/nfs/localio.c @@ -0,0 +1,867 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * NFS client support for local clients to bypass network stack + * + * Copyright (C) 2014 Weston Andros Adamson <dros@primarydata.com> + * Copyright (C) 2019 Trond Myklebust <trond.myklebust@hammerspace.com> + * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com> + * Copyright (C) 2024 NeilBrown <neilb@suse.de> + */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/vfs.h> +#include <linux/file.h> +#include <linux/inet.h> +#include <linux/sunrpc/addr.h> +#include <linux/inetdevice.h> +#include <net/addrconf.h> +#include <linux/nfs_common.h> +#include <linux/nfslocalio.h> +#include <linux/bvec.h> + +#include <linux/nfs.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_xdr.h> + +#include "internal.h" +#include "pnfs.h" +#include "nfstrace.h" + +#define NFSDBG_FACILITY NFSDBG_VFS + +struct nfs_local_kiocb { + struct kiocb kiocb; + struct bio_vec *bvec; + struct nfs_pgio_header *hdr; + struct work_struct work; + void (*aio_complete_work)(struct work_struct *); + struct nfsd_file *localio; +}; + +struct nfs_local_fsync_ctx { + struct nfsd_file *localio; + struct nfs_commit_data *data; + struct work_struct work; + struct completion *done; +}; + +static bool localio_enabled __read_mostly = true; +module_param(localio_enabled, bool, 0644); + +static bool localio_O_DIRECT_semantics __read_mostly = false; +module_param(localio_O_DIRECT_semantics, bool, 0644); +MODULE_PARM_DESC(localio_O_DIRECT_semantics, + "LOCALIO will use O_DIRECT semantics to filesystem."); + +static inline bool nfs_client_is_local(const struct nfs_client *clp) +{ + return !!rcu_access_pointer(clp->cl_uuid.net); +} + +bool nfs_server_is_local(const struct nfs_client *clp) +{ + return nfs_client_is_local(clp) && localio_enabled; +} +EXPORT_SYMBOL_GPL(nfs_server_is_local); + +/* + * UUID_IS_LOCAL XDR functions + */ + +static void localio_xdr_enc_uuidargs(struct rpc_rqst *req, + struct xdr_stream *xdr, + const void *data) +{ + const u8 *uuid = data; + + encode_opaque_fixed(xdr, uuid, UUID_SIZE); +} + +static int localio_xdr_dec_uuidres(struct rpc_rqst *req, + struct xdr_stream *xdr, + void *result) +{ + /* void return */ + return 0; +} + +static const struct rpc_procinfo nfs_localio_procedures[] = { + [LOCALIOPROC_UUID_IS_LOCAL] = { + .p_proc = LOCALIOPROC_UUID_IS_LOCAL, + .p_encode = localio_xdr_enc_uuidargs, + .p_decode = localio_xdr_dec_uuidres, + .p_arglen = XDR_QUADLEN(UUID_SIZE), + .p_replen = 0, + .p_statidx = LOCALIOPROC_UUID_IS_LOCAL, + .p_name = "UUID_IS_LOCAL", + }, +}; + +static unsigned int nfs_localio_counts[ARRAY_SIZE(nfs_localio_procedures)]; +static const struct rpc_version nfslocalio_version1 = { + .number = 1, + .nrprocs = ARRAY_SIZE(nfs_localio_procedures), + .procs = nfs_localio_procedures, + .counts = nfs_localio_counts, +}; + +static const struct rpc_version *nfslocalio_version[] = { + [1] = &nfslocalio_version1, +}; + +extern const struct rpc_program nfslocalio_program; +static struct rpc_stat nfslocalio_rpcstat = { &nfslocalio_program }; + +const struct rpc_program nfslocalio_program = { + .name = "nfslocalio", + .number = NFS_LOCALIO_PROGRAM, + .nrvers = ARRAY_SIZE(nfslocalio_version), + .version = nfslocalio_version, + .stats = &nfslocalio_rpcstat, +}; + +/* + * nfs_init_localioclient - Initialise an NFS localio client connection + */ +static struct rpc_clnt *nfs_init_localioclient(struct nfs_client *clp) +{ + struct rpc_clnt *rpcclient_localio; + + rpcclient_localio = rpc_bind_new_program(clp->cl_rpcclient, + &nfslocalio_program, 1); + + dprintk_rcu("%s: server (%s) %s NFS LOCALIO.\n", + __func__, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR), + (IS_ERR(rpcclient_localio) ? "does not support" : "supports")); + + return rpcclient_localio; +} + +static bool nfs_server_uuid_is_local(struct nfs_client *clp) +{ + u8 uuid[UUID_SIZE]; + struct rpc_message msg = { + .rpc_argp = &uuid, + }; + struct rpc_clnt *rpcclient_localio; + int status; + + rpcclient_localio = nfs_init_localioclient(clp); + if (IS_ERR(rpcclient_localio)) + return false; + + export_uuid(uuid, &clp->cl_uuid.uuid); + + msg.rpc_proc = &nfs_localio_procedures[LOCALIOPROC_UUID_IS_LOCAL]; + status = rpc_call_sync(rpcclient_localio, &msg, 0); + dprintk("%s: NFS reply UUID_IS_LOCAL: status=%d\n", + __func__, status); + rpc_shutdown_client(rpcclient_localio); + + /* Server is only local if it initialized required struct members */ + if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom) + return false; + + return true; +} + +/* + * nfs_local_probe - probe local i/o support for an nfs_server and nfs_client + * - called after alloc_client and init_client (so cl_rpcclient exists) + * - this function is idempotent, it can be called for old or new clients + */ +void nfs_local_probe(struct nfs_client *clp) +{ + /* Disallow localio if disabled via sysfs or AUTH_SYS isn't used */ + if (!localio_enabled || + clp->cl_rpcclient->cl_auth->au_flavor != RPC_AUTH_UNIX) { + nfs_localio_disable_client(clp); + return; + } + + if (nfs_client_is_local(clp)) { + /* If already enabled, disable and re-enable */ + nfs_localio_disable_client(clp); + } + + if (!nfs_uuid_begin(&clp->cl_uuid)) + return; + if (nfs_server_uuid_is_local(clp)) + nfs_localio_enable_client(clp); + nfs_uuid_end(&clp->cl_uuid); +} +EXPORT_SYMBOL_GPL(nfs_local_probe); + +void nfs_local_probe_async_work(struct work_struct *work) +{ + struct nfs_client *clp = + container_of(work, struct nfs_client, cl_local_probe_work); + + nfs_local_probe(clp); +} + +void nfs_local_probe_async(struct nfs_client *clp) +{ + queue_work(nfsiod_workqueue, &clp->cl_local_probe_work); +} +EXPORT_SYMBOL_GPL(nfs_local_probe_async); + +static inline struct nfsd_file *nfs_local_file_get(struct nfsd_file *nf) +{ + return nfs_to->nfsd_file_get(nf); +} + +static inline void nfs_local_file_put(struct nfsd_file *nf) +{ + nfs_to->nfsd_file_put(nf); +} + +/* + * __nfs_local_open_fh - open a local filehandle in terms of nfsd_file. + * + * Returns a pointer to a struct nfsd_file or ERR_PTR. + * Caller must release returned nfsd_file with nfs_to_nfsd_file_put_local(). + */ +static struct nfsd_file * +__nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, + struct nfs_fh *fh, struct nfs_file_localio *nfl, + const fmode_t mode) +{ + struct nfsd_file *localio; + + localio = nfs_open_local_fh(&clp->cl_uuid, clp->cl_rpcclient, + cred, fh, nfl, mode); + if (IS_ERR(localio)) { + int status = PTR_ERR(localio); + trace_nfs_local_open_fh(fh, mode, status); + switch (status) { + case -ENOMEM: + case -ENXIO: + case -ENOENT: + /* Revalidate localio, will disable if unsupported */ + nfs_local_probe(clp); + } + } + return localio; +} + +/* + * nfs_local_open_fh - open a local filehandle in terms of nfsd_file. + * First checking if the open nfsd_file is already cached, otherwise + * must __nfs_local_open_fh and insert the nfsd_file in nfs_file_localio. + * + * Returns a pointer to a struct nfsd_file or NULL. + */ +struct nfsd_file * +nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, + struct nfs_fh *fh, struct nfs_file_localio *nfl, + const fmode_t mode) +{ + struct nfsd_file *nf, *new, __rcu **pnf; + + if (!nfs_server_is_local(clp)) + return NULL; + if (mode & ~(FMODE_READ | FMODE_WRITE)) + return NULL; + + if (mode & FMODE_WRITE) + pnf = &nfl->rw_file; + else + pnf = &nfl->ro_file; + + new = NULL; + rcu_read_lock(); + nf = rcu_dereference(*pnf); + if (!nf) { + rcu_read_unlock(); + new = __nfs_local_open_fh(clp, cred, fh, nfl, mode); + if (IS_ERR(new)) + return NULL; + rcu_read_lock(); + /* try to swap in the pointer */ + spin_lock(&clp->cl_uuid.lock); + nf = rcu_dereference_protected(*pnf, 1); + if (!nf) { + nf = new; + new = NULL; + rcu_assign_pointer(*pnf, nf); + } + spin_unlock(&clp->cl_uuid.lock); + } + nf = nfs_local_file_get(nf); + rcu_read_unlock(); + if (new) + nfs_to_nfsd_file_put_local(new); + return nf; +} +EXPORT_SYMBOL_GPL(nfs_local_open_fh); + +static struct bio_vec * +nfs_bvec_alloc_and_import_pagevec(struct page **pagevec, + unsigned int npages, gfp_t flags) +{ + struct bio_vec *bvec, *p; + + bvec = kmalloc_array(npages, sizeof(*bvec), flags); + if (bvec != NULL) { + for (p = bvec; npages > 0; p++, pagevec++, npages--) { + p->bv_page = *pagevec; + p->bv_len = PAGE_SIZE; + p->bv_offset = 0; + } + } + return bvec; +} + +static void +nfs_local_iocb_free(struct nfs_local_kiocb *iocb) +{ + kfree(iocb->bvec); + kfree(iocb); +} + +static struct nfs_local_kiocb * +nfs_local_iocb_alloc(struct nfs_pgio_header *hdr, + struct file *file, gfp_t flags) +{ + struct nfs_local_kiocb *iocb; + + iocb = kmalloc(sizeof(*iocb), flags); + if (iocb == NULL) + return NULL; + iocb->bvec = nfs_bvec_alloc_and_import_pagevec(hdr->page_array.pagevec, + hdr->page_array.npages, flags); + if (iocb->bvec == NULL) { + kfree(iocb); + return NULL; + } + + if (localio_O_DIRECT_semantics && + test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) { + iocb->kiocb.ki_filp = file; + iocb->kiocb.ki_flags = IOCB_DIRECT; + } else + init_sync_kiocb(&iocb->kiocb, file); + + iocb->kiocb.ki_pos = hdr->args.offset; + iocb->hdr = hdr; + iocb->kiocb.ki_flags &= ~IOCB_APPEND; + iocb->aio_complete_work = NULL; + + return iocb; +} + +static void +nfs_local_iter_init(struct iov_iter *i, struct nfs_local_kiocb *iocb, int dir) +{ + struct nfs_pgio_header *hdr = iocb->hdr; + + iov_iter_bvec(i, dir, iocb->bvec, hdr->page_array.npages, + hdr->args.count + hdr->args.pgbase); + if (hdr->args.pgbase != 0) + iov_iter_advance(i, hdr->args.pgbase); +} + +static void +nfs_local_hdr_release(struct nfs_pgio_header *hdr, + const struct rpc_call_ops *call_ops) +{ + call_ops->rpc_call_done(&hdr->task, hdr); + call_ops->rpc_release(hdr); +} + +static void +nfs_local_pgio_init(struct nfs_pgio_header *hdr, + const struct rpc_call_ops *call_ops) +{ + hdr->task.tk_ops = call_ops; + if (!hdr->task.tk_start) + hdr->task.tk_start = ktime_get(); +} + +static void +nfs_local_pgio_done(struct nfs_pgio_header *hdr, long status) +{ + if (status >= 0) { + hdr->res.count = status; + hdr->res.op_status = NFS4_OK; + hdr->task.tk_status = 0; + } else { + hdr->res.op_status = nfs_localio_errno_to_nfs4_stat(status); + hdr->task.tk_status = status; + } +} + +static void +nfs_local_pgio_release(struct nfs_local_kiocb *iocb) +{ + struct nfs_pgio_header *hdr = iocb->hdr; + + nfs_local_file_put(iocb->localio); + nfs_local_iocb_free(iocb); + nfs_local_hdr_release(hdr, hdr->task.tk_ops); +} + +/* + * Complete the I/O from iocb->kiocb.ki_complete() + * + * Note that this function can be called from a bottom half context, + * hence we need to queue the rpc_call_done() etc to a workqueue + */ +static inline void nfs_local_pgio_aio_complete(struct nfs_local_kiocb *iocb) +{ + INIT_WORK(&iocb->work, iocb->aio_complete_work); + queue_work(nfsiod_workqueue, &iocb->work); +} + +static void +nfs_local_read_done(struct nfs_local_kiocb *iocb, long status) +{ + struct nfs_pgio_header *hdr = iocb->hdr; + struct file *filp = iocb->kiocb.ki_filp; + + nfs_local_pgio_done(hdr, status); + + /* + * Must clear replen otherwise NFSv3 data corruption will occur + * if/when switching from LOCALIO back to using normal RPC. + */ + hdr->res.replen = 0; + + if (hdr->res.count != hdr->args.count || + hdr->args.offset + hdr->res.count >= i_size_read(file_inode(filp))) + hdr->res.eof = true; + + dprintk("%s: read %ld bytes eof %d.\n", __func__, + status > 0 ? status : 0, hdr->res.eof); +} + +static void nfs_local_read_aio_complete_work(struct work_struct *work) +{ + struct nfs_local_kiocb *iocb = + container_of(work, struct nfs_local_kiocb, work); + + nfs_local_pgio_release(iocb); +} + +static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret) +{ + struct nfs_local_kiocb *iocb = + container_of(kiocb, struct nfs_local_kiocb, kiocb); + + nfs_local_read_done(iocb, ret); + nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */ +} + +static void nfs_local_call_read(struct work_struct *work) +{ + struct nfs_local_kiocb *iocb = + container_of(work, struct nfs_local_kiocb, work); + struct file *filp = iocb->kiocb.ki_filp; + const struct cred *save_cred; + struct iov_iter iter; + ssize_t status; + + save_cred = override_creds(filp->f_cred); + + nfs_local_iter_init(&iter, iocb, READ); + + status = filp->f_op->read_iter(&iocb->kiocb, &iter); + if (status != -EIOCBQUEUED) { + nfs_local_read_done(iocb, status); + nfs_local_pgio_release(iocb); + } + + revert_creds(save_cred); +} + +static int +nfs_do_local_read(struct nfs_pgio_header *hdr, + struct nfsd_file *localio, + const struct rpc_call_ops *call_ops) +{ + struct nfs_local_kiocb *iocb; + struct file *file = nfs_to->nfsd_file_file(localio); + + /* Don't support filesystems without read_iter */ + if (!file->f_op->read_iter) + return -EAGAIN; + + dprintk("%s: vfs_read count=%u pos=%llu\n", + __func__, hdr->args.count, hdr->args.offset); + + iocb = nfs_local_iocb_alloc(hdr, file, GFP_KERNEL); + if (iocb == NULL) + return -ENOMEM; + iocb->localio = localio; + + nfs_local_pgio_init(hdr, call_ops); + hdr->res.eof = false; + + if (iocb->kiocb.ki_flags & IOCB_DIRECT) { + iocb->kiocb.ki_complete = nfs_local_read_aio_complete; + iocb->aio_complete_work = nfs_local_read_aio_complete_work; + } + + INIT_WORK(&iocb->work, nfs_local_call_read); + queue_work(nfslocaliod_workqueue, &iocb->work); + + return 0; +} + +static void +nfs_copy_boot_verifier(struct nfs_write_verifier *verifier, struct inode *inode) +{ + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + u32 *verf = (u32 *)verifier->data; + int seq = 0; + + do { + read_seqbegin_or_lock(&clp->cl_boot_lock, &seq); + verf[0] = (u32)clp->cl_nfssvc_boot.tv_sec; + verf[1] = (u32)clp->cl_nfssvc_boot.tv_nsec; + } while (need_seqretry(&clp->cl_boot_lock, seq)); + done_seqretry(&clp->cl_boot_lock, seq); +} + +static void +nfs_reset_boot_verifier(struct inode *inode) +{ + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + + write_seqlock(&clp->cl_boot_lock); + ktime_get_real_ts64(&clp->cl_nfssvc_boot); + write_sequnlock(&clp->cl_boot_lock); +} + +static void +nfs_set_local_verifier(struct inode *inode, + struct nfs_writeverf *verf, + enum nfs3_stable_how how) +{ + nfs_copy_boot_verifier(&verf->verifier, inode); + verf->committed = how; +} + +/* Factored out from fs/nfsd/vfs.h:fh_getattr() */ +static int __vfs_getattr(struct path *p, struct kstat *stat, int version) +{ + u32 request_mask = STATX_BASIC_STATS; + + if (version == 4) + request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE); + return vfs_getattr(p, stat, request_mask, AT_STATX_SYNC_AS_STAT); +} + +/* Copied from fs/nfsd/nfsfh.c:nfsd4_change_attribute() */ +static u64 __nfsd4_change_attribute(const struct kstat *stat, + const struct inode *inode) +{ + u64 chattr; + + if (stat->result_mask & STATX_CHANGE_COOKIE) { + chattr = stat->change_cookie; + if (S_ISREG(inode->i_mode) && + !(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) { + chattr += (u64)stat->ctime.tv_sec << 30; + chattr += stat->ctime.tv_nsec; + } + } else { + chattr = time_to_chattr(&stat->ctime); + } + return chattr; +} + +static void nfs_local_vfs_getattr(struct nfs_local_kiocb *iocb) +{ + struct kstat stat; + struct file *filp = iocb->kiocb.ki_filp; + struct nfs_pgio_header *hdr = iocb->hdr; + struct nfs_fattr *fattr = hdr->res.fattr; + int version = NFS_PROTO(hdr->inode)->version; + + if (unlikely(!fattr) || __vfs_getattr(&filp->f_path, &stat, version)) + return; + + fattr->valid = (NFS_ATTR_FATTR_FILEID | + NFS_ATTR_FATTR_CHANGE | + NFS_ATTR_FATTR_SIZE | + NFS_ATTR_FATTR_ATIME | + NFS_ATTR_FATTR_MTIME | + NFS_ATTR_FATTR_CTIME | + NFS_ATTR_FATTR_SPACE_USED); + + fattr->fileid = stat.ino; + fattr->size = stat.size; + fattr->atime = stat.atime; + fattr->mtime = stat.mtime; + fattr->ctime = stat.ctime; + if (version == 4) { + fattr->change_attr = + __nfsd4_change_attribute(&stat, file_inode(filp)); + } else + fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime); + fattr->du.nfs3.used = stat.blocks << 9; +} + +static void +nfs_local_write_done(struct nfs_local_kiocb *iocb, long status) +{ + struct nfs_pgio_header *hdr = iocb->hdr; + struct inode *inode = hdr->inode; + + dprintk("%s: wrote %ld bytes.\n", __func__, status > 0 ? status : 0); + + /* Handle short writes as if they are ENOSPC */ + if (status > 0 && status < hdr->args.count) { + hdr->mds_offset += status; + hdr->args.offset += status; + hdr->args.pgbase += status; + hdr->args.count -= status; + nfs_set_pgio_error(hdr, -ENOSPC, hdr->args.offset); + status = -ENOSPC; + } + if (status < 0) + nfs_reset_boot_verifier(inode); + + nfs_local_pgio_done(hdr, status); +} + +static void nfs_local_write_aio_complete_work(struct work_struct *work) +{ + struct nfs_local_kiocb *iocb = + container_of(work, struct nfs_local_kiocb, work); + + nfs_local_vfs_getattr(iocb); + nfs_local_pgio_release(iocb); +} + +static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret) +{ + struct nfs_local_kiocb *iocb = + container_of(kiocb, struct nfs_local_kiocb, kiocb); + + nfs_local_write_done(iocb, ret); + nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */ +} + +static void nfs_local_call_write(struct work_struct *work) +{ + struct nfs_local_kiocb *iocb = + container_of(work, struct nfs_local_kiocb, work); + struct file *filp = iocb->kiocb.ki_filp; + unsigned long old_flags = current->flags; + const struct cred *save_cred; + struct iov_iter iter; + ssize_t status; + + current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO; + save_cred = override_creds(filp->f_cred); + + nfs_local_iter_init(&iter, iocb, WRITE); + + file_start_write(filp); + status = filp->f_op->write_iter(&iocb->kiocb, &iter); + file_end_write(filp); + if (status != -EIOCBQUEUED) { + nfs_local_write_done(iocb, status); + nfs_local_vfs_getattr(iocb); + nfs_local_pgio_release(iocb); + } + + revert_creds(save_cred); + current->flags = old_flags; +} + +static int +nfs_do_local_write(struct nfs_pgio_header *hdr, + struct nfsd_file *localio, + const struct rpc_call_ops *call_ops) +{ + struct nfs_local_kiocb *iocb; + struct file *file = nfs_to->nfsd_file_file(localio); + + /* Don't support filesystems without write_iter */ + if (!file->f_op->write_iter) + return -EAGAIN; + + dprintk("%s: vfs_write count=%u pos=%llu %s\n", + __func__, hdr->args.count, hdr->args.offset, + (hdr->args.stable == NFS_UNSTABLE) ? "unstable" : "stable"); + + iocb = nfs_local_iocb_alloc(hdr, file, GFP_NOIO); + if (iocb == NULL) + return -ENOMEM; + iocb->localio = localio; + + switch (hdr->args.stable) { + default: + break; + case NFS_DATA_SYNC: + iocb->kiocb.ki_flags |= IOCB_DSYNC; + break; + case NFS_FILE_SYNC: + iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC; + } + + nfs_local_pgio_init(hdr, call_ops); + + nfs_set_local_verifier(hdr->inode, hdr->res.verf, hdr->args.stable); + + if (iocb->kiocb.ki_flags & IOCB_DIRECT) { + iocb->kiocb.ki_complete = nfs_local_write_aio_complete; + iocb->aio_complete_work = nfs_local_write_aio_complete_work; + } + + INIT_WORK(&iocb->work, nfs_local_call_write); + queue_work(nfslocaliod_workqueue, &iocb->work); + + return 0; +} + +int nfs_local_doio(struct nfs_client *clp, struct nfsd_file *localio, + struct nfs_pgio_header *hdr, + const struct rpc_call_ops *call_ops) +{ + int status = 0; + + if (!hdr->args.count) + return 0; + + switch (hdr->rw_mode) { + case FMODE_READ: + status = nfs_do_local_read(hdr, localio, call_ops); + break; + case FMODE_WRITE: + status = nfs_do_local_write(hdr, localio, call_ops); + break; + default: + dprintk("%s: invalid mode: %d\n", __func__, + hdr->rw_mode); + status = -EINVAL; + } + + if (status != 0) { + if (status == -EAGAIN) + nfs_localio_disable_client(clp); + nfs_local_file_put(localio); + hdr->task.tk_status = status; + nfs_local_hdr_release(hdr, call_ops); + } + return status; +} + +static void +nfs_local_init_commit(struct nfs_commit_data *data, + const struct rpc_call_ops *call_ops) +{ + data->task.tk_ops = call_ops; +} + +static int +nfs_local_run_commit(struct file *filp, struct nfs_commit_data *data) +{ + loff_t start = data->args.offset; + loff_t end = LLONG_MAX; + + if (data->args.count > 0) { + end = start + data->args.count - 1; + if (end < start) + end = LLONG_MAX; + } + + dprintk("%s: commit %llu - %llu\n", __func__, start, end); + return vfs_fsync_range(filp, start, end, 0); +} + +static void +nfs_local_commit_done(struct nfs_commit_data *data, int status) +{ + if (status >= 0) { + nfs_set_local_verifier(data->inode, + data->res.verf, + NFS_FILE_SYNC); + data->res.op_status = NFS4_OK; + data->task.tk_status = 0; + } else { + nfs_reset_boot_verifier(data->inode); + data->res.op_status = nfs_localio_errno_to_nfs4_stat(status); + data->task.tk_status = status; + } +} + +static void +nfs_local_release_commit_data(struct nfsd_file *localio, + struct nfs_commit_data *data, + const struct rpc_call_ops *call_ops) +{ + nfs_local_file_put(localio); + call_ops->rpc_call_done(&data->task, data); + call_ops->rpc_release(data); +} + +static void +nfs_local_fsync_ctx_free(struct nfs_local_fsync_ctx *ctx) +{ + nfs_local_release_commit_data(ctx->localio, ctx->data, + ctx->data->task.tk_ops); + kfree(ctx); +} + +static void +nfs_local_fsync_work(struct work_struct *work) +{ + struct nfs_local_fsync_ctx *ctx; + int status; + + ctx = container_of(work, struct nfs_local_fsync_ctx, work); + + status = nfs_local_run_commit(nfs_to->nfsd_file_file(ctx->localio), + ctx->data); + nfs_local_commit_done(ctx->data, status); + if (ctx->done != NULL) + complete(ctx->done); + nfs_local_fsync_ctx_free(ctx); +} + +static struct nfs_local_fsync_ctx * +nfs_local_fsync_ctx_alloc(struct nfs_commit_data *data, + struct nfsd_file *localio, gfp_t flags) +{ + struct nfs_local_fsync_ctx *ctx = kmalloc(sizeof(*ctx), flags); + + if (ctx != NULL) { + ctx->localio = localio; + ctx->data = data; + INIT_WORK(&ctx->work, nfs_local_fsync_work); + ctx->done = NULL; + } + return ctx; +} + +int nfs_local_commit(struct nfsd_file *localio, + struct nfs_commit_data *data, + const struct rpc_call_ops *call_ops, int how) +{ + struct nfs_local_fsync_ctx *ctx; + + ctx = nfs_local_fsync_ctx_alloc(data, localio, GFP_KERNEL); + if (!ctx) { + nfs_local_commit_done(data, -ENOMEM); + nfs_local_release_commit_data(localio, data, call_ops); + return -ENOMEM; + } + + nfs_local_init_commit(data, call_ops); + + if (how & FLUSH_SYNC) { + DECLARE_COMPLETION_ONSTACK(done); + ctx->done = &done; + queue_work(nfsiod_workqueue, &ctx->work); + wait_for_completion(&done); + } else + queue_work(nfsiod_workqueue, &ctx->work); + + return 0; +} diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 68e76b626371..57c9dd700b58 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -128,11 +128,6 @@ struct mountres { rpc_authflavor_t *auth_flavors; }; -struct mnt_fhstatus { - u32 status; - struct nfs_fh *fh; -}; - /** * nfs_mount - Obtain an NFS file handle for the given host and path * @info: pointer to mount request arguments diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index e7494cdd957e..7f1ec9c67ff2 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -182,7 +182,7 @@ struct vfsmount *nfs_d_automount(struct path *path) ctx->version = client->rpc_ops->version; ctx->minorversion = client->cl_minorversion; ctx->nfs_mod = client->cl_nfs_mod; - __module_get(ctx->nfs_mod->owner); + get_nfs_version(ctx->nfs_mod); ret = client->rpc_ops->submount(fc, server); if (ret < 0) { @@ -195,7 +195,6 @@ struct vfsmount *nfs_d_automount(struct path *path) if (IS_ERR(mnt)) goto out_fc; - mntget(mnt); /* prevent immediate expiration */ if (timeout <= 0) goto out_fc; @@ -308,7 +307,7 @@ int nfs_submount(struct fs_context *fc, struct nfs_server *server) int err; /* Look it up again to get its attributes */ - err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry, + err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry, &dentry->d_name, ctx->mntfh, ctx->clone_data.fattr); dput(parent); if (err != 0) diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index a68b21603ea9..6ba3ea39e928 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -31,7 +31,11 @@ struct nfs_net { unsigned short nfs_callback_tcpport; unsigned short nfs_callback_tcpport6; int cb_users[NFS4_MAX_MINOR_VERSION + 1]; -#endif +#endif /* CONFIG_NFS_V4 */ +#if IS_ENABLED(CONFIG_NFS_V4_1) + struct list_head nfs4_data_server_cache; + spinlock_t nfs4_data_server_lock; +#endif /* CONFIG_NFS_V4_1 */ struct nfs_netns_client *nfs_client; spinlock_t nfs_client_lock; ktime_t boot_time; diff --git a/fs/nfs/nfs.h b/fs/nfs/nfs.h index 0d3ce0460e35..8a5f51be013a 100644 --- a/fs/nfs/nfs.h +++ b/fs/nfs/nfs.h @@ -19,10 +19,10 @@ struct nfs_subversion { const struct nfs_rpc_ops *rpc_ops; /* NFS operations */ const struct super_operations *sops; /* NFS Super operations */ const struct xattr_handler * const *xattr; /* NFS xattr handlers */ - struct list_head list; /* List of NFS versions */ }; -struct nfs_subversion *get_nfs_version(unsigned int); +struct nfs_subversion *find_nfs_version(unsigned int); +int get_nfs_version(struct nfs_subversion *); void put_nfs_version(struct nfs_subversion *); void register_nfs_version(struct nfs_subversion *); void unregister_nfs_version(struct nfs_subversion *); diff --git a/fs/nfs/nfs2super.c b/fs/nfs/nfs2super.c index 467f21ee6a35..b1badc70bd71 100644 --- a/fs/nfs/nfs2super.c +++ b/fs/nfs/nfs2super.c @@ -26,6 +26,7 @@ static void __exit exit_nfs_v2(void) unregister_nfs_version(&nfs_v2); } +MODULE_DESCRIPTION("NFSv2 client support"); MODULE_LICENSE("GPL"); module_init(init_nfs_v2); diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index c19093814296..6e75c6c2d234 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -22,14 +22,12 @@ #include <linux/nfs.h> #include <linux/nfs2.h> #include <linux/nfs_fs.h> +#include <linux/nfs_common.h> #include "nfstrace.h" #include "internal.h" #define NFSDBG_FACILITY NFSDBG_XDR -/* Mapping from NFS error code to "errno" error code. */ -#define errno_NFSERR_IO EIO - /* * Declare the space requirements for NFS arguments and replies as * number of 32bit-words @@ -64,8 +62,6 @@ #define NFS_readdirres_sz (1+NFS_pagepad_sz) #define NFS_statfsres_sz (1+NFS_info_sz) -static int nfs_stat_to_errno(enum nfs_stat); - /* * Encode/decode NFSv2 basic data types * @@ -1054,70 +1050,6 @@ out_default: return nfs_stat_to_errno(status); } - -/* - * We need to translate between nfs status return values and - * the local errno values which may not be the same. - */ -static const struct { - int stat; - int errno; -} nfs_errtbl[] = { - { NFS_OK, 0 }, - { NFSERR_PERM, -EPERM }, - { NFSERR_NOENT, -ENOENT }, - { NFSERR_IO, -errno_NFSERR_IO}, - { NFSERR_NXIO, -ENXIO }, -/* { NFSERR_EAGAIN, -EAGAIN }, */ - { NFSERR_ACCES, -EACCES }, - { NFSERR_EXIST, -EEXIST }, - { NFSERR_XDEV, -EXDEV }, - { NFSERR_NODEV, -ENODEV }, - { NFSERR_NOTDIR, -ENOTDIR }, - { NFSERR_ISDIR, -EISDIR }, - { NFSERR_INVAL, -EINVAL }, - { NFSERR_FBIG, -EFBIG }, - { NFSERR_NOSPC, -ENOSPC }, - { NFSERR_ROFS, -EROFS }, - { NFSERR_MLINK, -EMLINK }, - { NFSERR_NAMETOOLONG, -ENAMETOOLONG }, - { NFSERR_NOTEMPTY, -ENOTEMPTY }, - { NFSERR_DQUOT, -EDQUOT }, - { NFSERR_STALE, -ESTALE }, - { NFSERR_REMOTE, -EREMOTE }, -#ifdef EWFLUSH - { NFSERR_WFLUSH, -EWFLUSH }, -#endif - { NFSERR_BADHANDLE, -EBADHANDLE }, - { NFSERR_NOT_SYNC, -ENOTSYNC }, - { NFSERR_BAD_COOKIE, -EBADCOOKIE }, - { NFSERR_NOTSUPP, -ENOTSUPP }, - { NFSERR_TOOSMALL, -ETOOSMALL }, - { NFSERR_SERVERFAULT, -EREMOTEIO }, - { NFSERR_BADTYPE, -EBADTYPE }, - { NFSERR_JUKEBOX, -EJUKEBOX }, - { -1, -EIO } -}; - -/** - * nfs_stat_to_errno - convert an NFS status code to a local errno - * @status: NFS status code to convert - * - * Returns a local errno value, or -EIO if the NFS status code is - * not recognized. This function is used jointly by NFSv2 and NFSv3. - */ -static int nfs_stat_to_errno(enum nfs_stat status) -{ - int i; - - for (i = 0; nfs_errtbl[i].stat != -1; i++) { - if (nfs_errtbl[i].stat == (int)status) - return nfs_errtbl[i].errno; - } - dprintk("NFS: Unrecognized nfs status value: %u\n", status); - return nfs_errtbl[i].errno; -} - #define PROC(proc, argtype, restype, timer) \ [NFSPROC_##proc] = { \ .p_proc = NFSPROC_##proc, \ diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 18d8f6529f61..a126eb31f62f 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -104,7 +104,7 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type, bool rcu) switch (status) { case 0: - status = nfs_refresh_inode(inode, res.fattr); + nfs_refresh_inode(inode, res.fattr); break; case -EPFNOSUPPORT: case -EPROTONOSUPPORT: diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index b0c8a39c2bbd..0d7310c1ee0c 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -120,6 +120,8 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, if (mds_srv->flags & NFS_MOUNT_NORESVPORT) __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + if (test_bit(NFS_CS_NETUNREACH_FATAL, &mds_clp->cl_flags)) + __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags); __set_bit(NFS_CS_DS, &cl_init.init_flags); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index cbbe3f0193b8..a4cb67573aa7 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -39,7 +39,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) __set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); schedule_timeout(NFS_JUKEBOX_RETRY_TIME); res = -ERESTARTSYS; - } while (!fatal_signal_pending(current)); + } while (!fatal_signal_pending(current) && !nfs_current_task_exiting()); return res; } @@ -192,7 +192,7 @@ __nfs3_proc_lookup(struct inode *dir, const char *name, size_t len, } static int -nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, +nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { unsigned short task_flags = 0; @@ -202,8 +202,7 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, task_flags |= RPC_TASK_TIMEOUT; dprintk("NFS call lookup %pd2\n", dentry); - return __nfs3_proc_lookup(dir, dentry->d_name.name, - dentry->d_name.len, fhandle, fattr, + return __nfs3_proc_lookup(dir, name->name, name->len, fhandle, fattr, task_flags); } @@ -579,13 +578,13 @@ out: return status; } -static int +static struct dentry * nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) { struct posix_acl *default_acl, *acl; struct nfs3_createdata *data; - struct dentry *d_alias; - int status = -ENOMEM; + struct dentry *ret = ERR_PTR(-ENOMEM); + int status; dprintk("NFS call mkdir %pd\n", dentry); @@ -593,8 +592,9 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) if (data == NULL) goto out; - status = posix_acl_create(dir, &sattr->ia_mode, &default_acl, &acl); - if (status) + ret = ERR_PTR(posix_acl_create(dir, &sattr->ia_mode, + &default_acl, &acl)); + if (IS_ERR(ret)) goto out; data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR]; @@ -603,25 +603,27 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) data->arg.mkdir.len = dentry->d_name.len; data->arg.mkdir.sattr = sattr; - d_alias = nfs3_do_create(dir, dentry, data); - status = PTR_ERR_OR_ZERO(d_alias); + ret = nfs3_do_create(dir, dentry, data); - if (status != 0) + if (IS_ERR(ret)) goto out_release_acls; - if (d_alias) - dentry = d_alias; + if (ret) + dentry = ret; status = nfs3_proc_setacls(d_inode(dentry), acl, default_acl); + if (status) { + dput(ret); + ret = ERR_PTR(status); + } - dput(d_alias); out_release_acls: posix_acl_release(acl); posix_acl_release(default_acl); out: nfs3_free_createdata(data); - dprintk("NFS reply mkdir: %d\n", status); - return status; + dprintk("NFS reply mkdir: %d\n", PTR_ERR_OR_ZERO(ret)); + return ret; } static int @@ -844,6 +846,41 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return status; } +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + +static unsigned nfs3_localio_probe_throttle __read_mostly = 0; +module_param(nfs3_localio_probe_throttle, uint, 0644); +MODULE_PARM_DESC(nfs3_localio_probe_throttle, + "Probe for NFSv3 LOCALIO every N IO requests. Must be power-of-2, defaults to 0 (probing disabled)."); + +static void nfs3_localio_probe(struct nfs_server *server) +{ + struct nfs_client *clp = server->nfs_client; + + /* Throttled to reduce nfs_local_probe_async() frequency */ + if (!nfs3_localio_probe_throttle || nfs_server_is_local(clp)) + return; + + /* + * Try (re)enabling LOCALIO if isn't enabled -- admin deems + * it worthwhile to periodically check if LOCALIO possible by + * setting the 'nfs3_localio_probe_throttle' module parameter. + * + * This is useful if LOCALIO was previously enabled, but was + * disabled due to server restart, and IO has successfully + * completed in terms of normal RPC. + */ + if ((clp->cl_uuid.nfs3_localio_probe_count++ & + (nfs3_localio_probe_throttle - 1)) == 0) { + if (!nfs_server_is_local(clp)) + nfs_local_probe_async(clp); + } +} + +#else +static void nfs3_localio_probe(struct nfs_server *server) {} +#endif + static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { struct inode *inode = hdr->inode; @@ -855,8 +892,11 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; - if (task->tk_status >= 0 && !server->read_hdrsize) - cmpxchg(&server->read_hdrsize, 0, hdr->res.replen); + if (task->tk_status >= 0) { + if (!server->read_hdrsize) + cmpxchg(&server->read_hdrsize, 0, hdr->res.replen); + nfs3_localio_probe(server); + } nfs_invalidate_atime(inode); nfs_refresh_inode(inode, &hdr->fattr); @@ -886,8 +926,10 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; - if (task->tk_status >= 0) + if (task->tk_status >= 0) { nfs_writeback_update_inode(hdr); + nfs3_localio_probe(NFS_SERVER(inode)); + } return 0; } @@ -979,13 +1021,21 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) return status; } -static int nfs3_have_delegation(struct inode *inode, fmode_t flags) +static int nfs3_have_delegation(struct inode *inode, fmode_t type, int flags) +{ + return 0; +} + +static int nfs3_return_delegation(struct inode *inode) { + if (S_ISREG(inode->i_mode)) + nfs_wb_all(inode); return 0; } static const struct inode_operations nfs3_dir_inode_operations = { .create = nfs_create, + .atomic_open = nfs_atomic_open_v23, .lookup = nfs_lookup, .link = nfs_link, .unlink = nfs_unlink, @@ -1061,6 +1111,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .clear_acl_cache = forget_all_cached_acls, .close_context = nfs_close_context, .have_delegation = nfs3_have_delegation, + .return_delegation = nfs3_return_delegation, .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, .free_client = nfs_free_client, diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c index 8a9be9e47f76..20a80478449e 100644 --- a/fs/nfs/nfs3super.c +++ b/fs/nfs/nfs3super.c @@ -27,6 +27,7 @@ static void __exit exit_nfs_v3(void) unregister_nfs_version(&nfs_v3); } +MODULE_DESCRIPTION("NFSv3 client support"); MODULE_LICENSE("GPL"); module_init(init_nfs_v3); diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 60f032be805a..4ae01c10b7e2 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -21,14 +21,13 @@ #include <linux/nfs3.h> #include <linux/nfs_fs.h> #include <linux/nfsacl.h> +#include <linux/nfs_common.h> + #include "nfstrace.h" #include "internal.h" #define NFSDBG_FACILITY NFSDBG_XDR -/* Mapping from NFS error code to "errno" error code. */ -#define errno_NFSERR_IO EIO - /* * Declare the space requirements for NFS arguments and replies as * number of 32bit-words @@ -91,8 +90,6 @@ NFS3_pagepad_sz) #define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz) -static int nfs3_stat_to_errno(enum nfs_stat); - /* * Map file type to S_IFMT bits */ @@ -1406,7 +1403,7 @@ static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req, out: return error; out_default: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -1445,7 +1442,7 @@ static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req, out: return error; out_status: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -1495,7 +1492,7 @@ out_default: error = decode_post_op_attr(xdr, result->dir_attr, userns); if (unlikely(error)) goto out; - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -1537,7 +1534,7 @@ static int nfs3_xdr_dec_access3res(struct rpc_rqst *req, out: return error; out_default: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -1578,7 +1575,7 @@ static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req, out: return error; out_default: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -1658,7 +1655,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, out: return error; out_status: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -1728,7 +1725,7 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr, out: return error; out_status: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -1795,7 +1792,7 @@ out_default: error = decode_wcc_data(xdr, result->dir_attr, userns); if (unlikely(error)) goto out; - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -1835,7 +1832,7 @@ static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req, out: return error; out_status: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -1881,7 +1878,7 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req, out: return error; out_status: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -1926,7 +1923,7 @@ static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr, out: return error; out_status: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /** @@ -2101,7 +2098,7 @@ out_default: error = decode_post_op_attr(xdr, result->dir_attr, rpc_rqst_userns(req)); if (unlikely(error)) goto out; - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -2167,7 +2164,7 @@ static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req, out: return error; out_status: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -2243,7 +2240,7 @@ static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req, out: return error; out_status: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -2304,7 +2301,7 @@ static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req, out: return error; out_status: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } /* @@ -2350,7 +2347,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, out: return error; out_status: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } #ifdef CONFIG_NFS_V3_ACL @@ -2416,7 +2413,7 @@ static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req, out: return error; out_default: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req, @@ -2435,76 +2432,11 @@ static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req, out: return error; out_default: - return nfs3_stat_to_errno(status); + return nfs_stat_to_errno(status); } #endif /* CONFIG_NFS_V3_ACL */ - -/* - * We need to translate between nfs status return values and - * the local errno values which may not be the same. - */ -static const struct { - int stat; - int errno; -} nfs_errtbl[] = { - { NFS_OK, 0 }, - { NFSERR_PERM, -EPERM }, - { NFSERR_NOENT, -ENOENT }, - { NFSERR_IO, -errno_NFSERR_IO}, - { NFSERR_NXIO, -ENXIO }, -/* { NFSERR_EAGAIN, -EAGAIN }, */ - { NFSERR_ACCES, -EACCES }, - { NFSERR_EXIST, -EEXIST }, - { NFSERR_XDEV, -EXDEV }, - { NFSERR_NODEV, -ENODEV }, - { NFSERR_NOTDIR, -ENOTDIR }, - { NFSERR_ISDIR, -EISDIR }, - { NFSERR_INVAL, -EINVAL }, - { NFSERR_FBIG, -EFBIG }, - { NFSERR_NOSPC, -ENOSPC }, - { NFSERR_ROFS, -EROFS }, - { NFSERR_MLINK, -EMLINK }, - { NFSERR_NAMETOOLONG, -ENAMETOOLONG }, - { NFSERR_NOTEMPTY, -ENOTEMPTY }, - { NFSERR_DQUOT, -EDQUOT }, - { NFSERR_STALE, -ESTALE }, - { NFSERR_REMOTE, -EREMOTE }, -#ifdef EWFLUSH - { NFSERR_WFLUSH, -EWFLUSH }, -#endif - { NFSERR_BADHANDLE, -EBADHANDLE }, - { NFSERR_NOT_SYNC, -ENOTSYNC }, - { NFSERR_BAD_COOKIE, -EBADCOOKIE }, - { NFSERR_NOTSUPP, -ENOTSUPP }, - { NFSERR_TOOSMALL, -ETOOSMALL }, - { NFSERR_SERVERFAULT, -EREMOTEIO }, - { NFSERR_BADTYPE, -EBADTYPE }, - { NFSERR_JUKEBOX, -EJUKEBOX }, - { -1, -EIO } -}; - -/** - * nfs3_stat_to_errno - convert an NFS status code to a local errno - * @status: NFS status code to convert - * - * Returns a local errno value, or -EIO if the NFS status code is - * not recognized. This function is used jointly by NFSv2 and NFSv3. - */ -static int nfs3_stat_to_errno(enum nfs_stat status) -{ - int i; - - for (i = 0; nfs_errtbl[i].stat != -1; i++) { - if (nfs_errtbl[i].stat == (int)status) - return nfs_errtbl[i].errno; - } - dprintk("NFS: Unrecognized nfs status value: %u\n", status); - return nfs_errtbl[i].errno; -} - - #define PROC(proc, argtype, restype, timer) \ [NFS3PROC_##proc] = { \ .p_proc = NFS3PROC_##proc, \ diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 28704f924612..5cf52ece96ac 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -21,6 +21,8 @@ #define NFSDBG_FACILITY NFSDBG_PROC static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *std); +static int nfs42_proc_offload_status(struct file *file, nfs4_stateid *stateid, + u64 *copied); static void nfs42_set_netaddr(struct file *filep, struct nfs42_netaddr *naddr) { @@ -173,6 +175,20 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len) return err; } +static void nfs4_copy_dequeue_callback(struct nfs_server *dst_server, + struct nfs_server *src_server, + struct nfs4_copy_state *copy) +{ + spin_lock(&dst_server->nfs_client->cl_lock); + list_del_init(©->copies); + spin_unlock(&dst_server->nfs_client->cl_lock); + if (dst_server != src_server) { + spin_lock(&src_server->nfs_client->cl_lock); + list_del_init(©->src_copies); + spin_unlock(&src_server->nfs_client->cl_lock); + } +} + static int handle_async_copy(struct nfs42_copy_res *res, struct nfs_server *dst_server, struct nfs_server *src_server, @@ -182,9 +198,12 @@ static int handle_async_copy(struct nfs42_copy_res *res, bool *restart) { struct nfs4_copy_state *copy, *tmp_copy = NULL, *iter; - int status = NFS4_OK; struct nfs_open_context *dst_ctx = nfs_file_open_context(dst); struct nfs_open_context *src_ctx = nfs_file_open_context(src); + struct nfs_client *clp = dst_server->nfs_client; + unsigned long timeout = 3 * HZ; + int status = NFS4_OK; + u64 copied; copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_KERNEL); if (!copy) @@ -218,19 +237,16 @@ static int handle_async_copy(struct nfs42_copy_res *res, if (dst_server != src_server) { spin_lock(&src_server->nfs_client->cl_lock); - list_add_tail(©->src_copies, &src_server->ss_copies); + list_add_tail(©->src_copies, &src_server->ss_src_copies); spin_unlock(&src_server->nfs_client->cl_lock); } - status = wait_for_completion_interruptible(©->completion); - spin_lock(&dst_server->nfs_client->cl_lock); - list_del_init(©->copies); - spin_unlock(&dst_server->nfs_client->cl_lock); - if (dst_server != src_server) { - spin_lock(&src_server->nfs_client->cl_lock); - list_del_init(©->src_copies); - spin_unlock(&src_server->nfs_client->cl_lock); - } +wait: + status = wait_for_completion_interruptible_timeout(©->completion, + timeout); + if (!status) + goto timeout; + nfs4_copy_dequeue_callback(dst_server, src_server, copy); if (status == -ERESTARTSYS) { goto out_cancel; } else if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) { @@ -240,6 +256,7 @@ static int handle_async_copy(struct nfs42_copy_res *res, } out: res->write_res.count = copy->count; + /* Copy out the updated write verifier provided by CB_OFFLOAD. */ memcpy(&res->write_res.verifier, ©->verf, sizeof(copy->verf)); status = -copy->error; @@ -251,6 +268,39 @@ out_cancel: if (!nfs42_files_from_same_server(src, dst)) nfs42_do_offload_cancel_async(src, src_stateid); goto out_free; +timeout: + timeout <<= 1; + if (timeout > (clp->cl_lease_time >> 1)) + timeout = clp->cl_lease_time >> 1; + status = nfs42_proc_offload_status(dst, ©->stateid, &copied); + if (status == -EINPROGRESS) + goto wait; + nfs4_copy_dequeue_callback(dst_server, src_server, copy); + switch (status) { + case 0: + /* The server recognized the copy stateid, so it hasn't + * rebooted. Don't overwrite the verifier returned in the + * COPY result. */ + res->write_res.count = copied; + goto out_free; + case -EREMOTEIO: + /* COPY operation failed on the server. */ + status = -EOPNOTSUPP; + res->write_res.count = copied; + goto out_free; + case -EBADF: + /* Server did not recognize the copy stateid. It has + * probably restarted and lost the plot. */ + res->write_res.count = 0; + status = -EOPNOTSUPP; + break; + case -EOPNOTSUPP: + /* RFC 7862 REQUIREs server to support OFFLOAD_STATUS when + * it has signed up for an async COPY, so server is not + * spec-compliant. */ + res->write_res.count = 0; + } + goto out_free; } static int process_copy_commit(struct file *dst, loff_t pos_dst, @@ -498,15 +548,15 @@ out_put_src_lock: return err; } -struct nfs42_offloadcancel_data { +struct nfs42_offload_data { struct nfs_server *seq_server; struct nfs42_offload_status_args args; struct nfs42_offload_status_res res; }; -static void nfs42_offload_cancel_prepare(struct rpc_task *task, void *calldata) +static void nfs42_offload_prepare(struct rpc_task *task, void *calldata) { - struct nfs42_offloadcancel_data *data = calldata; + struct nfs42_offload_data *data = calldata; nfs4_setup_sequence(data->seq_server->nfs_client, &data->args.osa_seq_args, @@ -515,7 +565,7 @@ static void nfs42_offload_cancel_prepare(struct rpc_task *task, void *calldata) static void nfs42_offload_cancel_done(struct rpc_task *task, void *calldata) { - struct nfs42_offloadcancel_data *data = calldata; + struct nfs42_offload_data *data = calldata; trace_nfs4_offload_cancel(&data->args, task->tk_status); nfs41_sequence_done(task, &data->res.osr_seq_res); @@ -525,22 +575,22 @@ static void nfs42_offload_cancel_done(struct rpc_task *task, void *calldata) rpc_restart_call_prepare(task); } -static void nfs42_free_offloadcancel_data(void *data) +static void nfs42_offload_release(void *data) { kfree(data); } static const struct rpc_call_ops nfs42_offload_cancel_ops = { - .rpc_call_prepare = nfs42_offload_cancel_prepare, + .rpc_call_prepare = nfs42_offload_prepare, .rpc_call_done = nfs42_offload_cancel_done, - .rpc_release = nfs42_free_offloadcancel_data, + .rpc_release = nfs42_offload_release, }; static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *stateid) { struct nfs_server *dst_server = NFS_SERVER(file_inode(dst)); - struct nfs42_offloadcancel_data *data = NULL; + struct nfs42_offload_data *data = NULL; struct nfs_open_context *ctx = nfs_file_open_context(dst); struct rpc_task *task; struct rpc_message msg = { @@ -552,14 +602,14 @@ static int nfs42_do_offload_cancel_async(struct file *dst, .rpc_message = &msg, .callback_ops = &nfs42_offload_cancel_ops, .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE, }; int status; if (!(dst_server->caps & NFS_CAP_OFFLOAD_CANCEL)) return -EOPNOTSUPP; - data = kzalloc(sizeof(struct nfs42_offloadcancel_data), GFP_KERNEL); + data = kzalloc(sizeof(struct nfs42_offload_data), GFP_KERNEL); if (data == NULL) return -ENOMEM; @@ -582,6 +632,108 @@ static int nfs42_do_offload_cancel_async(struct file *dst, return status; } +static int +_nfs42_proc_offload_status(struct nfs_server *server, struct file *file, + struct nfs42_offload_data *data) +{ + struct nfs_open_context *ctx = nfs_file_open_context(file); + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OFFLOAD_STATUS], + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = ctx->cred, + }; + int status; + + status = nfs4_call_sync(server->client, server, &msg, + &data->args.osa_seq_args, + &data->res.osr_seq_res, 1); + trace_nfs4_offload_status(&data->args, status); + switch (status) { + case 0: + break; + + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OLD_STATEID: + /* + * Server does not recognize the COPY stateid. CB_OFFLOAD + * could have purged it, or server might have rebooted. + * Since COPY stateids don't have an associated inode, + * avoid triggering state recovery. + */ + status = -EBADF; + break; + case -NFS4ERR_NOTSUPP: + case -ENOTSUPP: + case -EOPNOTSUPP: + server->caps &= ~NFS_CAP_OFFLOAD_STATUS; + status = -EOPNOTSUPP; + break; + } + + return status; +} + +/** + * nfs42_proc_offload_status - Poll completion status of an async copy operation + * @dst: handle of file being copied into + * @stateid: copy stateid (from async COPY result) + * @copied: OUT: number of bytes copied so far + * + * Return values: + * %0: Server returned an NFS4_OK completion status + * %-EINPROGRESS: Server returned no completion status + * %-EREMOTEIO: Server returned an error completion status + * %-EBADF: Server did not recognize the copy stateid + * %-EOPNOTSUPP: Server does not support OFFLOAD_STATUS + * %-ERESTARTSYS: Wait interrupted by signal + * + * Other negative errnos indicate the client could not complete the + * request. + */ +static int +nfs42_proc_offload_status(struct file *dst, nfs4_stateid *stateid, u64 *copied) +{ + struct inode *inode = file_inode(dst); + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_exception exception = { + .inode = inode, + }; + struct nfs42_offload_data *data; + int status; + + if (!(server->caps & NFS_CAP_OFFLOAD_STATUS)) + return -EOPNOTSUPP; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + data->seq_server = server; + data->args.osa_src_fh = NFS_FH(inode); + memcpy(&data->args.osa_stateid, stateid, + sizeof(data->args.osa_stateid)); + exception.stateid = &data->args.osa_stateid; + do { + status = _nfs42_proc_offload_status(server, dst, data); + if (status == -EOPNOTSUPP) + goto out; + status = nfs4_handle_exception(server, status, &exception); + } while (exception.retry); + if (status) + goto out; + + *copied = data->res.osr_count; + if (!data->res.complete_count) + status = -EINPROGRESS; + else if (data->res.osr_complete != NFS_OK) + status = -EREMOTEIO; + +out: + kfree(data); + return status; +} + static int _nfs42_proc_copy_notify(struct file *src, struct file *dst, struct nfs42_copy_notify_args *args, struct nfs42_copy_notify_res *res) @@ -861,7 +1013,7 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server, .rpc_message = &msg, .callback_ops = &nfs42_layoutstat_ops, .callback_data = data, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE, }; struct rpc_task *task; @@ -1016,7 +1168,7 @@ int nfs42_proc_layouterror(struct pnfs_layout_segment *lseg, struct rpc_task_setup task_setup = { .rpc_message = &msg, .callback_ops = &nfs42_layouterror_ops, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE, }; unsigned int i; diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c index b6e3d8f77b91..37d79400e5f4 100644 --- a/fs/nfs/nfs42xattr.c +++ b/fs/nfs/nfs42xattr.c @@ -802,7 +802,7 @@ static struct shrinker *nfs4_xattr_large_entry_shrinker; static enum lru_status cache_lru_isolate(struct list_head *item, - struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) + struct list_lru_one *lru, void *arg) { struct list_head *dispose = arg; struct inode *inode; @@ -867,7 +867,7 @@ nfs4_xattr_cache_count(struct shrinker *shrink, struct shrink_control *sc) static enum lru_status entry_lru_isolate(struct list_head *item, - struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) + struct list_lru_one *lru, void *arg) { struct list_head *dispose = arg; struct nfs4_xattr_bucket *bucket; diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 9e3ae53e2205..b1b663468249 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -35,6 +35,11 @@ #define encode_offload_cancel_maxsz (op_encode_hdr_maxsz + \ XDR_QUADLEN(NFS4_STATEID_SIZE)) #define decode_offload_cancel_maxsz (op_decode_hdr_maxsz) +#define encode_offload_status_maxsz (op_encode_hdr_maxsz + \ + XDR_QUADLEN(NFS4_STATEID_SIZE)) +#define decode_offload_status_maxsz (op_decode_hdr_maxsz + \ + 2 /* osr_count */ + \ + 2 /* osr_complete */) #define encode_copy_notify_maxsz (op_encode_hdr_maxsz + \ XDR_QUADLEN(NFS4_STATEID_SIZE) + \ 1 + /* nl4_type */ \ @@ -143,10 +148,20 @@ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_offload_cancel_maxsz) +#define NFS4_enc_offload_status_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_offload_status_maxsz) +#define NFS4_dec_offload_status_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_offload_status_maxsz) #define NFS4_enc_copy_notify_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_copy_notify_maxsz) #define NFS4_dec_copy_notify_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_copy_notify_maxsz) #define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \ @@ -343,6 +358,14 @@ static void encode_offload_cancel(struct xdr_stream *xdr, encode_nfs4_stateid(xdr, &args->osa_stateid); } +static void encode_offload_status(struct xdr_stream *xdr, + const struct nfs42_offload_status_args *args, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_OFFLOAD_STATUS, decode_offload_status_maxsz, hdr); + encode_nfs4_stateid(xdr, &args->osa_stateid); +} + static void encode_copy_notify(struct xdr_stream *xdr, const struct nfs42_copy_notify_args *args, struct compound_hdr *hdr) @@ -549,7 +572,7 @@ static void nfs4_xdr_enc_copy(struct rpc_rqst *req, } /* - * Encode OFFLOAD_CANEL request + * Encode OFFLOAD_CANCEL request */ static void nfs4_xdr_enc_offload_cancel(struct rpc_rqst *req, struct xdr_stream *xdr, @@ -568,6 +591,25 @@ static void nfs4_xdr_enc_offload_cancel(struct rpc_rqst *req, } /* + * Encode OFFLOAD_STATUS request + */ +static void nfs4_xdr_enc_offload_status(struct rpc_rqst *req, + struct xdr_stream *xdr, + const void *data) +{ + const struct nfs42_offload_status_args *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->osa_seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->osa_seq_args, &hdr); + encode_putfh(xdr, args->osa_src_fh, &hdr); + encode_offload_status(xdr, args, &hdr); + encode_nops(&hdr); +} + +/* * Encode COPY_NOTIFY request */ static void nfs4_xdr_enc_copy_notify(struct rpc_rqst *req, @@ -919,6 +961,26 @@ static int decode_offload_cancel(struct xdr_stream *xdr, return decode_op_hdr(xdr, OP_OFFLOAD_CANCEL); } +static int decode_offload_status(struct xdr_stream *xdr, + struct nfs42_offload_status_res *res) +{ + ssize_t result; + int status; + + status = decode_op_hdr(xdr, OP_OFFLOAD_STATUS); + if (status) + return status; + /* osr_count */ + if (xdr_stream_decode_u64(xdr, &res->osr_count) < 0) + return -EIO; + /* osr_complete<1> */ + result = xdr_stream_decode_uint32_array(xdr, &res->osr_complete, 1); + if (result < 0) + return -EIO; + res->complete_count = result; + return 0; +} + static int decode_copy_notify(struct xdr_stream *xdr, struct nfs42_copy_notify_res *res) { @@ -1369,6 +1431,32 @@ out: } /* + * Decode OFFLOAD_STATUS response + */ +static int nfs4_xdr_dec_offload_status(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + void *data) +{ + struct nfs42_offload_status_res *res = data; + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->osr_seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_offload_status(xdr, res); + +out: + return status; +} + +/* * Decode COPY_NOTIFY response */ static int nfs4_xdr_dec_copy_notify(struct rpc_rqst *rqstp, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 7024230f0d1d..7d383d29a995 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -67,7 +67,8 @@ struct nfs4_minor_version_ops { void (*free_lock_state)(struct nfs_server *, struct nfs4_lock_state *); int (*test_and_free_expired)(struct nfs_server *, - nfs4_stateid *, const struct cred *); + const nfs4_stateid *, + const struct cred *); struct nfs_seqid * (*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); void (*session_trunk)(struct rpc_clnt *clnt, @@ -82,7 +83,7 @@ struct nfs4_minor_version_ops { #define NFS_SEQID_CONFIRMED 1 struct nfs_seqid_counter { ktime_t create_time; - int owner_id; + u64 owner_id; int flags; u32 counter; spinlock_t lock; /* Protects the list */ diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 84573df5cf5a..162c85a83a14 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -231,9 +231,10 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) __set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags); __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); __set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags); - - if (test_bit(NFS_CS_DS, &cl_init->init_flags)) - __set_bit(NFS_CS_DS, &clp->cl_flags); + if (test_bit(NFS_CS_PNFS, &cl_init->init_flags)) + __set_bit(NFS_CS_PNFS, &clp->cl_flags); + if (test_bit(NFS_CS_NETUNREACH_FATAL, &cl_init->init_flags)) + __set_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags); /* * Set up the connection to the server before we add add to the * global list. @@ -938,6 +939,9 @@ static int nfs4_set_client(struct nfs_server *server, __set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags); server->port = rpc_get_port((struct sockaddr *)addr); + if (server->flags & NFS_MOUNT_NETUNREACH_FATAL) + __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags); + /* Allocate or find a client reference we can use */ clp = nfs_get_client(&cl_init); if (IS_ERR(clp)) @@ -1012,8 +1016,9 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, if (mds_srv->flags & NFS_MOUNT_NORESVPORT) __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + if (test_bit(NFS_CS_NETUNREACH_FATAL, &mds_clp->cl_flags)) + __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags); - __set_bit(NFS_CS_DS, &cl_init.init_flags); __set_bit(NFS_CS_PNFS, &cl_init.init_flags); cl_init.max_connect = NFS_MAX_TRANSPORTS; /* diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ea390db94b62..b1d2122bd5a7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -103,10 +103,10 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, const struct cred *cred, struct nfs4_slot *slot, bool is_privileged); -static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *, - const struct cred *); +static int nfs41_test_stateid(struct nfs_server *, const nfs4_stateid *, + const struct cred *); static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *, - const struct cred *, bool); + const struct cred *, bool); #endif #ifdef CONFIG_NFS_V4_SECURITY_LABEL @@ -114,6 +114,7 @@ static inline struct nfs4_label * nfs4_label_init_security(struct inode *dir, struct dentry *dentry, struct iattr *sattr, struct nfs4_label *label) { + struct lsm_context shim; int err; if (label == NULL) @@ -128,18 +129,26 @@ nfs4_label_init_security(struct inode *dir, struct dentry *dentry, label->label = NULL; err = security_dentry_init_security(dentry, sattr->ia_mode, - &dentry->d_name, NULL, - (void **)&label->label, &label->len); - if (err == 0) - return label; + &dentry->d_name, NULL, &shim); + if (err) + return NULL; - return NULL; + label->lsmid = shim.id; + label->label = shim.context; + label->len = shim.len; + return label; } static inline void nfs4_label_release_security(struct nfs4_label *label) { - if (label) - security_release_secctx(label->label, label->len); + struct lsm_context shim; + + if (label) { + shim.context = label->label; + shim.len = label->len; + shim.id = label->lsmid; + security_release_secctx(&shim); + } } static inline u32 *nfs4_bitmask(struct nfs_server *server, struct nfs4_label *label) { @@ -186,6 +195,9 @@ static int nfs4_map_errors(int err) return -EBUSY; case -NFS4ERR_NOT_SAME: return -ENOTSYNC; + case -ENETDOWN: + case -ENETUNREACH: + break; default: dprintk("%s could not handle NFSv4 error %d\n", __func__, -err); @@ -293,7 +305,7 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src, unsigned long cache_validity; memcpy(dst, src, NFS4_BITMASK_SZ*sizeof(*dst)); - if (!inode || !nfs4_have_delegation(inode, FMODE_READ)) + if (!inode || !nfs_have_read_or_write_delegation(inode)) return; cache_validity = READ_ONCE(NFS_I(inode)->cache_validity) | flags; @@ -310,6 +322,18 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src, dst[1] &= ~FATTR4_WORD1_MODE; if (!(cache_validity & NFS_INO_INVALID_OTHER)) dst[1] &= ~(FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP); + + if (nfs_have_delegated_mtime(inode)) { + if (!(cache_validity & NFS_INO_INVALID_ATIME)) + dst[1] &= ~FATTR4_WORD1_TIME_ACCESS; + if (!(cache_validity & NFS_INO_INVALID_MTIME)) + dst[1] &= ~FATTR4_WORD1_TIME_MODIFY; + if (!(cache_validity & NFS_INO_INVALID_CTIME)) + dst[1] &= ~FATTR4_WORD1_TIME_METADATA; + } else if (nfs_have_delegated_atime(inode)) { + if (!(cache_validity & NFS_INO_INVALID_ATIME)) + dst[1] &= ~FATTR4_WORD1_TIME_ACCESS; + } } static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry, @@ -422,6 +446,8 @@ static int nfs4_delay_killable(long *timeout) { might_sleep(); + if (unlikely(nfs_current_task_exiting())) + return -EINTR; __set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); schedule_timeout(nfs4_update_delay(timeout)); if (!__fatal_signal_pending(current)) @@ -433,6 +459,8 @@ static int nfs4_delay_interruptible(long *timeout) { might_sleep(); + if (unlikely(nfs_current_task_exiting())) + return -EINTR; __set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE_UNSAFE); schedule_timeout(nfs4_update_delay(timeout)); if (!signal_pending(current)) @@ -643,6 +671,15 @@ nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server, struct nfs_client *clp = server->nfs_client; int ret; + if ((task->tk_rpc_status == -ENETDOWN || + task->tk_rpc_status == -ENETUNREACH) && + task->tk_flags & RPC_TASK_NETUNREACH_FATAL) { + exception->delay = 0; + exception->recovering = 0; + exception->retry = 0; + return -EIO; + } + ret = nfs4_do_handle_exception(server, errorcode, exception); if (exception->delay) { int ret2 = nfs4_exception_should_retrans(server, exception); @@ -1245,7 +1282,8 @@ nfs4_update_changeattr_locked(struct inode *inode, struct nfs_inode *nfsi = NFS_I(inode); u64 change_attr = inode_peek_iversion_raw(inode); - cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME; + if (!nfs_have_delegated_mtime(inode)) + cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME; if (S_ISDIR(inode->i_mode)) cache_validity |= NFS_INO_INVALID_DATA; @@ -1264,7 +1302,7 @@ nfs4_update_changeattr_locked(struct inode *inode, if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); - if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (!nfs_have_delegated_attributes(inode)) cache_validity |= NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_INVALID_SIZE | NFS_INO_INVALID_OTHER | @@ -1320,8 +1358,7 @@ static fmode_t _nfs4_ctx_to_openmode(const struct nfs_open_context *ctx) } static u32 -nfs4_map_atomic_open_share(struct nfs_server *server, - fmode_t fmode, int openflags) +nfs4_fmode_to_share_access(fmode_t fmode) { u32 res = 0; @@ -1335,11 +1372,27 @@ nfs4_map_atomic_open_share(struct nfs_server *server, case FMODE_READ|FMODE_WRITE: res = NFS4_SHARE_ACCESS_BOTH; } + return res; +} + +static u32 +nfs4_map_atomic_open_share(struct nfs_server *server, + fmode_t fmode, int openflags) +{ + u32 res = nfs4_fmode_to_share_access(fmode); + if (!(server->caps & NFS_CAP_ATOMIC_OPEN_V1)) goto out; /* Want no delegation if we're using O_DIRECT */ - if (openflags & O_DIRECT) + if (openflags & O_DIRECT) { res |= NFS4_SHARE_WANT_NO_DELEG; + goto out; + } + /* res |= NFS4_SHARE_WANT_NO_PREFERENCE; */ + if (server->caps & NFS_CAP_DELEGTIME) + res |= NFS4_SHARE_WANT_DELEG_TIMESTAMPS; + if (server->caps & NFS_CAP_OPEN_XOR) + res |= NFS4_SHARE_WANT_OPEN_XOR_DELEGATION; out: return res; } @@ -1737,7 +1790,8 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, rcu_read_unlock(); trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0); - if (!fatal_signal_pending(current)) { + if (!fatal_signal_pending(current) && + !nfs_current_task_exiting()) { if (schedule_timeout(5*HZ) == 0) status = -EAGAIN; else @@ -1954,44 +2008,41 @@ out_return_state: } static void -nfs4_opendata_check_deleg(struct nfs4_opendata *data, struct nfs4_state *state) -{ - struct nfs_client *clp = NFS_SERVER(state->inode)->nfs_client; - struct nfs_delegation *delegation; - int delegation_flags = 0; - - rcu_read_lock(); - delegation = rcu_dereference(NFS_I(state->inode)->delegation); - if (delegation) - delegation_flags = delegation->flags; - rcu_read_unlock(); - switch (data->o_arg.claim) { - default: +nfs4_process_delegation(struct inode *inode, const struct cred *cred, + enum open_claim_type4 claim, + const struct nfs4_open_delegation *delegation) +{ + switch (delegation->open_delegation_type) { + case NFS4_OPEN_DELEGATE_READ: + case NFS4_OPEN_DELEGATE_WRITE: + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: break; + default: + return; + } + switch (claim) { case NFS4_OPEN_CLAIM_DELEGATE_CUR: case NFS4_OPEN_CLAIM_DELEG_CUR_FH: pr_err_ratelimited("NFS: Broken NFSv4 server %s is " "returning a delegation for " "OPEN(CLAIM_DELEGATE_CUR)\n", - clp->cl_hostname); - return; + NFS_SERVER(inode)->nfs_client->cl_hostname); + break; + case NFS4_OPEN_CLAIM_PREVIOUS: + nfs_inode_reclaim_delegation(inode, cred, delegation->type, + &delegation->stateid, + delegation->pagemod_limit, + delegation->open_delegation_type); + break; + default: + nfs_inode_set_delegation(inode, cred, delegation->type, + &delegation->stateid, + delegation->pagemod_limit, + delegation->open_delegation_type); } - if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) - nfs_inode_set_delegation(state->inode, - data->owner->so_cred, - data->o_res.delegation_type, - &data->o_res.delegation, - data->o_res.pagemod_limit); - else - nfs_inode_reclaim_delegation(state->inode, - data->owner->so_cred, - data->o_res.delegation_type, - &data->o_res.delegation, - data->o_res.pagemod_limit); - - if (data->o_res.do_recall) - nfs_async_inode_return_delegation(state->inode, - &data->o_res.delegation); + if (delegation->do_recall) + nfs_async_inode_return_delegation(inode, &delegation->stateid); } /* @@ -2015,11 +2066,16 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) if (ret) return ERR_PTR(ret); - if (data->o_res.delegation_type != 0) - nfs4_opendata_check_deleg(data, state); + nfs4_process_delegation(state->inode, + data->owner->so_cred, + data->o_arg.claim, + &data->o_res.delegation); - if (!update_open_stateid(state, &data->o_res.stateid, - NULL, data->o_arg.fmode)) + if (!(data->o_res.rflags & NFS4_OPEN_RESULT_NO_OPEN_STATEID)) { + if (!update_open_stateid(state, &data->o_res.stateid, + NULL, data->o_arg.fmode)) + return ERR_PTR(-EAGAIN); + } else if (!update_open_stateid(state, NULL, NULL, data->o_arg.fmode)) return ERR_PTR(-EAGAIN); refcount_inc(&state->count); @@ -2083,10 +2139,18 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) if (IS_ERR(state)) goto out; - if (data->o_res.delegation_type != 0) - nfs4_opendata_check_deleg(data, state); - if (!update_open_stateid(state, &data->o_res.stateid, - NULL, data->o_arg.fmode)) { + nfs4_process_delegation(state->inode, + data->owner->so_cred, + data->o_arg.claim, + &data->o_res.delegation); + + if (!(data->o_res.rflags & NFS4_OPEN_RESULT_NO_OPEN_STATEID)) { + if (!update_open_stateid(state, &data->o_res.stateid, + NULL, data->o_arg.fmode)) { + nfs4_put_open_state(state); + state = ERR_PTR(-EAGAIN); + } + } else if (!update_open_stateid(state, NULL, NULL, data->o_arg.fmode)) { nfs4_put_open_state(state); state = ERR_PTR(-EAGAIN); } @@ -2222,7 +2286,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state { struct nfs_delegation *delegation; struct nfs4_opendata *opendata; - fmode_t delegation_type = 0; + u32 delegation_type = NFS4_OPEN_DELEGATE_NONE; int status; opendata = nfs4_open_recoverdata_alloc(ctx, state, @@ -2231,8 +2295,20 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state return PTR_ERR(opendata); rcu_read_lock(); delegation = rcu_dereference(NFS_I(state->inode)->delegation); - if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0) - delegation_type = delegation->type; + if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0) { + switch(delegation->type) { + case FMODE_READ: + delegation_type = NFS4_OPEN_DELEGATE_READ; + if (test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags)) + delegation_type = NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG; + break; + case FMODE_WRITE: + case FMODE_READ|FMODE_WRITE: + delegation_type = NFS4_OPEN_DELEGATE_WRITE; + if (test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags)) + delegation_type = NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG; + } + } rcu_read_unlock(); opendata->o_arg.u.delegation_type = delegation_type; status = nfs4_open_recover(opendata, state); @@ -2553,12 +2629,14 @@ static void nfs4_open_release(void *calldata) struct nfs4_opendata *data = calldata; struct nfs4_state *state = NULL; + /* In case of error, no cleanup! */ + if (data->rpc_status != 0 || !data->rpc_done) { + nfs_release_seqid(data->o_arg.seqid); + goto out_free; + } /* If this request hasn't been cancelled, do nothing */ if (!data->cancelled) goto out_free; - /* In case of error, no cleanup! */ - if (data->rpc_status != 0 || !data->rpc_done) - goto out_free; /* In case we need an open_confirm, no cleanup! */ if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) goto out_free; @@ -2825,16 +2903,16 @@ static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st } static int nfs40_test_and_free_expired_stateid(struct nfs_server *server, - nfs4_stateid *stateid, - const struct cred *cred) + const nfs4_stateid *stateid, + const struct cred *cred) { return -NFS4ERR_BAD_STATEID; } #if defined(CONFIG_NFS_V4_1) static int nfs41_test_and_free_expired_stateid(struct nfs_server *server, - nfs4_stateid *stateid, - const struct cred *cred) + const nfs4_stateid *stateid, + const struct cred *cred) { int status; @@ -3093,9 +3171,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, if (d_really_is_negative(dentry)) { struct dentry *alias; d_drop(dentry); - alias = d_exact_alias(dentry, state->inode); - if (!alias) - alias = d_splice_alias(igrab(state->inode), dentry); + alias = d_splice_alias(igrab(state->inode), dentry); /* d_splice_alias() can't fail here - it's a non-directory */ if (alias) { dput(ctx->dentry); @@ -3111,7 +3187,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, case NFS4_OPEN_CLAIM_DELEGATE_PREV: if (!opendata->rpc_done) break; - if (opendata->o_res.delegation_type != 0) + if (opendata->o_res.delegation.type != 0) dir_verifier = nfs_save_change_attribute(dir); nfs_set_verifier(dentry, dir_verifier); } @@ -3394,13 +3470,18 @@ static int nfs4_do_setattr(struct inode *inode, const struct cred *cred, .inode = inode, .stateid = &arg.stateid, }; - unsigned long adjust_flags = NFS_INO_INVALID_CHANGE; + unsigned long adjust_flags = NFS_INO_INVALID_CHANGE | + NFS_INO_INVALID_CTIME; int err; if (sattr->ia_valid & (ATTR_MODE | ATTR_KILL_SUID | ATTR_KILL_SGID)) adjust_flags |= NFS_INO_INVALID_MODE; if (sattr->ia_valid & (ATTR_UID | ATTR_GID)) adjust_flags |= NFS_INO_INVALID_OTHER; + if (sattr->ia_valid & ATTR_ATIME) + adjust_flags |= NFS_INO_INVALID_ATIME; + if (sattr->ia_valid & ATTR_MTIME) + adjust_flags |= NFS_INO_INVALID_MTIME; do { nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, fattr->label), @@ -3512,7 +3593,7 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst, write_sequnlock(&state->seqlock); trace_nfs4_close_stateid_update_wait(state->inode, dst, 0); - if (fatal_signal_pending(current)) + if (fatal_signal_pending(current) || nfs_current_task_exiting()) status = -EINTR; else if (schedule_timeout(5*HZ) != 0) @@ -3700,7 +3781,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) { /* Close-to-open cache consistency revalidation */ - if (!nfs4_have_delegation(inode, FMODE_READ)) { + if (!nfs4_have_delegation(inode, FMODE_READ, 0)) { nfs4_bitmask_set(calldata->arg.bitmask_store, server->cache_consistency_bitmask, inode, 0); @@ -3710,8 +3791,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) } calldata->arg.share_access = - nfs4_map_atomic_open_share(NFS_SERVER(inode), - calldata->arg.fmode, 0); + nfs4_fmode_to_share_access(calldata->arg.fmode); if (calldata->res.fattr == NULL) calldata->arg.bitmask = NULL; @@ -3842,8 +3922,11 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) { + struct dentry *dentry = ctx->dentry; if (ctx->state == NULL) return; + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) + nfs4_inode_set_return_delegation_on_close(d_inode(dentry)); if (is_sync) nfs4_close_sync(ctx->state, _nfs4_ctx_to_openmode(ctx)); else @@ -3852,11 +3935,26 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) #define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL) #define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL) -#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_XATTR_SUPPORT - 1UL) +#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_OPEN_ARGUMENTS - 1UL) + +#define FATTR4_WORD2_NFS42_TIME_DELEG_MASK \ + (FATTR4_WORD2_TIME_DELEG_MODIFY|FATTR4_WORD2_TIME_DELEG_ACCESS) +static bool nfs4_server_delegtime_capable(struct nfs4_server_caps_res *res) +{ + u32 share_access_want = res->open_caps.oa_share_access_want[0]; + u32 attr_bitmask = res->attr_bitmask[2]; + + return (share_access_want & NFS4_SHARE_WANT_DELEG_TIMESTAMPS) && + ((attr_bitmask & FATTR4_WORD2_NFS42_TIME_DELEG_MASK) == + FATTR4_WORD2_NFS42_TIME_DELEG_MASK); +} static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { - u32 bitmask[3] = {}, minorversion = server->nfs_client->cl_minorversion; + u32 minorversion = server->nfs_client->cl_minorversion; + u32 bitmask[3] = { + [0] = FATTR4_WORD0_SUPPORTED_ATTRS, + }; struct nfs4_server_caps_arg args = { .fhandle = fhandle, .bitmask = bitmask, @@ -3878,10 +3976,19 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f FATTR4_WORD0_CASE_INSENSITIVE | FATTR4_WORD0_CASE_PRESERVING; if (minorversion) - bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT; + bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT | + FATTR4_WORD2_OPEN_ARGUMENTS; status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (status == 0) { + bitmask[0] = (FATTR4_WORD0_SUPPORTED_ATTRS | + FATTR4_WORD0_FH_EXPIRE_TYPE | + FATTR4_WORD0_LINK_SUPPORT | + FATTR4_WORD0_SYMLINK_SUPPORT | + FATTR4_WORD0_ACLSUPPORT | + FATTR4_WORD0_CASE_INSENSITIVE | + FATTR4_WORD0_CASE_PRESERVING) & + res.attr_bitmask[0]; /* Sanity check the server answers */ switch (minorversion) { case 0: @@ -3890,9 +3997,14 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f break; case 1: res.attr_bitmask[2] &= FATTR4_WORD2_NFS41_MASK; + bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT & + res.attr_bitmask[2]; break; case 2: res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK; + bitmask[2] = (FATTR4_WORD2_SUPPATTR_EXCLCREAT | + FATTR4_WORD2_OPEN_ARGUMENTS) & + res.attr_bitmask[2]; } memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | @@ -3939,6 +4051,12 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f sizeof(server->attr_bitmask)); server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + if (res.open_caps.oa_share_access_want[0] & + NFS4_SHARE_WANT_OPEN_XOR_DELEGATION) + server->caps |= NFS_CAP_OPEN_XOR; + if (nfs4_server_delegtime_capable(&res)) + server->caps |= NFS_CAP_DELEGTIME; + memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; @@ -4023,6 +4141,23 @@ static void test_fs_location_for_trunking(struct nfs4_fs_location *location, } } +static bool _is_same_nfs4_pathname(struct nfs4_pathname *path1, + struct nfs4_pathname *path2) +{ + int i; + + if (path1->ncomponents != path2->ncomponents) + return false; + for (i = 0; i < path1->ncomponents; i++) { + if (path1->components[i].len != path2->components[i].len) + return false; + if (memcmp(path1->components[i].data, path2->components[i].data, + path1->components[i].len)) + return false; + } + return true; +} + static int _nfs4_discover_trunking(struct nfs_server *server, struct nfs_fh *fhandle) { @@ -4056,9 +4191,13 @@ static int _nfs4_discover_trunking(struct nfs_server *server, if (status) goto out_free_3; - for (i = 0; i < locations->nlocations; i++) + for (i = 0; i < locations->nlocations; i++) { + if (!_is_same_nfs4_pathname(&locations->fs_path, + &locations->locations[i].rootpath)) + continue; test_fs_location_for_trunking(&locations->locations[i], clp, server); + } out_free_3: kfree(locations->fattr); out_free_2: @@ -4424,15 +4563,15 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, - struct dentry *dentry, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) + struct dentry *dentry, const struct qstr *name, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct nfs_server *server = NFS_SERVER(dir); int status; struct nfs4_lookup_arg args = { .bitmask = server->attr_bitmask, .dir_fh = NFS_FH(dir), - .name = &dentry->d_name, + .name = name, }; struct nfs4_lookup_res res = { .server = server, @@ -4474,17 +4613,16 @@ static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr) } static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, - struct dentry *dentry, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) + struct dentry *dentry, const struct qstr *name, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct nfs4_exception exception = { .interruptible = true, }; struct rpc_clnt *client = *clnt; - const struct qstr *name = &dentry->d_name; int err; do { - err = _nfs4_proc_lookup(client, dir, dentry, fhandle, fattr); + err = _nfs4_proc_lookup(client, dir, dentry, name, fhandle, fattr); trace_nfs4_lookup(dir, name, err); switch (err) { case -NFS4ERR_BADNAME: @@ -4519,13 +4657,13 @@ out: return err; } -static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry, +static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { int status; struct rpc_clnt *client = NFS_CLIENT(dir); - status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr); + status = nfs4_proc_lookup_common(&client, dir, dentry, name, fhandle, fattr); if (client != NFS_CLIENT(dir)) { rpc_shutdown_client(client); nfs_fixup_secinfo_attributes(fattr); @@ -4540,7 +4678,8 @@ nfs4_proc_lookup_mountpoint(struct inode *dir, struct dentry *dentry, struct rpc_clnt *client = NFS_CLIENT(dir); int status; - status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr); + status = nfs4_proc_lookup_common(&client, dir, dentry, &dentry->d_name, + fhandle, fattr); if (status < 0) return ERR_PTR(status); return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client; @@ -4617,7 +4756,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry }; int status = 0; - if (!nfs4_have_delegation(inode, FMODE_READ)) { + if (!nfs4_have_delegation(inode, FMODE_READ, 0)) { res.fattr = nfs_alloc_fattr(); if (res.fattr == NULL) return -ENOMEM; @@ -4935,8 +5074,9 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct goto out; nfs4_inode_make_writeable(inode); - nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.fattr->label), inode, - NFS_INO_INVALID_CHANGE); + nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.fattr->label), + inode, + NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (!status) { nfs4_update_changeattr(dir, &res.cinfo, res.fattr->time_start, @@ -5014,9 +5154,6 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_ &data->arg.seq_args, &data->res.seq_res, 1); if (status == 0) { spin_lock(&dir->i_lock); - /* Creating a directory bumps nlink in the parent */ - if (data->arg.ftype == NF4DIR) - nfs4_inc_nlink_locked(dir); nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo, data->res.fattr->time_start, NFS_INO_INVALID_DATA); @@ -5026,6 +5163,25 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_ return status; } +static struct dentry *nfs4_do_mkdir(struct inode *dir, struct dentry *dentry, + struct nfs4_createdata *data) +{ + int status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg, + &data->arg.seq_args, &data->res.seq_res, 1); + + if (status) + return ERR_PTR(status); + + spin_lock(&dir->i_lock); + /* Creating a directory bumps nlink in the parent */ + nfs4_inc_nlink_locked(dir); + nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo, + data->res.fattr->time_start, + NFS_INO_INVALID_DATA); + spin_unlock(&dir->i_lock); + return nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr); +} + static void nfs4_free_createdata(struct nfs4_createdata *data) { nfs4_label_free(data->fattr.label); @@ -5082,32 +5238,34 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, return err; } -static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, - struct iattr *sattr, struct nfs4_label *label) +static struct dentry *_nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, + struct iattr *sattr, + struct nfs4_label *label) { struct nfs4_createdata *data; - int status = -ENOMEM; + struct dentry *ret = ERR_PTR(-ENOMEM); data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4DIR); if (data == NULL) goto out; data->arg.label = label; - status = nfs4_do_create(dir, dentry, data); + ret = nfs4_do_mkdir(dir, dentry, data); nfs4_free_createdata(data); out: - return status; + return ret; } -static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, - struct iattr *sattr) +static struct dentry *nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, + struct iattr *sattr) { struct nfs_server *server = NFS_SERVER(dir); struct nfs4_exception exception = { .interruptible = true, }; struct nfs4_label l, *label; + struct dentry *alias; int err; label = nfs4_label_init_security(dir, dentry, sattr, &l); @@ -5115,14 +5273,15 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK)) sattr->ia_mode &= ~current_umask(); do { - err = _nfs4_proc_mkdir(dir, dentry, sattr, label); + alias = _nfs4_proc_mkdir(dir, dentry, sattr, label); + err = PTR_ERR_OR_ZERO(alias); trace_nfs4_mkdir(dir, &dentry->d_name, err); err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); } while (exception.retry); nfs4_label_release_security(label); - return err; + return alias; } static int _nfs4_proc_readdir(struct nfs_readdir_arg *nr_arg, @@ -5456,7 +5615,7 @@ static bool nfs4_read_plus_not_supported(struct rpc_task *task, struct rpc_message *msg = &task->tk_msg; if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS] && - server->caps & NFS_CAP_READ_PLUS && task->tk_status == -ENOTSUPP) { + task->tk_status == -ENOTSUPP) { server->caps &= ~NFS_CAP_READ_PLUS; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; rpc_restart_call_prepare(task); @@ -5586,7 +5745,7 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr) /* Otherwise, request attributes if and only if we don't hold * a delegation */ - return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; + return nfs4_have_delegation(hdr->inode, FMODE_READ, 0) == 0; } void nfs4_bitmask_set(__u32 bitmask[], const __u32 src[], @@ -6148,7 +6307,7 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf, size_t buflen) { struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_label label = {0, 0, buflen, buf}; + struct nfs4_label label = {0, 0, 0, buflen, buf}; u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL }; struct nfs_fattr fattr = { @@ -6253,7 +6412,7 @@ static int nfs4_do_set_security_label(struct inode *inode, static int nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen) { - struct nfs4_label ilabel = {0, 0, buflen, (char *)buf }; + struct nfs4_label ilabel = {0, 0, 0, buflen, (char *)buf }; struct nfs_fattr *fattr; int status; @@ -6268,6 +6427,7 @@ nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen) if (status == 0) nfs_setsecurity(inode, fattr); + nfs_free_fattr(fattr); return status; } #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ @@ -6553,6 +6713,7 @@ struct nfs4_delegreturndata { u32 roc_barrier; bool roc; } lr; + struct nfs4_delegattr sattr; struct nfs_fattr fattr; int rpc_status; struct inode *inode; @@ -6577,6 +6738,30 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) &data->res.lr_ret) == -EAGAIN) goto out_restart; + if (data->args.sattr_args && task->tk_status != 0) { + switch(data->res.sattr_ret) { + case 0: + data->args.sattr_args = NULL; + data->res.sattr_res = false; + break; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_EXPIRED: + case -NFS4ERR_BAD_STATEID: + /* Let the main handler below do stateid recovery */ + break; + case -NFS4ERR_OLD_STATEID: + if (nfs4_refresh_delegation_stateid(&data->stateid, + data->inode)) + goto out_restart; + fallthrough; + default: + data->args.sattr_args = NULL; + data->res.sattr_res = false; + goto out_restart; + } + } + switch (task->tk_status) { case 0: renew_lease(data->res.server, data->timestamp); @@ -6670,7 +6855,10 @@ static const struct rpc_call_ops nfs4_delegreturn_ops = { .rpc_release = nfs4_delegreturn_release, }; -static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync) +static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, + const nfs4_stateid *stateid, + struct nfs_delegation *delegation, + int issync) { struct nfs4_delegreturndata *data; struct nfs_server *server = NFS_SERVER(inode); @@ -6722,12 +6910,27 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, } } + if (delegation && + test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags)) { + if (delegation->type & FMODE_READ) { + data->sattr.atime = inode_get_atime(inode); + data->sattr.atime_set = true; + } + if (delegation->type & FMODE_WRITE) { + data->sattr.mtime = inode_get_mtime(inode); + data->sattr.mtime_set = true; + } + data->args.sattr_args = &data->sattr; + data->res.sattr_res = true; + } + if (!data->inode) nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 1); else nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0); + task_setup_data.callback_data = data; msg.rpc_argp = &data->args; msg.rpc_resp = &data->res; @@ -6745,13 +6948,16 @@ out: return status; } -int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync) +int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, + const nfs4_stateid *stateid, + struct nfs_delegation *delegation, int issync) { struct nfs_server *server = NFS_SERVER(inode); struct nfs4_exception exception = { }; int err; do { - err = _nfs4_proc_delegreturn(inode, cred, stateid, issync); + err = _nfs4_proc_delegreturn(inode, cred, stateid, + delegation, issync); trace_nfs4_delegreturn(inode, stateid, err); switch (err) { case -NFS4ERR_STALE_STATEID: @@ -6877,10 +7083,18 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, struct nfs4_unlockdata *p; struct nfs4_state *state = lsp->ls_state; struct inode *inode = state->inode; + struct nfs_lock_context *l_ctx; p = kzalloc(sizeof(*p), GFP_KERNEL); if (p == NULL) return NULL; + l_ctx = nfs_get_lock_context(ctx); + if (!IS_ERR(l_ctx)) { + p->l_ctx = l_ctx; + } else { + kfree(p); + return NULL; + } p->arg.fh = NFS_FH(inode); p->arg.fl = &p->fl; p->arg.seqid = seqid; @@ -6888,7 +7102,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, p->lsp = lsp; /* Ensure we don't close file until we're done freeing locks! */ p->ctx = get_nfs_open_context(ctx); - p->l_ctx = nfs_get_lock_context(ctx); locks_init_lock(&p->fl); locks_copy_lock(&p->fl, fl); p->server = NFS_SERVER(inode); @@ -7607,10 +7820,10 @@ static int nfs4_add_lease(struct file *file, int arg, struct file_lease **lease, int ret; /* No delegation, no lease */ - if (!nfs4_have_delegation(inode, type)) + if (!nfs4_have_delegation(inode, type, 0)) return -EAGAIN; ret = generic_setlease(file, arg, lease, priv); - if (ret || nfs4_have_delegation(inode, type)) + if (ret || nfs4_have_delegation(inode, type, 0)) return ret; /* We raced with a delegation return */ nfs4_delete_lease(file, priv); @@ -8818,7 +9031,7 @@ nfs4_run_exchange_id(struct nfs_client *clp, const struct cred *cred, #ifdef CONFIG_NFS_V4_1_MIGRATION calldata->args.flags |= EXCHGID4_FLAG_SUPP_MOVED_MIGR; #endif - if (test_bit(NFS_CS_DS, &clp->cl_flags)) + if (test_bit(NFS_CS_PNFS, &clp->cl_flags)) calldata->args.flags |= EXCHGID4_FLAG_USE_PNFS_DS; msg.rpc_argp = &calldata->args; msg.rpc_resp = &calldata->res; @@ -9405,7 +9618,7 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data) return; trace_nfs4_sequence(clp, task->tk_status); - if (task->tk_status < 0 && !task->tk_client->cl_shutdown) { + if (task->tk_status < 0 && clp->cl_cons_state >= 0) { dprintk("%s ERROR %d\n", __func__, task->tk_status); if (refcount_read(&clp->cl_count) == 1) return; @@ -9832,6 +10045,11 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) if (!nfs41_sequence_process(task, &lrp->res.seq_res)) return; + if (task->tk_rpc_status == -ETIMEDOUT) { + lrp->rpc_status = -EAGAIN; + lrp->res.lrs_present = 0; + return; + } /* * Was there an RPC level error? Assume the call succeeded, * and that we need to release the layout @@ -9851,13 +10069,25 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) fallthrough; default: task->tk_status = 0; + lrp->res.lrs_present = 0; fallthrough; case 0: break; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + nfs4_schedule_session_recovery(server->nfs_client->cl_session, + task->tk_status); + lrp->res.lrs_present = 0; + lrp->rpc_status = -EAGAIN; + task->tk_status = 0; + break; case -NFS4ERR_DELAY: - if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN) - break; - goto out_restart; + if (nfs4_async_handle_error(task, server, NULL, NULL) == + -EAGAIN) + goto out_restart; + lrp->res.lrs_present = 0; + break; } return; out_restart: @@ -9871,8 +10101,13 @@ static void nfs4_layoutreturn_release(void *calldata) struct nfs4_layoutreturn *lrp = calldata; struct pnfs_layout_hdr *lo = lrp->args.layout; - pnfs_layoutreturn_free_lsegs(lo, &lrp->args.stateid, &lrp->args.range, + if (lrp->rpc_status == 0 || !lrp->inode) + pnfs_layoutreturn_free_lsegs( + lo, &lrp->args.stateid, &lrp->args.range, lrp->res.lrs_present ? &lrp->res.stateid : NULL); + else + pnfs_layoutreturn_retry_later(lo, &lrp->args.stateid, + &lrp->args.range); nfs4_sequence_free_slot(&lrp->res.seq_res); if (lrp->ld_private.ops && lrp->ld_private.ops->free) lrp->ld_private.ops->free(&lrp->ld_private); @@ -9888,7 +10123,7 @@ static const struct rpc_call_ops nfs4_layoutreturn_call_ops = { .rpc_release = nfs4_layoutreturn_release, }; -int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) +int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, unsigned int flags) { struct rpc_task *task; struct rpc_message msg = { @@ -9911,7 +10146,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) &task_setup_data.rpc_client, &msg); lrp->inode = nfs_igrab_and_active(lrp->args.inode); - if (!sync) { + if (flags & PNFS_FL_LAYOUTRETURN_ASYNC) { if (!lrp->inode) { nfs4_layoutreturn_release(lrp); return -EAGAIN; @@ -9919,6 +10154,8 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) task_setup_data.flags |= RPC_TASK_ASYNC; } if (!lrp->inode) + flags |= PNFS_FL_LAYOUTRETURN_PRIVILEGED; + if (flags & PNFS_FL_LAYOUTRETURN_PRIVILEGED) nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1, 1); else @@ -9927,7 +10164,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); - if (sync) + if (!(flags & PNFS_FL_LAYOUTRETURN_ASYNC)) status = task->tk_status; trace_nfs4_layoutreturn(lrp->args.inode, &lrp->args.stateid, status); dprintk("<-- %s status=%d\n", __func__, status); @@ -10245,12 +10482,12 @@ out: } static int _nfs41_test_stateid(struct nfs_server *server, - nfs4_stateid *stateid, - const struct cred *cred) + const nfs4_stateid *stateid, + const struct cred *cred) { int status; struct nfs41_test_stateid_args args = { - .stateid = stateid, + .stateid = *stateid, }; struct nfs41_test_stateid_res res; struct rpc_message msg = { @@ -10306,8 +10543,8 @@ static void nfs4_handle_delay_or_session_error(struct nfs_server *server, * failed or the state ID is not currently valid. */ static int nfs41_test_stateid(struct nfs_server *server, - nfs4_stateid *stateid, - const struct cred *cred) + const nfs4_stateid *stateid, + const struct cred *cred) { struct nfs4_exception exception = { .interruptible = true, @@ -10585,7 +10822,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_CLONE | NFS_CAP_LAYOUTERROR | NFS_CAP_READ_PLUS - | NFS_CAP_MOVEABLE, + | NFS_CAP_MOVEABLE + | NFS_CAP_OFFLOAD_STATUS, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, @@ -10737,6 +10975,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .close_context = nfs4_close_context, .open_context = nfs4_atomic_open, .have_delegation = nfs4_have_delegation, + .return_delegation = nfs4_inode_return_delegation, .alloc_client = nfs4_alloc_client, .init_client = nfs4_init_client, .free_client = nfs4_free_client, diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 351616c61df5..f9c291e2165c 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -148,16 +148,12 @@ static inline void nfs4_copy_sessionid(struct nfs4_sessionid *dst, memcpy(dst->data, src->data, NFS4_MAX_SESSIONID_LEN); } -#ifdef CONFIG_CRC32 /* * nfs_session_id_hash - calculate the crc32 hash for the session id * @session - pointer to session */ #define nfs_session_id_hash(sess_id) \ (~crc32_le(0xFFFFFFFF, &(sess_id)->data[0], sizeof((sess_id)->data))) -#else -#define nfs_session_id_hash(session) (0) -#endif #else /* defined(CONFIG_NFS_V4_1) */ static inline int nfs4_init_session(struct nfs_client *clp) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 662e86ea3a2d..7612e977e80b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -501,11 +501,7 @@ nfs4_alloc_state_owner(struct nfs_server *server, sp = kzalloc(sizeof(*sp), gfp_flags); if (!sp) return NULL; - sp->so_seqid.owner_id = ida_alloc(&server->openowner_id, gfp_flags); - if (sp->so_seqid.owner_id < 0) { - kfree(sp); - return NULL; - } + sp->so_seqid.owner_id = atomic64_inc_return(&server->owner_ctr); sp->so_server = server; sp->so_cred = get_cred(cred); spin_lock_init(&sp->so_lock); @@ -536,7 +532,6 @@ static void nfs4_free_state_owner(struct nfs4_state_owner *sp) { nfs4_destroy_seqid_counter(&sp->so_seqid); put_cred(sp->so_cred); - ida_free(&sp->so_server->openowner_id, sp->so_seqid.owner_id); kfree(sp); } @@ -879,19 +874,13 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f refcount_set(&lsp->ls_count, 1); lsp->ls_state = state; lsp->ls_owner = owner; - lsp->ls_seqid.owner_id = ida_alloc(&server->lockowner_id, GFP_KERNEL_ACCOUNT); - if (lsp->ls_seqid.owner_id < 0) - goto out_free; + lsp->ls_seqid.owner_id = atomic64_inc_return(&server->owner_ctr); INIT_LIST_HEAD(&lsp->ls_locks); return lsp; -out_free: - kfree(lsp); - return NULL; } void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) { - ida_free(&server->lockowner_id, lsp->ls_seqid.owner_id); nfs4_destroy_seqid_counter(&lsp->ls_seqid); kfree(lsp); } @@ -1094,14 +1083,12 @@ void nfs_release_seqid(struct nfs_seqid *seqid) return; sequence = seqid->sequence; spin_lock(&sequence->lock); - list_del_init(&seqid->list); - if (!list_empty(&sequence->list)) { - struct nfs_seqid *next; - - next = list_first_entry(&sequence->list, - struct nfs_seqid, list); + if (list_is_first(&seqid->list, &sequence->list) && + !list_is_singular(&sequence->list)) { + struct nfs_seqid *next = list_next_entry(seqid, list); rpc_wake_up_queued_task(&sequence->wait, next->task); } + list_del_init(&seqid->list); spin_unlock(&sequence->lock); } @@ -1211,7 +1198,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) struct rpc_clnt *clnt = clp->cl_rpcclient; bool swapon = false; - if (clnt->cl_shutdown) + if (clp->cl_cons_state < 0) return; set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); @@ -1416,7 +1403,7 @@ int nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_ dprintk("%s: scheduling stateid recovery for server %s\n", __func__, clp->cl_hostname); nfs4_schedule_state_manager(clp); - return 0; + return clp->cl_cons_state < 0 ? clp->cl_cons_state : 0; } EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery); @@ -1596,7 +1583,7 @@ static void nfs42_complete_copies(struct nfs4_state_owner *sp, struct nfs4_state complete(©->completion); } } - list_for_each_entry(copy, &sp->so_server->ss_copies, src_copies) { + list_for_each_entry(copy, &sp->so_server->ss_src_copies, src_copies) { if ((test_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags) && !nfs4_stateid_match_other(&state->stateid, ©->parent_src_state->stateid))) @@ -1863,6 +1850,7 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) if (!nfs4_state_clear_reclaim_reboot(clp)) return; + pnfs_destroy_all_layouts(clp); ops = clp->cl_mvops->reboot_recovery_ops; cred = nfs4_get_clid_cred(clp); err = nfs4_reclaim_complete(clp, ops, cred); @@ -1956,6 +1944,7 @@ restart: set_bit(ops->owner_flag_bit, &sp->so_flags); nfs4_put_state_owner(sp); status = nfs4_recovery_handle_error(clp, status); + nfs4_free_state_owners(&freeme); return (status != 0) ? status : -EAGAIN; } @@ -1966,6 +1955,7 @@ restart: } rcu_read_unlock(); nfs4_free_state_owners(&freeme); + nfs_local_probe_async(clp); if (lost_locks) pr_warn("NFS: %s: lost %d locks\n", clp->cl_hostname, lost_locks); @@ -2022,6 +2012,12 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) nfs_mark_client_ready(clp, -EPERM); clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); return -EPERM; + case -ETIMEDOUT: + if (clp->cl_cons_state == NFS_CS_SESSION_INITING) { + nfs_mark_client_ready(clp, -EIO); + return -EIO; + } + fallthrough; case -EACCES: case -NFS4ERR_DELAY: case -EAGAIN: @@ -2068,7 +2064,6 @@ static int nfs4_establish_lease(struct nfs_client *clp) put_cred(cred); if (status != 0) return status; - pnfs_destroy_all_layouts(clp); return 0; } @@ -2116,6 +2111,7 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred { struct nfs_client *clp = server->nfs_client; struct nfs4_fs_locations *locations = NULL; + struct nfs_fattr *fattr; struct inode *inode; struct page *page; int status, result; @@ -2125,19 +2121,16 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred (unsigned long long)server->fsid.minor, clp->cl_hostname); - result = 0; page = alloc_page(GFP_KERNEL); locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); - if (page == NULL || locations == NULL) { - dprintk("<-- %s: no memory\n", __func__); - goto out; - } - locations->fattr = nfs_alloc_fattr(); - if (locations->fattr == NULL) { + fattr = nfs_alloc_fattr(); + if (page == NULL || locations == NULL || fattr == NULL) { dprintk("<-- %s: no memory\n", __func__); + result = 0; goto out; } + locations->fattr = fattr; inode = d_inode(server->super->s_root); result = nfs4_proc_get_locations(server, NFS_FH(inode), locations, page, cred); @@ -2682,6 +2675,8 @@ static void nfs4_state_manager(struct nfs_client *clp) section = "reclaim reboot"; status = nfs4_do_reclaim(clp, clp->cl_mvops->reboot_recovery_ops); + if (status == 0) + status = pnfs_layout_handle_reboot(clp); if (status == -EAGAIN) continue; if (status < 0) @@ -2744,7 +2739,15 @@ out_error: pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s" " with error %d\n", section_sep, section, clp->cl_hostname, -status); - ssleep(1); + switch (status) { + case -ENETDOWN: + case -ENETUNREACH: + nfs_mark_client_ready(clp, -EIO); + break; + default: + ssleep(1); + break; + } out_drain: memalloc_nofs_restore(memflags); nfs4_end_drain_session(clp); diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 8da5a9c000f4..b29a26923ce0 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -332,6 +332,7 @@ static void __exit exit_nfs_v4(void) nfs_dns_resolver_destroy(); } +MODULE_DESCRIPTION("NFSv4 client support"); MODULE_LICENSE("GPL"); module_init(init_nfs_v4); diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c index 886a7c4c60b3..d1a92d8f8ba4 100644 --- a/fs/nfs/nfs4sysctl.c +++ b/fs/nfs/nfs4sysctl.c @@ -17,7 +17,7 @@ static const int nfs_set_port_min; static const int nfs_set_port_max = 65535; static struct ctl_table_header *nfs4_callback_sysctl_table; -static struct ctl_table nfs4_cb_sysctls[] = { +static const struct ctl_table nfs4_cb_sysctls[] = { { .procname = "nfs_callback_tcpport", .data = &nfs_callback_set_tcpport, diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c index d22c6670f770..389941ccc9c9 100644 --- a/fs/nfs/nfs4trace.c +++ b/fs/nfs/nfs4trace.c @@ -2,6 +2,8 @@ /* * Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com> */ +#include <uapi/linux/pr.h> +#include <linux/blkdev.h> #include <linux/nfs_fs.h> #include "nfs4_fs.h" #include "internal.h" @@ -29,5 +31,10 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_read_error); EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_write_error); EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_commit_error); +EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_reg); +EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_reg_err); +EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_unreg); +EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_unreg_err); + EXPORT_TRACEPOINT_SYMBOL_GPL(fl_getdevinfo); #endif diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 10985a4b8259..deab4c0e21a0 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -47,7 +47,7 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event, TP_fast_assign( __entry->error = error < 0 ? -error : 0; - __assign_str(dstaddr, clp->cl_hostname); + __assign_str(dstaddr); ), TP_printk( @@ -94,8 +94,8 @@ TRACE_EVENT(nfs4_trunked_exchange_id, TP_fast_assign( __entry->error = error < 0 ? -error : 0; - __assign_str(main_addr, clp->cl_hostname); - __assign_str(trunk_addr, addr); + __assign_str(main_addr); + __assign_str(trunk_addr); ), TP_printk( @@ -365,7 +365,7 @@ TRACE_EVENT(nfs4_state_mgr, TP_fast_assign( __entry->state = clp->cl_state; - __assign_str(hostname, clp->cl_hostname); + __assign_str(hostname); ), TP_printk( @@ -393,8 +393,8 @@ TRACE_EVENT(nfs4_state_mgr_failed, TP_fast_assign( __entry->error = status < 0 ? -status : 0; __entry->state = clp->cl_state; - __assign_str(hostname, clp->cl_hostname); - __assign_str(section, section); + __assign_str(hostname); + __assign_str(section); ), TP_printk( @@ -578,7 +578,7 @@ DECLARE_EVENT_CLASS(nfs4_open_event, __entry->fhandle = 0; } __entry->dir = NFS_FILEID(d_inode(ctx->dentry->d_parent)); - __assign_str(name, ctx->dentry->d_name.name); + __assign_str(name); ), TP_printk( @@ -1072,7 +1072,7 @@ DECLARE_EVENT_CLASS(nfs4_lookup_event, __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->error = -error; - __assign_str(name, name->name); + __assign_str(name); ), TP_printk( @@ -1156,8 +1156,8 @@ TRACE_EVENT(nfs4_rename, __entry->olddir = NFS_FILEID(olddir); __entry->newdir = NFS_FILEID(newdir); __entry->error = error < 0 ? -error : 0; - __assign_str(oldname, oldname->name); - __assign_str(newname, newname->name); + __assign_str(oldname); + __assign_str(newname); ), TP_printk( @@ -1359,7 +1359,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event, __entry->fileid = 0; __entry->dev = 0; } - __assign_str(dstaddr, clp ? clp->cl_hostname : "unknown"); + __assign_str(dstaddr); ), TP_printk( @@ -1416,7 +1416,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, __entry->fileid = 0; __entry->dev = 0; } - __assign_str(dstaddr, clp ? clp->cl_hostname : "unknown"); + __assign_str(dstaddr); __entry->stateid_seq = be32_to_cpu(stateid->seqid); __entry->stateid_hash = @@ -1960,7 +1960,7 @@ DECLARE_EVENT_CLASS(nfs4_deviceid_event, ), TP_fast_assign( - __assign_str(dstaddr, clp->cl_hostname); + __assign_str(dstaddr); memcpy(__entry->deviceid, deviceid->data, NFS4_DEVICEID4_SIZE); ), @@ -1998,7 +1998,7 @@ DECLARE_EVENT_CLASS(nfs4_deviceid_status, TP_fast_assign( __entry->dev = server->s_dev; __entry->status = status; - __assign_str(dstaddr, server->nfs_client->cl_hostname); + __assign_str(dstaddr); memcpy(__entry->deviceid, deviceid->data, NFS4_DEVICEID4_SIZE); ), @@ -2036,8 +2036,8 @@ TRACE_EVENT(fl_getdevinfo, ), TP_fast_assign( - __assign_str(mds_addr, server->nfs_client->cl_hostname); - __assign_str(ds_ips, ds_remotestr); + __assign_str(mds_addr); + __assign_str(ds_ips); memcpy(__entry->deviceid, deviceid->data, NFS4_DEVICEID4_SIZE); ), @@ -2051,13 +2051,15 @@ TRACE_EVENT(fl_getdevinfo, DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event, TP_PROTO( - const struct nfs_pgio_header *hdr + const struct nfs_pgio_header *hdr, + int error ), - TP_ARGS(hdr), + TP_ARGS(hdr, error), TP_STRUCT__entry( __field(unsigned long, error) + __field(unsigned long, nfs_error) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) @@ -2073,7 +2075,8 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event, TP_fast_assign( const struct inode *inode = hdr->inode; - __entry->error = hdr->res.op_status; + __entry->error = -error; + __entry->nfs_error = hdr->res.op_status; __entry->fhandle = nfs_fhandle_hash(hdr->args.fh); __entry->fileid = NFS_FILEID(inode); __entry->dev = inode->i_sb->s_dev; @@ -2083,14 +2086,13 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event, be32_to_cpu(hdr->args.stateid.seqid); __entry->stateid_hash = nfs_stateid_hash(&hdr->args.stateid); - __assign_str(dstaddr, hdr->ds_clp ? - rpc_peeraddr2str(hdr->ds_clp->cl_rpcclient, - RPC_DISPLAY_ADDR) : "unknown"); + __assign_str(dstaddr); ), TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s", + "offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s " + "nfs_error=%lu (%s)", -__entry->error, show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), @@ -2098,28 +2100,32 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event, __entry->fhandle, __entry->offset, __entry->count, __entry->stateid_seq, __entry->stateid_hash, - __get_str(dstaddr) + __get_str(dstaddr), __entry->nfs_error, + show_nfs4_status(__entry->nfs_error) ) ); #define DEFINE_NFS4_FLEXFILES_IO_EVENT(name) \ DEFINE_EVENT(nfs4_flexfiles_io_event, name, \ TP_PROTO( \ - const struct nfs_pgio_header *hdr \ + const struct nfs_pgio_header *hdr, \ + int error \ ), \ - TP_ARGS(hdr)) + TP_ARGS(hdr, error)) DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_read_error); DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_write_error); TRACE_EVENT(ff_layout_commit_error, TP_PROTO( - const struct nfs_commit_data *data + const struct nfs_commit_data *data, + int error ), - TP_ARGS(data), + TP_ARGS(data, error), TP_STRUCT__entry( __field(unsigned long, error) + __field(unsigned long, nfs_error) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) @@ -2133,30 +2139,118 @@ TRACE_EVENT(ff_layout_commit_error, TP_fast_assign( const struct inode *inode = data->inode; - __entry->error = data->res.op_status; + __entry->error = -error; + __entry->nfs_error = data->res.op_status; __entry->fhandle = nfs_fhandle_hash(data->args.fh); __entry->fileid = NFS_FILEID(inode); __entry->dev = inode->i_sb->s_dev; __entry->offset = data->args.offset; __entry->count = data->args.count; - __assign_str(dstaddr, data->ds_clp ? - rpc_peeraddr2str(data->ds_clp->cl_rpcclient, - RPC_DISPLAY_ADDR) : "unknown"); + __assign_str(dstaddr); ), TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%llu count=%u dstaddr=%s", + "offset=%llu count=%u dstaddr=%s nfs_error=%lu (%s)", -__entry->error, show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->offset, __entry->count, - __get_str(dstaddr) + __get_str(dstaddr), __entry->nfs_error, + show_nfs4_status(__entry->nfs_error) ) ); +DECLARE_EVENT_CLASS(pnfs_bl_pr_key_class, + TP_PROTO( + const struct block_device *bdev, + u64 key + ), + TP_ARGS(bdev, key), + TP_STRUCT__entry( + __field(u64, key) + __field(dev_t, dev) + __string(device, bdev->bd_disk->disk_name) + ), + TP_fast_assign( + __entry->key = key; + __entry->dev = bdev->bd_dev; + __assign_str(device); + ), + TP_printk("dev=%d,%d (%s) key=0x%016llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __get_str(device), __entry->key + ) +); + +#define DEFINE_NFS4_BLOCK_PRKEY_EVENT(name) \ + DEFINE_EVENT(pnfs_bl_pr_key_class, name, \ + TP_PROTO( \ + const struct block_device *bdev, \ + u64 key \ + ), \ + TP_ARGS(bdev, key)) +DEFINE_NFS4_BLOCK_PRKEY_EVENT(bl_pr_key_reg); +DEFINE_NFS4_BLOCK_PRKEY_EVENT(bl_pr_key_unreg); + +/* + * From uapi/linux/pr.h + */ +TRACE_DEFINE_ENUM(PR_STS_SUCCESS); +TRACE_DEFINE_ENUM(PR_STS_IOERR); +TRACE_DEFINE_ENUM(PR_STS_RESERVATION_CONFLICT); +TRACE_DEFINE_ENUM(PR_STS_RETRY_PATH_FAILURE); +TRACE_DEFINE_ENUM(PR_STS_PATH_FAST_FAILED); +TRACE_DEFINE_ENUM(PR_STS_PATH_FAILED); + +#define show_pr_status(x) \ + __print_symbolic(x, \ + { PR_STS_SUCCESS, "SUCCESS" }, \ + { PR_STS_IOERR, "IOERR" }, \ + { PR_STS_RESERVATION_CONFLICT, "RESERVATION_CONFLICT" }, \ + { PR_STS_RETRY_PATH_FAILURE, "RETRY_PATH_FAILURE" }, \ + { PR_STS_PATH_FAST_FAILED, "PATH_FAST_FAILED" }, \ + { PR_STS_PATH_FAILED, "PATH_FAILED" }) + +DECLARE_EVENT_CLASS(pnfs_bl_pr_key_err_class, + TP_PROTO( + const struct block_device *bdev, + u64 key, + int status + ), + TP_ARGS(bdev, key, status), + TP_STRUCT__entry( + __field(u64, key) + __field(dev_t, dev) + __field(unsigned long, status) + __string(device, bdev->bd_disk->disk_name) + ), + TP_fast_assign( + __entry->key = key; + __entry->dev = bdev->bd_dev; + __entry->status = status; + __assign_str(device); + ), + TP_printk("dev=%d,%d (%s) key=0x%016llx status=%s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __get_str(device), __entry->key, + show_pr_status(__entry->status) + ) +); + +#define DEFINE_NFS4_BLOCK_PRKEY_ERR_EVENT(name) \ + DEFINE_EVENT(pnfs_bl_pr_key_err_class, name, \ + TP_PROTO( \ + const struct block_device *bdev, \ + u64 key, \ + int status \ + ), \ + TP_ARGS(bdev, key, status)) +DEFINE_NFS4_BLOCK_PRKEY_ERR_EVENT(bl_pr_key_reg_err); +DEFINE_NFS4_BLOCK_PRKEY_ERR_EVENT(bl_pr_key_unreg_err); + #ifdef CONFIG_NFS_V4_2 TRACE_DEFINE_ENUM(NFS4_CONTENT_DATA); TRACE_DEFINE_ENUM(NFS4_CONTENT_HOLE); @@ -2524,7 +2618,7 @@ TRACE_EVENT(nfs4_copy_notify, ) ); -TRACE_EVENT(nfs4_offload_cancel, +DECLARE_EVENT_CLASS(nfs4_offload_class, TP_PROTO( const struct nfs42_offload_status_args *args, int error @@ -2556,6 +2650,15 @@ TRACE_EVENT(nfs4_offload_cancel, __entry->stateid_seq, __entry->stateid_hash ) ); +#define DEFINE_NFS4_OFFLOAD_EVENT(name) \ + DEFINE_EVENT(nfs4_offload_class, name, \ + TP_PROTO( \ + const struct nfs42_offload_status_args *args, \ + int error \ + ), \ + TP_ARGS(args, error)) +DEFINE_NFS4_OFFLOAD_EVENT(nfs4_offload_cancel); +DEFINE_NFS4_OFFLOAD_EVENT(nfs4_offload_status); DECLARE_EVENT_CLASS(nfs4_xattr_event, TP_PROTO( @@ -2579,7 +2682,7 @@ DECLARE_EVENT_CLASS(nfs4_xattr_event, __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); - __assign_str(name, name); + __assign_str(name); ), TP_printk( diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1416099dfcd1..55bef5fbfa47 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -52,6 +52,7 @@ #include <linux/nfs.h> #include <linux/nfs4.h> #include <linux/nfs_fs.h> +#include <linux/nfs_common.h> #include "nfs4_fs.h" #include "nfs4trace.h" @@ -63,11 +64,7 @@ #define NFSDBG_FACILITY NFSDBG_XDR -/* Mapping from NFS error code to "errno" error code. */ -#define errno_NFSERR_IO EIO - struct compound_hdr; -static int nfs4_stat_to_errno(int); static void encode_layoutget(struct xdr_stream *xdr, const struct nfs4_layoutget_args *args, struct compound_hdr *hdr); @@ -85,9 +82,8 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2) */ #define pagepad_maxsz (1) -#define open_owner_id_maxsz (1 + 2 + 1 + 1 + 2) -#define lock_owner_id_maxsz (1 + 1 + 4) -#define decode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) +#define open_owner_id_maxsz (2 + 1 + 2 + 2) +#define lock_owner_id_maxsz (2 + 1 + 2) #define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) #define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) #define op_encode_hdr_maxsz (1) @@ -188,7 +184,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, #define encode_claim_null_maxsz (1 + nfs4_name_maxsz) #define encode_open_maxsz (op_encode_hdr_maxsz + \ 2 + encode_share_access_maxsz + 2 + \ - open_owner_id_maxsz + \ + 1 + open_owner_id_maxsz + \ encode_opentype_maxsz + \ encode_claim_null_maxsz) #define decode_space_limit_maxsz (3) @@ -224,6 +220,11 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, encode_attrs_maxsz) #define decode_setattr_maxsz (op_decode_hdr_maxsz + \ nfs4_fattr_bitmap_maxsz) +#define encode_delegattr_maxsz (op_encode_hdr_maxsz + \ + encode_stateid_maxsz + \ + nfs4_fattr_bitmap_maxsz + \ + 2*nfstime4_maxsz) +#define decode_delegattr_maxsz (decode_setattr_maxsz) #define encode_read_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + 3) #define decode_read_maxsz (op_decode_hdr_maxsz + 2 + pagepad_maxsz) @@ -253,13 +254,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, #define encode_link_maxsz (op_encode_hdr_maxsz + \ nfs4_name_maxsz) #define decode_link_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz) -#define encode_lockowner_maxsz (7) +#define encode_lockowner_maxsz (2 + 1 + lock_owner_id_maxsz) + #define encode_lock_maxsz (op_encode_hdr_maxsz + \ 7 + \ 1 + encode_stateid_maxsz + 1 + \ encode_lockowner_maxsz) #define decode_lock_denied_maxsz \ - (8 + decode_lockowner_maxsz) + (2 + 2 + 1 + 2 + 1 + lock_owner_id_maxsz) #define decode_lock_maxsz (op_decode_hdr_maxsz + \ decode_lock_denied_maxsz) #define encode_lockt_maxsz (op_encode_hdr_maxsz + 5 + \ @@ -615,7 +617,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, encode_lockowner_maxsz) #define NFS4_dec_release_lockowner_sz \ (compound_decode_hdr_maxsz + \ - decode_lockowner_maxsz) + decode_release_lockowner_maxsz) #define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -758,12 +760,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_layoutreturn_maxsz + \ + encode_delegattr_maxsz + \ encode_delegreturn_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_layoutreturn_maxsz + \ + decode_delegattr_maxsz + \ decode_delegreturn_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \ @@ -968,11 +972,6 @@ static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes) return p; } -static void encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len) -{ - WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0); -} - static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) { WARN_ON_ONCE(xdr_stream_encode_opaque(xdr, str, len) < 0); @@ -1060,9 +1059,10 @@ static void encode_nops(struct compound_hdr *hdr) *hdr->nops_p = htonl(hdr->nops); } -static void encode_nfs4_stateid(struct xdr_stream *xdr, const nfs4_stateid *stateid) +static void encode_nfs4_stateid(struct xdr_stream *xdr, + const nfs4_stateid *stateid) { - encode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE); + encode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE); } static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf) @@ -1412,16 +1412,16 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena __be32 *p; /* * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, - * owner 4 = 32 + * owner 28 */ encode_nfs4_seqid(xdr, arg->seqid); encode_share_access(xdr, arg->share_access); - p = reserve_space(xdr, 36); + p = reserve_space(xdr, 40); p = xdr_encode_hyper(p, arg->clientid); - *p++ = cpu_to_be32(24); + *p++ = cpu_to_be32(28); p = xdr_encode_opaque_fixed(p, "open id:", 8); *p++ = cpu_to_be32(arg->server->s_dev); - *p++ = cpu_to_be32(arg->id.uniquifier); + p = xdr_encode_hyper(p, arg->id.uniquifier); xdr_encode_hyper(p, arg->id.create_time); } @@ -1468,20 +1468,18 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a } } -static inline void encode_delegation_type(struct xdr_stream *xdr, fmode_t delegation_type) +static inline void encode_delegation_type(struct xdr_stream *xdr, u32 delegation_type) { __be32 *p; p = reserve_space(xdr, 4); switch (delegation_type) { - case 0: - *p = cpu_to_be32(NFS4_OPEN_DELEGATE_NONE); - break; - case FMODE_READ: - *p = cpu_to_be32(NFS4_OPEN_DELEGATE_READ); - break; - case FMODE_WRITE|FMODE_READ: - *p = cpu_to_be32(NFS4_OPEN_DELEGATE_WRITE); + case NFS4_OPEN_DELEGATE_NONE: + case NFS4_OPEN_DELEGATE_READ: + case NFS4_OPEN_DELEGATE_WRITE: + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: + *p = cpu_to_be32(delegation_type); break; default: BUG(); @@ -1497,7 +1495,7 @@ static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr * encode_string(xdr, name->len, name->name); } -static inline void encode_claim_previous(struct xdr_stream *xdr, fmode_t type) +static inline void encode_claim_previous(struct xdr_stream *xdr, u32 type) { __be32 *p; @@ -1735,6 +1733,33 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs server->attr_bitmask); } +static void encode_delegattr(struct xdr_stream *xdr, + const nfs4_stateid *stateid, + const struct nfs4_delegattr *attr, + struct compound_hdr *hdr) +{ + uint32_t bitmap[3] = { 0 }; + uint32_t len = 0; + __be32 *p; + + encode_op_hdr(xdr, OP_SETATTR, encode_delegattr_maxsz, hdr); + encode_nfs4_stateid(xdr, stateid); + if (attr->atime_set) { + bitmap[2] |= FATTR4_WORD2_TIME_DELEG_ACCESS; + len += (nfstime4_maxsz << 2); + } + if (attr->mtime_set) { + bitmap[2] |= FATTR4_WORD2_TIME_DELEG_MODIFY; + len += (nfstime4_maxsz << 2); + } + xdr_encode_bitmap4(xdr, bitmap, ARRAY_SIZE(bitmap)); + xdr_stream_encode_opaque_inline(xdr, (void **)&p, len); + if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_ACCESS) + p = xdr_encode_nfstime4(p, &attr->atime); + if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_MODIFY) + p = xdr_encode_nfstime4(p, &attr->mtime); +} + static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr) { __be32 *p; @@ -2105,7 +2130,7 @@ static void encode_test_stateid(struct xdr_stream *xdr, { encode_op_hdr(xdr, OP_TEST_STATEID, decode_test_stateid_maxsz, hdr); encode_uint32(xdr, 1); - encode_nfs4_stateid(xdr, args->stateid); + encode_nfs4_stateid(xdr, &args->stateid); } static void encode_free_stateid(struct xdr_stream *xdr, @@ -2812,6 +2837,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, encode_putfh(xdr, args->fhandle, &hdr); if (args->lr_args) encode_layoutreturn(xdr, args->lr_args, &hdr); + if (args->sattr_args) + encode_delegattr(xdr, args->stateid, args->sattr_args, &hdr); if (args->bitmask) encode_getfattr(xdr, args->bitmask, &hdr); encode_delegreturn(xdr, args->stateid, &hdr); @@ -3412,7 +3439,7 @@ static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, ui *res = be32_to_cpup(p); bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT; } - dprintk("%s: link support=%s\n", __func__, *res == 0 ? "false" : "true"); + dprintk("%s: link support=%s\n", __func__, str_false_true(*res == 0)); return 0; } @@ -3430,7 +3457,7 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, *res = be32_to_cpup(p); bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT; } - dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true"); + dprintk("%s: symlink support=%s\n", __func__, str_false_true(*res == 0)); return 0; } @@ -3572,7 +3599,7 @@ static int decode_attr_case_insensitive(struct xdr_stream *xdr, uint32_t *bitmap *res = be32_to_cpup(p); bitmap[0] &= ~FATTR4_WORD0_CASE_INSENSITIVE; } - dprintk("%s: case_insensitive=%s\n", __func__, *res == 0 ? "false" : "true"); + dprintk("%s: case_insensitive=%s\n", __func__, str_false_true(*res == 0)); return 0; } @@ -3590,7 +3617,7 @@ static int decode_attr_case_preserving(struct xdr_stream *xdr, uint32_t *bitmap, *res = be32_to_cpup(p); bitmap[0] &= ~FATTR4_WORD0_CASE_PRESERVING; } - dprintk("%s: case_preserving=%s\n", __func__, *res == 0 ? "false" : "true"); + dprintk("%s: case_preserving=%s\n", __func__, str_false_true(*res == 0)); return 0; } @@ -4298,8 +4325,29 @@ static int decode_attr_xattrsupport(struct xdr_stream *xdr, uint32_t *bitmap, *res = be32_to_cpup(p); bitmap[2] &= ~FATTR4_WORD2_XATTR_SUPPORT; } - dprintk("%s: XATTR support=%s\n", __func__, - *res == 0 ? "false" : "true"); + dprintk("%s: XATTR support=%s\n", __func__, str_false_true(*res == 0)); + return 0; +} + +static int decode_attr_open_arguments(struct xdr_stream *xdr, uint32_t *bitmap, + struct nfs4_open_caps *res) +{ + memset(res, 0, sizeof(*res)); + if (unlikely(bitmap[2] & (FATTR4_WORD2_OPEN_ARGUMENTS - 1U))) + return -EIO; + if (likely(bitmap[2] & FATTR4_WORD2_OPEN_ARGUMENTS)) { + if (decode_bitmap4(xdr, res->oa_share_access, ARRAY_SIZE(res->oa_share_access)) < 0) + return -EIO; + if (decode_bitmap4(xdr, res->oa_share_deny, ARRAY_SIZE(res->oa_share_deny)) < 0) + return -EIO; + if (decode_bitmap4(xdr, res->oa_share_access_want, ARRAY_SIZE(res->oa_share_access_want)) < 0) + return -EIO; + if (decode_bitmap4(xdr, res->oa_open_claim, ARRAY_SIZE(res->oa_open_claim)) < 0) + return -EIO; + if (decode_bitmap4(xdr, res->oa_createmode, ARRAY_SIZE(res->oa_createmode)) < 0) + return -EIO; + bitmap[2] &= ~FATTR4_WORD2_OPEN_ARGUMENTS; + } return 0; } @@ -4352,14 +4400,6 @@ static int decode_access(struct xdr_stream *xdr, u32 *supported, u32 *access) return 0; } -static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len) -{ - ssize_t ret = xdr_stream_decode_opaque_fixed(xdr, buf, len); - if (unlikely(ret < 0)) - return -EIO; - return 0; -} - static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) { return decode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE); @@ -4477,6 +4517,8 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re if ((status = decode_attr_exclcreat_supported(xdr, bitmap, res->exclcreat_bitmask)) != 0) goto xdr_error; + if ((status = decode_attr_open_arguments(xdr, bitmap, &res->open_caps)) != 0) + goto xdr_error; status = verify_attr_len(xdr, savep, attrlen); xdr_error: dprintk("%s: xdr returned %d!\n", __func__, -status); @@ -5035,7 +5077,7 @@ static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) /* * We create the owner, so we know a proper owner.id length is 4. */ -static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl) +static int decode_lock_denied(struct xdr_stream *xdr, struct file_lock *fl) { uint64_t offset, length, clientid; __be32 *p; @@ -5148,13 +5190,12 @@ static int decode_space_limit(struct xdr_stream *xdr, } static int decode_rw_delegation(struct xdr_stream *xdr, - uint32_t delegation_type, - struct nfs_openres *res) + struct nfs4_open_delegation *res) { __be32 *p; int status; - status = decode_delegation_stateid(xdr, &res->delegation); + status = decode_delegation_stateid(xdr, &res->stateid); if (unlikely(status)) return status; p = xdr_inline_decode(xdr, 4); @@ -5162,52 +5203,57 @@ static int decode_rw_delegation(struct xdr_stream *xdr, return -EIO; res->do_recall = be32_to_cpup(p); - switch (delegation_type) { + switch (res->open_delegation_type) { case NFS4_OPEN_DELEGATE_READ: - res->delegation_type = FMODE_READ; + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: + res->type = FMODE_READ; break; case NFS4_OPEN_DELEGATE_WRITE: - res->delegation_type = FMODE_WRITE|FMODE_READ; + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: + res->type = FMODE_WRITE|FMODE_READ; if (decode_space_limit(xdr, &res->pagemod_limit) < 0) return -EIO; } return decode_ace(xdr, NULL); } -static int decode_no_delegation(struct xdr_stream *xdr, struct nfs_openres *res) +static int decode_no_delegation(struct xdr_stream *xdr, + struct nfs4_open_delegation *res) { __be32 *p; - uint32_t why_no_delegation; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) return -EIO; - why_no_delegation = be32_to_cpup(p); - switch (why_no_delegation) { + res->why_no_delegation = be32_to_cpup(p); + switch (res->why_no_delegation) { case WND4_CONTENTION: case WND4_RESOURCE: - xdr_inline_decode(xdr, 4); - /* Ignore for now */ + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + res->will_notify = be32_to_cpup(p); } return 0; } -static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) +static int decode_delegation(struct xdr_stream *xdr, + struct nfs4_open_delegation *res) { __be32 *p; - uint32_t delegation_type; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) return -EIO; - delegation_type = be32_to_cpup(p); - res->delegation_type = 0; - switch (delegation_type) { + res->open_delegation_type = be32_to_cpup(p); + switch (res->open_delegation_type) { case NFS4_OPEN_DELEGATE_NONE: return 0; case NFS4_OPEN_DELEGATE_READ: case NFS4_OPEN_DELEGATE_WRITE: - return decode_rw_delegation(xdr, delegation_type, res); + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: + return decode_rw_delegation(xdr, res); case NFS4_OPEN_DELEGATE_NONE_EXT: return decode_no_delegation(xdr, res); } @@ -5248,7 +5294,7 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) for (; i < NFS4_BITMAP_SIZE; i++) res->attrset[i] = 0; - return decode_delegation(xdr, res); + return decode_delegation(xdr, &res->delegation); xdr_error: dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen); return -EIO; @@ -5480,6 +5526,11 @@ static int decode_setattr(struct xdr_stream *xdr) return -EIO; } +static int decode_delegattr(struct xdr_stream *xdr) +{ + return decode_setattr(xdr); +} + static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_res *res) { __be32 *p; @@ -7052,6 +7103,12 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, if (status) goto out; } + if (res->sattr_res) { + status = decode_delegattr(xdr); + res->sattr_ret = status; + if (status) + goto out; + } if (res->fattr) { status = decode_getfattr(xdr, res->fattr, res->server); if (status != 0) @@ -7547,72 +7604,6 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, return 0; } -/* - * We need to translate between nfs status return values and - * the local errno values which may not be the same. - */ -static struct { - int stat; - int errno; -} nfs_errtbl[] = { - { NFS4_OK, 0 }, - { NFS4ERR_PERM, -EPERM }, - { NFS4ERR_NOENT, -ENOENT }, - { NFS4ERR_IO, -errno_NFSERR_IO}, - { NFS4ERR_NXIO, -ENXIO }, - { NFS4ERR_ACCESS, -EACCES }, - { NFS4ERR_EXIST, -EEXIST }, - { NFS4ERR_XDEV, -EXDEV }, - { NFS4ERR_NOTDIR, -ENOTDIR }, - { NFS4ERR_ISDIR, -EISDIR }, - { NFS4ERR_INVAL, -EINVAL }, - { NFS4ERR_FBIG, -EFBIG }, - { NFS4ERR_NOSPC, -ENOSPC }, - { NFS4ERR_ROFS, -EROFS }, - { NFS4ERR_MLINK, -EMLINK }, - { NFS4ERR_NAMETOOLONG, -ENAMETOOLONG }, - { NFS4ERR_NOTEMPTY, -ENOTEMPTY }, - { NFS4ERR_DQUOT, -EDQUOT }, - { NFS4ERR_STALE, -ESTALE }, - { NFS4ERR_BADHANDLE, -EBADHANDLE }, - { NFS4ERR_BAD_COOKIE, -EBADCOOKIE }, - { NFS4ERR_NOTSUPP, -ENOTSUPP }, - { NFS4ERR_TOOSMALL, -ETOOSMALL }, - { NFS4ERR_SERVERFAULT, -EREMOTEIO }, - { NFS4ERR_BADTYPE, -EBADTYPE }, - { NFS4ERR_LOCKED, -EAGAIN }, - { NFS4ERR_SYMLINK, -ELOOP }, - { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP }, - { NFS4ERR_DEADLOCK, -EDEADLK }, - { NFS4ERR_NOXATTR, -ENODATA }, - { NFS4ERR_XATTR2BIG, -E2BIG }, - { -1, -EIO } -}; - -/* - * Convert an NFS error code to a local one. - * This one is used jointly by NFSv2 and NFSv3. - */ -static int -nfs4_stat_to_errno(int stat) -{ - int i; - for (i = 0; nfs_errtbl[i].stat != -1; i++) { - if (nfs_errtbl[i].stat == stat) - return nfs_errtbl[i].errno; - } - if (stat <= 10000 || stat > 10100) { - /* The server is looney tunes. */ - return -EREMOTEIO; - } - /* If we cannot translate the error, the recovery routines should - * handle it. - * Note: remaining NFSv4 error codes have values > 10000, so should - * not conflict with native Linux error codes. - */ - return -stat; -} - #ifdef CONFIG_NFS_V4_2 #include "nfs42xdr.c" #endif /* CONFIG_NFS_V4_2 */ @@ -7711,6 +7702,7 @@ const struct rpc_procinfo nfs4_procedures[] = { PROC42(CLONE, enc_clone, dec_clone), PROC42(COPY, enc_copy, dec_copy), PROC42(OFFLOAD_CANCEL, enc_offload_cancel, dec_offload_cancel), + PROC42(OFFLOAD_STATUS, enc_offload_status, dec_offload_status), PROC42(COPY_NOTIFY, enc_copy_notify, dec_copy_notify), PROC(LOOKUPP, enc_lookupp, dec_lookupp), PROC42(LAYOUTERROR, enc_layouterror, dec_layouterror), diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index afedb449b54f..7a058bd8c566 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -409,7 +409,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event, __entry->dir = NFS_FILEID(dir); __entry->flags = flags; __entry->fileid = d_is_negative(dentry) ? 0 : NFS_FILEID(d_inode(dentry)); - __assign_str(name, dentry->d_name.name); + __assign_str(name); ), TP_printk( @@ -457,7 +457,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done, __entry->error = error < 0 ? -error : 0; __entry->flags = flags; __entry->fileid = d_is_negative(dentry) ? 0 : NFS_FILEID(d_inode(dentry)); - __assign_str(name, dentry->d_name.name); + __assign_str(name); ), TP_printk( @@ -512,7 +512,7 @@ TRACE_EVENT(nfs_atomic_open_enter, __entry->dir = NFS_FILEID(dir); __entry->flags = flags; __entry->fmode = (__force unsigned long)ctx->mode; - __assign_str(name, ctx->dentry->d_name.name); + __assign_str(name); ), TP_printk( @@ -551,7 +551,7 @@ TRACE_EVENT(nfs_atomic_open_exit, __entry->dir = NFS_FILEID(dir); __entry->flags = flags; __entry->fmode = (__force unsigned long)ctx->mode; - __assign_str(name, ctx->dentry->d_name.name); + __assign_str(name); ), TP_printk( @@ -587,7 +587,7 @@ TRACE_EVENT(nfs_create_enter, __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; - __assign_str(name, dentry->d_name.name); + __assign_str(name); ), TP_printk( @@ -623,7 +623,7 @@ TRACE_EVENT(nfs_create_exit, __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; - __assign_str(name, dentry->d_name.name); + __assign_str(name); ), TP_printk( @@ -654,7 +654,7 @@ DECLARE_EVENT_CLASS(nfs_directory_event, TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); - __assign_str(name, dentry->d_name.name); + __assign_str(name); ), TP_printk( @@ -693,7 +693,7 @@ DECLARE_EVENT_CLASS(nfs_directory_event_done, __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->error = error < 0 ? -error : 0; - __assign_str(name, dentry->d_name.name); + __assign_str(name); ), TP_printk( @@ -747,7 +747,7 @@ TRACE_EVENT(nfs_link_enter, __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->dir = NFS_FILEID(dir); - __assign_str(name, dentry->d_name.name); + __assign_str(name); ), TP_printk( @@ -783,7 +783,7 @@ TRACE_EVENT(nfs_link_exit, __entry->fileid = NFS_FILEID(inode); __entry->dir = NFS_FILEID(dir); __entry->error = error < 0 ? -error : 0; - __assign_str(name, dentry->d_name.name); + __assign_str(name); ), TP_printk( @@ -819,8 +819,8 @@ DECLARE_EVENT_CLASS(nfs_rename_event, __entry->dev = old_dir->i_sb->s_dev; __entry->old_dir = NFS_FILEID(old_dir); __entry->new_dir = NFS_FILEID(new_dir); - __assign_str(old_name, old_dentry->d_name.name); - __assign_str(new_name, new_dentry->d_name.name); + __assign_str(old_name); + __assign_str(new_name); ), TP_printk( @@ -868,8 +868,8 @@ DECLARE_EVENT_CLASS(nfs_rename_event_done, __entry->error = -error; __entry->old_dir = NFS_FILEID(old_dir); __entry->new_dir = NFS_FILEID(new_dir); - __assign_str(old_name, old_dentry->d_name.name); - __assign_str(new_name, new_dentry->d_name.name); + __assign_str(old_name); + __assign_str(new_name); ), TP_printk( @@ -939,10 +939,11 @@ TRACE_EVENT(nfs_sillyrename_unlink, DECLARE_EVENT_CLASS(nfs_folio_event, TP_PROTO( const struct inode *inode, - struct folio *folio + loff_t offset, + size_t count ), - TP_ARGS(inode, folio), + TP_ARGS(inode, offset, count), TP_STRUCT__entry( __field(dev_t, dev) @@ -950,7 +951,7 @@ DECLARE_EVENT_CLASS(nfs_folio_event, __field(u64, fileid) __field(u64, version) __field(loff_t, offset) - __field(u32, count) + __field(size_t, count) ), TP_fast_assign( @@ -960,13 +961,13 @@ DECLARE_EVENT_CLASS(nfs_folio_event, __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); - __entry->offset = folio_file_pos(folio); - __entry->count = nfs_folio_length(folio); + __entry->offset = offset, + __entry->count = count; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu " - "offset=%lld count=%u", + "offset=%lld count=%zu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, @@ -978,18 +979,20 @@ DECLARE_EVENT_CLASS(nfs_folio_event, DEFINE_EVENT(nfs_folio_event, name, \ TP_PROTO( \ const struct inode *inode, \ - struct folio *folio \ + loff_t offset, \ + size_t count \ ), \ - TP_ARGS(inode, folio)) + TP_ARGS(inode, offset, count)) DECLARE_EVENT_CLASS(nfs_folio_event_done, TP_PROTO( const struct inode *inode, - struct folio *folio, + loff_t offset, + size_t count, int ret ), - TP_ARGS(inode, folio, ret), + TP_ARGS(inode, offset, count, ret), TP_STRUCT__entry( __field(dev_t, dev) @@ -998,7 +1001,7 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done, __field(u64, fileid) __field(u64, version) __field(loff_t, offset) - __field(u32, count) + __field(size_t, count) ), TP_fast_assign( @@ -1008,14 +1011,14 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done, __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); - __entry->offset = folio_file_pos(folio); - __entry->count = nfs_folio_length(folio); + __entry->offset = offset, + __entry->count = count, __entry->ret = ret; ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu " - "offset=%lld count=%u ret=%d", + "offset=%lld count=%zu ret=%d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, @@ -1027,10 +1030,11 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done, DEFINE_EVENT(nfs_folio_event_done, name, \ TP_PROTO( \ const struct inode *inode, \ - struct folio *folio, \ + loff_t offset, \ + size_t count, \ int ret \ ), \ - TP_ARGS(inode, folio, ret)) + TP_ARGS(inode, offset, count, ret)) DEFINE_NFS_FOLIO_EVENT(nfs_aop_readpage); DEFINE_NFS_FOLIO_EVENT_DONE(nfs_aop_readpage_done); @@ -1636,8 +1640,8 @@ TRACE_EVENT(nfs_mount_assign, ), TP_fast_assign( - __assign_str(option, option); - __assign_str(value, value); + __assign_str(option); + __assign_str(value); ), TP_printk("option %s=%s", @@ -1657,7 +1661,7 @@ TRACE_EVENT(nfs_mount_option, ), TP_fast_assign( - __assign_str(option, param->key); + __assign_str(option); ), TP_printk("option %s", __get_str(option)) @@ -1675,12 +1679,41 @@ TRACE_EVENT(nfs_mount_path, ), TP_fast_assign( - __assign_str(path, path); + __assign_str(path); ), TP_printk("path='%s'", __get_str(path)) ); +TRACE_EVENT(nfs_local_open_fh, + TP_PROTO( + const struct nfs_fh *fh, + fmode_t fmode, + int error + ), + + TP_ARGS(fh, fmode, error), + + TP_STRUCT__entry( + __field(int, error) + __field(u32, fhandle) + __field(unsigned int, fmode) + ), + + TP_fast_assign( + __entry->error = error; + __entry->fhandle = nfs_fhandle_hash(fh); + __entry->fmode = (__force unsigned int)fmode; + ), + + TP_printk( + "error=%d fhandle=0x%08x mode=%s", + __entry->error, + __entry->fhandle, + show_fs_fmode_flags(__entry->fmode) + ) +); + DECLARE_EVENT_CLASS(nfs_xdr_event, TP_PROTO( const struct xdr_stream *xdr, @@ -1710,9 +1743,8 @@ DECLARE_EVENT_CLASS(nfs_xdr_event, __entry->xid = be32_to_cpu(rqstp->rq_xid); __entry->version = task->tk_client->cl_vers; __entry->error = error; - __assign_str(program, - task->tk_client->cl_program->name); - __assign_str(procedure, task->tk_msg.rpc_proc->p_name); + __assign_str(program); + __assign_str(procedure); ), TP_printk(SUNRPC_TRACE_TASK_SPECIFIER diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 6efb5068c116..11968dcb7243 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -188,102 +188,6 @@ nfs_async_iocounter_wait(struct rpc_task *task, struct nfs_lock_context *l_ctx) EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait); /* - * nfs_page_lock_head_request - page lock the head of the page group - * @req: any member of the page group - */ -struct nfs_page * -nfs_page_group_lock_head(struct nfs_page *req) -{ - struct nfs_page *head = req->wb_head; - - while (!nfs_lock_request(head)) { - int ret = nfs_wait_on_request(head); - if (ret < 0) - return ERR_PTR(ret); - } - if (head != req) - kref_get(&head->wb_kref); - return head; -} - -/* - * nfs_unroll_locks - unlock all newly locked reqs and wait on @req - * @head: head request of page group, must be holding head lock - * @req: request that couldn't lock and needs to wait on the req bit lock - * - * This is a helper function for nfs_lock_and_join_requests - * returns 0 on success, < 0 on error. - */ -static void -nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req) -{ - struct nfs_page *tmp; - - /* relinquish all the locks successfully grabbed this run */ - for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) { - if (!kref_read(&tmp->wb_kref)) - continue; - nfs_unlock_and_release_request(tmp); - } -} - -/* - * nfs_page_group_lock_subreq - try to lock a subrequest - * @head: head request of page group - * @subreq: request to lock - * - * This is a helper function for nfs_lock_and_join_requests which - * must be called with the head request and page group both locked. - * On error, it returns with the page group unlocked. - */ -static int -nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq) -{ - int ret; - - if (!kref_get_unless_zero(&subreq->wb_kref)) - return 0; - while (!nfs_lock_request(subreq)) { - nfs_page_group_unlock(head); - ret = nfs_wait_on_request(subreq); - if (!ret) - ret = nfs_page_group_lock(head); - if (ret < 0) { - nfs_unroll_locks(head, subreq); - nfs_release_request(subreq); - return ret; - } - } - return 0; -} - -/* - * nfs_page_group_lock_subrequests - try to lock the subrequests - * @head: head request of page group - * - * This is a helper function for nfs_lock_and_join_requests which - * must be called with the head request locked. - */ -int nfs_page_group_lock_subrequests(struct nfs_page *head) -{ - struct nfs_page *subreq; - int ret; - - ret = nfs_page_group_lock(head); - if (ret < 0) - return ret; - /* lock each request in the page group */ - for (subreq = head->wb_this_page; subreq != head; - subreq = subreq->wb_this_page) { - ret = nfs_page_group_lock_subreq(head, subreq); - if (ret < 0) - return ret; - } - nfs_page_group_unlock(head); - return 0; -} - -/* * nfs_page_set_headlock - set the request PG_HEADLOCK * @req: request that is to be locked * @@ -569,7 +473,7 @@ struct nfs_page *nfs_page_create_from_folio(struct nfs_open_context *ctx, if (IS_ERR(l_ctx)) return ERR_CAST(l_ctx); - ret = nfs_page_create(l_ctx, offset, folio_index(folio), offset, count); + ret = nfs_page_create(l_ctx, offset, folio->index, offset, count); if (!IS_ERR(ret)) { nfs_page_assign_folio(ret, folio); nfs_page_group_init(ret, NULL); @@ -694,25 +598,6 @@ void nfs_release_request(struct nfs_page *req) } EXPORT_SYMBOL_GPL(nfs_release_request); -/** - * nfs_wait_on_request - Wait for a request to complete. - * @req: request to wait upon. - * - * Interruptible by fatal signals only. - * The user is responsible for holding a count on the request. - */ -int -nfs_wait_on_request(struct nfs_page *req) -{ - if (!test_bit(PG_BUSY, &req->wb_flags)) - return 0; - set_bit(PG_CONTENDED2, &req->wb_flags); - smp_mb__after_atomic(); - return wait_on_bit_io(&req->wb_flags, PG_BUSY, - TASK_UNINTERRUPTIBLE); -} -EXPORT_SYMBOL_GPL(nfs_wait_on_request); - /* * nfs_generic_pg_test - determine if requests can be coalesced * @desc: pointer to descriptor @@ -846,7 +731,8 @@ static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, const struct cred *cred, const struct nfs_rpc_ops *rpc_ops, - const struct rpc_call_ops *call_ops, int how, int flags) + const struct rpc_call_ops *call_ops, int how, int flags, + struct nfsd_file *localio) { struct rpc_task *task; struct rpc_message msg = { @@ -876,6 +762,10 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, hdr->args.count, (unsigned long long)hdr->args.offset); + if (localio) + return nfs_local_doio(NFS_SERVER(hdr->inode)->nfs_client, + localio, hdr, call_ops); + task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -1068,6 +958,13 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); ret = nfs_generic_pgio(desc, hdr); if (ret == 0) { + struct nfs_client *clp = NFS_SERVER(hdr->inode)->nfs_client; + + struct nfsd_file *localio = + nfs_local_open_fh(clp, hdr->cred, hdr->args.fh, + &hdr->args.context->nfl, + hdr->args.context->mode); + if (NFS_SERVER(hdr->inode)->nfs_client->cl_minorversion) task_flags = RPC_TASK_MOVEABLE; ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), @@ -1076,7 +973,8 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) NFS_PROTO(hdr->inode), desc->pg_rpc_callops, desc->pg_ioflags, - RPC_TASK_CRED_NOREF | task_flags); + RPC_TASK_CRED_NOREF | task_flags, + localio); } return ret; } @@ -1545,6 +1443,11 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) continue; } else if (index == prev->wb_index + 1) continue; + /* + * We will submit more requests after these. Indicate + * this to the underlying layers. + */ + desc->pg_moreio = 1; nfs_pageio_complete(desc); break; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a5cc6199127f..3adb7d0dbec7 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -61,6 +61,7 @@ static void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo, u32 seq); static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, struct list_head *tmp_list); +static int pnfs_layout_return_on_reboot(struct pnfs_layout_hdr *lo); /* Return the registered pnfs layout driver module matching given id */ static struct pnfs_layoutdriver_type * @@ -476,6 +477,18 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, return !list_empty(&lo->plh_segs); } +static int pnfs_mark_layout_stateid_return(struct pnfs_layout_hdr *lo, + struct list_head *lseg_list, + enum pnfs_iomode iomode, u32 seq) +{ + struct pnfs_layout_range range = { + .iomode = iomode, + .length = NFS4_MAX_UINT64, + }; + + return pnfs_mark_matching_lsegs_return(lo, lseg_list, &range, seq); +} + static int pnfs_iomode_to_fail_bit(u32 iomode) { @@ -732,6 +745,14 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, return remaining; } +static void pnfs_reset_return_info(struct pnfs_layout_hdr *lo) +{ + struct pnfs_layout_segment *lseg; + + list_for_each_entry(lseg, &lo->plh_return_segs, pls_list) + pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0); +} + static void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo, struct list_head *free_me, @@ -846,8 +867,6 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, break; inode = pnfs_grab_inode_layout_hdr(lo); if (inode != NULL) { - if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags)) - list_del_rcu(&lo->plh_layouts); if (pnfs_layout_add_bulk_destroy_list(inode, layout_list)) continue; @@ -868,7 +887,7 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, static int pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, - bool is_bulk_recall) + enum pnfs_layout_destroy_mode mode) { struct pnfs_layout_hdr *lo; struct inode *inode; @@ -886,8 +905,11 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, spin_lock(&inode->i_lock); list_del_init(&lo->plh_bulk_destroy); - if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) { - if (is_bulk_recall) + if (mode == PNFS_LAYOUT_FILE_BULK_RETURN) { + pnfs_mark_layout_stateid_return(lo, &lseg_list, + IOMODE_ANY, 0); + } else if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) { + if (mode == PNFS_LAYOUT_BULK_RETURN) set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); ret = -EAGAIN; } @@ -901,10 +923,8 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, return ret; } -int -pnfs_destroy_layouts_byfsid(struct nfs_client *clp, - struct nfs_fsid *fsid, - bool is_recall) +int pnfs_layout_destroy_byfsid(struct nfs_client *clp, struct nfs_fsid *fsid, + enum pnfs_layout_destroy_mode mode) { struct nfs_server *server; LIST_HEAD(layout_list); @@ -923,33 +943,40 @@ restart: rcu_read_unlock(); spin_unlock(&clp->cl_lock); - if (list_empty(&layout_list)) - return 0; - return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall); + return pnfs_layout_free_bulk_destroy_list(&layout_list, mode); } -int -pnfs_destroy_layouts_byclid(struct nfs_client *clp, - bool is_recall) +static void pnfs_layout_build_destroy_list_byclient(struct nfs_client *clp, + struct list_head *list) { struct nfs_server *server; - LIST_HEAD(layout_list); spin_lock(&clp->cl_lock); rcu_read_lock(); restart: list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { - if (pnfs_layout_bulk_destroy_byserver_locked(clp, - server, - &layout_list) != 0) + if (pnfs_layout_bulk_destroy_byserver_locked(clp, server, + list) != 0) goto restart; } rcu_read_unlock(); spin_unlock(&clp->cl_lock); +} - if (list_empty(&layout_list)) - return 0; - return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall); +static int pnfs_layout_do_destroy_byclid(struct nfs_client *clp, + struct list_head *list, + enum pnfs_layout_destroy_mode mode) +{ + pnfs_layout_build_destroy_list_byclient(clp, list); + return pnfs_layout_free_bulk_destroy_list(list, mode); +} + +int pnfs_layout_destroy_byclid(struct nfs_client *clp, + enum pnfs_layout_destroy_mode mode) +{ + LIST_HEAD(layout_list); + + return pnfs_layout_do_destroy_byclid(clp, &layout_list, mode); } /* @@ -962,7 +989,68 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) nfs4_deviceid_mark_client_invalid(clp); nfs4_deviceid_purge_client(clp); - pnfs_destroy_layouts_byclid(clp, false); + pnfs_layout_destroy_byclid(clp, PNFS_LAYOUT_INVALIDATE); +} + +static void pnfs_layout_build_recover_list_byclient(struct nfs_client *clp, + struct list_head *list) +{ + struct nfs_server *server; + + spin_lock(&clp->cl_lock); + rcu_read_lock(); +restart: + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + if (!(server->caps & NFS_CAP_REBOOT_LAYOUTRETURN)) + continue; + if (pnfs_layout_bulk_destroy_byserver_locked(clp, server, + list) != 0) + goto restart; + } + rcu_read_unlock(); + spin_unlock(&clp->cl_lock); +} + +static int pnfs_layout_bulk_list_reboot(struct list_head *list) +{ + struct pnfs_layout_hdr *lo; + struct nfs_server *server; + int ret; + + list_for_each_entry(lo, list, plh_bulk_destroy) { + server = NFS_SERVER(lo->plh_inode); + ret = pnfs_layout_return_on_reboot(lo); + switch (ret) { + case 0: + continue; + case -NFS4ERR_BAD_STATEID: + server->caps &= ~NFS_CAP_REBOOT_LAYOUTRETURN; + break; + case -NFS4ERR_NO_GRACE: + break; + default: + goto err; + } + break; + } + return 0; +err: + return ret; +} + +int pnfs_layout_handle_reboot(struct nfs_client *clp) +{ + LIST_HEAD(list); + int ret = 0, ret2; + + pnfs_layout_build_recover_list_byclient(clp, &list); + if (!list_empty(&list)) + ret = pnfs_layout_bulk_list_reboot(&list); + ret2 = pnfs_layout_do_destroy_byclid(clp, &list, + PNFS_LAYOUT_INVALIDATE); + if (!ret) + ret = ret2; + return (ret == 0) ? 0 : -EAGAIN; } static void @@ -1163,6 +1251,33 @@ static void pnfs_clear_layoutcommit(struct inode *inode, } } +static void +pnfs_layoutreturn_retry_later_locked(struct pnfs_layout_hdr *lo, + const nfs4_stateid *arg_stateid, + const struct pnfs_layout_range *range, + struct list_head *freeme) +{ + if (pnfs_layout_is_valid(lo) && + nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) + pnfs_reset_return_info(lo); + else + pnfs_mark_layout_stateid_invalid(lo, freeme); + pnfs_clear_layoutreturn_waitbit(lo); +} + +void pnfs_layoutreturn_retry_later(struct pnfs_layout_hdr *lo, + const nfs4_stateid *arg_stateid, + const struct pnfs_layout_range *range) +{ + struct inode *inode = lo->plh_inode; + LIST_HEAD(freeme); + + spin_lock(&inode->i_lock); + pnfs_layoutreturn_retry_later_locked(lo, arg_stateid, range, &freeme); + spin_unlock(&inode->i_lock); + pnfs_free_lseg_list(&freeme); +} + void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, const nfs4_stateid *arg_stateid, const struct pnfs_layout_range *range, @@ -1172,15 +1287,15 @@ void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, LIST_HEAD(freeme); spin_lock(&inode->i_lock); - if (!pnfs_layout_is_valid(lo) || - !nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) + if (!nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) goto out_unlock; - if (stateid) { + if (stateid && pnfs_layout_is_valid(lo)) { u32 seq = be32_to_cpu(arg_stateid->seqid); pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq); pnfs_free_returned_lsegs(lo, &freeme, range, seq); pnfs_set_layout_stateid(lo, stateid, NULL, true); + pnfs_reset_return_info(lo); } else pnfs_mark_layout_stateid_invalid(lo, &freeme); out_unlock: @@ -1197,7 +1312,7 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, enum pnfs_iomode *iomode) { /* Serialise LAYOUTGET/LAYOUTRETURN */ - if (atomic_read(&lo->plh_outstanding) != 0) + if (atomic_read(&lo->plh_outstanding) != 0 && lo->plh_return_seq == 0) return false; if (test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) return false; @@ -1239,7 +1354,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, const struct cred **pcred, enum pnfs_iomode iomode, - bool sync) + unsigned int flags) { struct inode *ino = lo->plh_inode; struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; @@ -1266,33 +1381,21 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, if (ld->prepare_layoutreturn) ld->prepare_layoutreturn(&lrp->args); - status = nfs4_proc_layoutreturn(lrp, sync); + status = nfs4_proc_layoutreturn(lrp, flags); out: dprintk("<-- %s status: %d\n", __func__, status); return status; } -static bool -pnfs_layout_segments_returnable(struct pnfs_layout_hdr *lo, - enum pnfs_iomode iomode, - u32 seq) -{ - struct pnfs_layout_range recall_range = { - .length = NFS4_MAX_UINT64, - .iomode = iomode, - }; - return pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, - &recall_range, seq) != -EBUSY; -} - /* Return true if layoutreturn is needed */ static bool pnfs_layout_need_return(struct pnfs_layout_hdr *lo) { if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) return false; - return pnfs_layout_segments_returnable(lo, lo->plh_return_iomode, - lo->plh_return_seq); + return pnfs_mark_layout_stateid_return(lo, &lo->plh_return_segs, + lo->plh_return_iomode, + lo->plh_return_seq) != EBUSY; } static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo) @@ -1312,7 +1415,8 @@ static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo) spin_unlock(&inode->i_lock); if (send) { /* Send an async layoutreturn so we dont deadlock */ - pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false); + pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, + PNFS_FL_LAYOUTRETURN_ASYNC); } } else spin_unlock(&inode->i_lock); @@ -1379,7 +1483,8 @@ _pnfs_return_layout(struct inode *ino) send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, NULL); spin_unlock(&ino->i_lock); if (send) - status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY, true); + status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY, + 0); out_wait_layoutreturn: wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, TASK_UNINTERRUPTIBLE); out_put_layout_hdr: @@ -1417,6 +1522,24 @@ pnfs_commit_and_return_layout(struct inode *inode) return ret; } +static int pnfs_layout_return_on_reboot(struct pnfs_layout_hdr *lo) +{ + struct inode *inode = lo->plh_inode; + const struct cred *cred; + + spin_lock(&inode->i_lock); + if (!pnfs_layout_is_valid(lo)) { + spin_unlock(&inode->i_lock); + return 0; + } + cred = get_cred(lo->plh_lc_cred); + pnfs_get_layout_hdr(lo); + spin_unlock(&inode->i_lock); + + return pnfs_send_layoutreturn(lo, &zero_stateid, &cred, IOMODE_ANY, + PNFS_FL_LAYOUTRETURN_PRIVILEGED); +} + bool pnfs_roc(struct inode *ino, struct nfs4_layoutreturn_args *args, struct nfs4_layoutreturn_res *res, @@ -1520,7 +1643,7 @@ out_noroc: return true; } if (layoutreturn) - pnfs_send_layoutreturn(lo, &stateid, &lc_cred, iomode, true); + pnfs_send_layoutreturn(lo, &stateid, &lc_cred, iomode, 0); pnfs_put_layout_hdr(lo); return false; } @@ -1542,6 +1665,18 @@ int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, /* Was there an RPC level error? If not, retry */ if (task->tk_rpc_status == 0) break; + /* + * Is there a fatal network level error? + * If so release the layout, but flag the error. + */ + if ((task->tk_rpc_status == -ENETDOWN || + task->tk_rpc_status == -ENETUNREACH) && + task->tk_flags & RPC_TASK_NETUNREACH_FATAL) { + *ret = 0; + (*respp)->lrs_present = 0; + retval = -EIO; + break; + } /* If the call was not sent, let caller handle it */ if (!RPC_WAS_SENT(task)) return 0; @@ -1570,22 +1705,24 @@ int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, } void pnfs_roc_release(struct nfs4_layoutreturn_args *args, - struct nfs4_layoutreturn_res *res, - int ret) + struct nfs4_layoutreturn_res *res, int ret) { struct pnfs_layout_hdr *lo = args->layout; struct inode *inode = args->inode; const nfs4_stateid *res_stateid = NULL; struct nfs4_xdr_opaque_data *ld_private = args->ld_private; + LIST_HEAD(freeme); switch (ret) { + case -NFS4ERR_BADSESSION: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: case -NFS4ERR_NOMATCHING_LAYOUT: spin_lock(&inode->i_lock); - if (pnfs_layout_is_valid(lo) && - nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid)) - pnfs_set_plh_return_info(lo, args->range.iomode, 0); - pnfs_clear_layoutreturn_waitbit(lo); + pnfs_layoutreturn_retry_later_locked(lo, &args->stateid, + &args->range, &freeme); spin_unlock(&inode->i_lock); + pnfs_free_lseg_list(&freeme); break; case 0: if (res->lrs_present) @@ -2566,7 +2703,8 @@ pnfs_mark_layout_for_return(struct inode *inode, return_now = pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode); spin_unlock(&inode->i_lock); if (return_now) - pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false); + pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, + PNFS_FL_LAYOUTRETURN_ASYNC); } else { spin_unlock(&inode->i_lock); nfs_commit_inode(inode, 0); @@ -2682,7 +2820,8 @@ restart: } spin_unlock(&inode->i_lock); rcu_read_unlock(); - pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false); + pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, + PNFS_FL_LAYOUTRETURN_ASYNC); pnfs_put_layout_hdr(lo); cond_resched(); goto restart; @@ -2705,38 +2844,28 @@ pnfs_layout_return_unused_byclid(struct nfs_client *clp, &range); } +/* Check if we have we have a valid layout but if there isn't an intersection + * between the request and the pgio->pg_lseg, put this pgio->pg_lseg away. + */ void -pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio) +pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) { if (pgio->pg_lseg == NULL || - test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags)) + (test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags) && + pnfs_lseg_request_intersecting(pgio->pg_lseg, req))) return; pnfs_put_lseg(pgio->pg_lseg); pgio->pg_lseg = NULL; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout); -/* - * Check for any intersection between the request and the pgio->pg_lseg, - * and if none, put this pgio->pg_lseg away. - */ -void -pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) -{ - if (pgio->pg_lseg && !pnfs_lseg_request_intersecting(pgio->pg_lseg, req)) { - pnfs_put_lseg(pgio->pg_lseg); - pgio->pg_lseg = NULL; - } -} -EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_range); - void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { u64 rd_size; - pnfs_generic_pg_check_layout(pgio); - pnfs_generic_pg_check_range(pgio, req); + pnfs_generic_pg_check_layout(pgio, req); if (pgio->pg_lseg == NULL) { if (pgio->pg_dreq == NULL) rd_size = i_size_read(pgio->pg_inode) - req_offset(req); @@ -2766,8 +2895,7 @@ void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req, u64 wb_size) { - pnfs_generic_pg_check_layout(pgio); - pnfs_generic_pg_check_range(pgio, req); + pnfs_generic_pg_check_layout(pgio, req); if (pgio->pg_lseg == NULL) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index db57a85500ee..91ff877185c8 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -60,6 +60,7 @@ struct nfs4_pnfs_ds { struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ char *ds_remotestr; /* comma sep list of addrs */ struct list_head ds_addrs; + const struct net *ds_net; struct nfs_client *ds_clp; refcount_t ds_count; unsigned long ds_state; @@ -118,6 +119,12 @@ enum layoutdriver_policy_flags { PNFS_LAYOUTGET_ON_OPEN = 1 << 3, }; +enum pnfs_layout_destroy_mode { + PNFS_LAYOUT_INVALIDATE = 0, + PNFS_LAYOUT_BULK_RETURN, + PNFS_LAYOUT_FILE_BULK_RETURN, +}; + struct nfs4_deviceid_node; /* Per-layout driver specific registration structure */ @@ -127,7 +134,6 @@ struct pnfs_layoutdriver_type { const char *name; struct module *owner; unsigned flags; - unsigned max_deviceinfo_size; unsigned max_layoutget_response; int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *); @@ -193,8 +199,6 @@ struct pnfs_commit_ops { int max); void (*recover_commit_reqs) (struct list_head *list, struct nfs_commit_info *cinfo); - struct nfs_page * (*search_commit_reqs)(struct nfs_commit_info *cinfo, - struct folio *folio); }; struct pnfs_layout_hdr { @@ -242,6 +246,9 @@ extern const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id); extern void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld); /* nfs4proc.c */ +#define PNFS_FL_LAYOUTRETURN_ASYNC (1U << 0) +#define PNFS_FL_LAYOUTRETURN_PRIVILEGED (1U << 1) + extern size_t max_response_pages(struct nfs_server *server); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev, @@ -249,7 +256,8 @@ extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, extern struct pnfs_layout_segment * nfs4_proc_layoutget(struct nfs4_layoutget *lgp, struct nfs4_exception *exception); -extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync); +extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, + unsigned int flags); /* pnfs.c */ void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); @@ -257,8 +265,7 @@ void pnfs_put_lseg(struct pnfs_layout_segment *lseg); void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, struct nfs_fsinfo *); void unset_pnfs_layoutdriver(struct nfs_server *); -void pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio); -void pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req); +void pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio, struct nfs_page *req); void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, @@ -274,11 +281,10 @@ void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); void pnfs_destroy_layout_final(struct nfs_inode *); void pnfs_destroy_all_layouts(struct nfs_client *); -int pnfs_destroy_layouts_byfsid(struct nfs_client *clp, - struct nfs_fsid *fsid, - bool is_recall); -int pnfs_destroy_layouts_byclid(struct nfs_client *clp, - bool is_recall); +int pnfs_layout_destroy_byfsid(struct nfs_client *clp, struct nfs_fsid *fsid, + enum pnfs_layout_destroy_mode mode); +int pnfs_layout_destroy_byclid(struct nfs_client *clp, + enum pnfs_layout_destroy_mode mode); bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst, struct pnfs_layout_range *dst_range, struct inode *inode); @@ -324,6 +330,9 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, enum pnfs_iomode iomode, bool strict_iomode, gfp_t gfp_flags); +void pnfs_layoutreturn_retry_later(struct pnfs_layout_hdr *lo, + const nfs4_stateid *arg_stateid, + const struct pnfs_layout_range *range); void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, const nfs4_stateid *arg_stateid, const struct pnfs_layout_range *range, @@ -345,6 +354,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, struct pnfs_layout_segment *lseg); void pnfs_layout_return_unused_byclid(struct nfs_client *clp, enum pnfs_iomode iomode); +int pnfs_layout_handle_reboot(struct nfs_client *clp); /* nfs4_deviceid_flags */ enum { @@ -397,8 +407,6 @@ void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data); void pnfs_generic_rw_release(void *data); void pnfs_generic_recover_commit_reqs(struct list_head *dst, struct nfs_commit_info *cinfo); -struct nfs_page *pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo, - struct folio *folio); int pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, int how, @@ -408,7 +416,8 @@ int pnfs_generic_commit_pagelist(struct inode *inode, int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max); void pnfs_generic_write_commit_done(struct rpc_task *task, void *data); void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds); -struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs, +struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(const struct net *net, + struct list_head *dsaddrs, gfp_t gfp_flags); void nfs4_pnfs_v3_ds_connect_unload(void); int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, @@ -558,17 +567,6 @@ pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo) fl_cinfo->ops->recover_commit_reqs(head, cinfo); } -static inline struct nfs_page * -pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, - struct folio *folio) -{ - struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; - - if (!fl_cinfo->ops || !fl_cinfo->ops->search_commit_reqs) - return NULL; - return fl_cinfo->ops->search_commit_reqs(cinfo, folio); -} - /* Should the pNFS client commit and return the layout upon a setattr */ static inline bool pnfs_ld_layoutret_on_setattr(struct inode *inode) @@ -726,6 +724,11 @@ static inline void pnfs_destroy_layout_final(struct nfs_inode *nfsi) { } +static inline int pnfs_layout_handle_reboot(struct nfs_client *clp) +{ + return 0; +} + static inline struct pnfs_layout_segment * pnfs_get_lseg(struct pnfs_layout_segment *lseg) { @@ -865,13 +868,6 @@ pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo) { } -static inline struct nfs_page * -pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, - struct folio *folio) -{ - return NULL; -} - static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) { return 0; diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 178001c90156..bf0f2d67e96c 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -110,9 +110,6 @@ nfs4_get_device_info(struct nfs_server *server, * GETDEVICEINFO's maxcount */ max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; - if (server->pnfs_curr_ld->max_deviceinfo_size && - server->pnfs_curr_ld->max_deviceinfo_size < max_resp_sz) - max_resp_sz = server->pnfs_curr_ld->max_deviceinfo_size; max_pages = nfs_page_array_len(0, max_resp_sz); dprintk("%s: server %p max_resp_sz %u max_pages %d\n", __func__, server, max_resp_sz, max_pages); diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 88e061bd711b..91ef486f40b9 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -16,6 +16,7 @@ #include "nfs4session.h" #include "internal.h" #include "pnfs.h" +#include "netns.h" #define NFSDBG_FACILITY NFSDBG_PNFS @@ -351,53 +352,6 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst, } EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs); -static struct nfs_page * -pnfs_bucket_search_commit_reqs(struct pnfs_commit_bucket *buckets, - unsigned int nbuckets, struct folio *folio) -{ - struct nfs_page *req; - struct pnfs_commit_bucket *b; - unsigned int i; - - /* Linearly search the commit lists for each bucket until a matching - * request is found */ - for (i = 0, b = buckets; i < nbuckets; i++, b++) { - list_for_each_entry(req, &b->written, wb_list) { - if (nfs_page_to_folio(req) == folio) - return req->wb_head; - } - list_for_each_entry(req, &b->committing, wb_list) { - if (nfs_page_to_folio(req) == folio) - return req->wb_head; - } - } - return NULL; -} - -/* pnfs_generic_search_commit_reqs - Search lists in @cinfo for the head request - * for @folio - * @cinfo - commit info for current inode - * @folio - page to search for matching head request - * - * Return: the head request if one is found, otherwise %NULL. - */ -struct nfs_page *pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo, - struct folio *folio) -{ - struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; - struct pnfs_commit_array *array; - struct nfs_page *req; - - list_for_each_entry(array, &fl_cinfo->commits, cinfo_list) { - req = pnfs_bucket_search_commit_reqs(array->buckets, - array->nbuckets, folio); - if (req) - return req; - } - return NULL; -} -EXPORT_SYMBOL_GPL(pnfs_generic_search_commit_reqs); - static struct pnfs_layout_segment * pnfs_bucket_get_committing(struct list_head *head, struct pnfs_commit_bucket *bucket, @@ -537,7 +491,7 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(data->inode), data->mds_ops, how, - RPC_TASK_CRED_NOREF); + RPC_TASK_CRED_NOREF, NULL); } else { nfs_init_commit(data, NULL, data->lseg, cinfo); initiate_commit(data, how); @@ -551,14 +505,14 @@ EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist); /* * Data server cache * - * Data servers can be mapped to different device ids. - * nfs4_pnfs_ds reference counting + * Data servers can be mapped to different device ids, but should + * never be shared between net namespaces. + * + * nfs4_pnfs_ds reference counting: * - set to 1 on allocation * - incremented when a device id maps a data server already in the cache. * - decremented when deviceid is removed from the cache. */ -static DEFINE_SPINLOCK(nfs4_ds_cache_lock); -static LIST_HEAD(nfs4_data_server_cache); /* Debug routines */ static void @@ -651,11 +605,11 @@ _same_data_server_addrs_locked(const struct list_head *dsaddrs1, * Lookup DS by addresses. nfs4_ds_cache_lock is held */ static struct nfs4_pnfs_ds * -_data_server_lookup_locked(const struct list_head *dsaddrs) +_data_server_lookup_locked(const struct nfs_net *nn, const struct list_head *dsaddrs) { struct nfs4_pnfs_ds *ds; - list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) + list_for_each_entry(ds, &nn->nfs4_data_server_cache, ds_node) if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) return ds; return NULL; @@ -700,10 +654,11 @@ static void destroy_ds(struct nfs4_pnfs_ds *ds) void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds) { - if (refcount_dec_and_lock(&ds->ds_count, - &nfs4_ds_cache_lock)) { + struct nfs_net *nn = net_generic(ds->ds_net, nfs_net_id); + + if (refcount_dec_and_lock(&ds->ds_count, &nn->nfs4_data_server_lock)) { list_del_init(&ds->ds_node); - spin_unlock(&nfs4_ds_cache_lock); + spin_unlock(&nn->nfs4_data_server_lock); destroy_ds(ds); } } @@ -763,8 +718,9 @@ out_err: * uncached and return cached struct nfs4_pnfs_ds. */ struct nfs4_pnfs_ds * -nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) +nfs4_pnfs_ds_add(const struct net *net, struct list_head *dsaddrs, gfp_t gfp_flags) { + struct nfs_net *nn = net_generic(net, nfs_net_id); struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; char *remotestr; @@ -780,16 +736,17 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) /* this is only used for debugging, so it's ok if its NULL */ remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); - spin_lock(&nfs4_ds_cache_lock); - tmp_ds = _data_server_lookup_locked(dsaddrs); + spin_lock(&nn->nfs4_data_server_lock); + tmp_ds = _data_server_lookup_locked(nn, dsaddrs); if (tmp_ds == NULL) { INIT_LIST_HEAD(&ds->ds_addrs); list_splice_init(dsaddrs, &ds->ds_addrs); ds->ds_remotestr = remotestr; refcount_set(&ds->ds_count, 1); INIT_LIST_HEAD(&ds->ds_node); + ds->ds_net = net; ds->ds_clp = NULL; - list_add(&ds->ds_node, &nfs4_data_server_cache); + list_add(&ds->ds_node, &nn->nfs4_data_server_cache); dprintk("%s add new data server %s\n", __func__, ds->ds_remotestr); } else { @@ -801,7 +758,7 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) refcount_read(&tmp_ds->ds_count)); ds = tmp_ds; } - spin_unlock(&nfs4_ds_cache_lock); + spin_unlock(&nn->nfs4_data_server_lock); out: return ds; } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index ad3a321ae997..63e71310b9f6 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -153,13 +153,13 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } static int -nfs_proc_lookup(struct inode *dir, struct dentry *dentry, +nfs_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct nfs_diropargs arg = { .fh = NFS_FH(dir), - .name = dentry->d_name.name, - .len = dentry->d_name.len + .name = name->name, + .len = name->len }; struct nfs_diropok res = { .fh = fhandle, @@ -446,13 +446,14 @@ out: return status; } -static int +static struct dentry * nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) { struct nfs_createdata *data; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_MKDIR], }; + struct dentry *alias = NULL; int status = -ENOMEM; dprintk("NFS call mkdir %pd\n", dentry); @@ -464,12 +465,15 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); - if (status == 0) - status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); + if (status == 0) { + alias = nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr); + status = PTR_ERR_OR_ZERO(alias); + } else + alias = ERR_PTR(status); nfs_free_createdata(data); out: dprintk("NFS reply mkdir: %d\n", status); - return status; + return alias; } static int @@ -687,14 +691,22 @@ out_einval: return -EINVAL; } -static int nfs_have_delegation(struct inode *inode, fmode_t flags) +static int nfs_have_delegation(struct inode *inode, fmode_t type, int flags) +{ + return 0; +} + +static int nfs_return_delegation(struct inode *inode) { + if (S_ISREG(inode->i_mode)) + nfs_wb_all(inode); return 0; } static const struct inode_operations nfs_dir_inode_operations = { .create = nfs_create, .lookup = nfs_lookup, + .atomic_open = nfs_atomic_open_v23, .link = nfs_link, .unlink = nfs_unlink, .symlink = nfs_symlink, @@ -756,6 +768,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .lock_check_bounds = nfs_lock_check_bounds, .close_context = nfs_close_context, .have_delegation = nfs_have_delegation, + .return_delegation = nfs_return_delegation, .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, .free_client = nfs_free_client, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index a142287d86f6..81bd1b9aba17 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -28,6 +28,7 @@ #include "fscache.h" #include "pnfs.h" #include "nfstrace.h" +#include "delegation.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -47,8 +48,7 @@ static struct nfs_pgio_header *nfs_readhdr_alloc(void) static void nfs_readhdr_free(struct nfs_pgio_header *rhdr) { - if (rhdr->res.scratch != NULL) - kfree(rhdr->res.scratch); + kfree(rhdr->res.scratch); kmem_cache_free(nfs_rdata_cachep, rhdr); } @@ -122,8 +122,6 @@ static void nfs_readpage_release(struct nfs_page *req, int error) { struct folio *folio = nfs_page_to_folio(req); - if (nfs_error_is_fatal_on_server(error) && error != -ETIMEDOUT) - folio_set_error(folio); if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) if (nfs_netfs_folio_unlock(folio)) folio_unlock(folio); @@ -288,7 +286,7 @@ int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio, struct nfs_open_context *ctx, struct folio *folio) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; struct nfs_server *server = NFS_SERVER(inode); size_t fsize = folio_size(folio); unsigned int rsize = server->rsize; @@ -324,21 +322,57 @@ out: } /* - * Read a page over NFS. - * We read the page synchronously in the following case: - * - The error flag is set for this page. This happens only when a - * previous async read operation failed. + * Actually read a folio over the wire. */ -int nfs_read_folio(struct file *file, struct folio *folio) +static int nfs_do_read_folio(struct file *file, struct folio *folio) { struct inode *inode = file_inode(file); struct nfs_pageio_descriptor pgio; struct nfs_open_context *ctx; int ret; - trace_nfs_aop_readpage(inode, folio); + ctx = get_nfs_open_context(nfs_file_open_context(file)); + + xchg(&ctx->error, 0); + nfs_pageio_init_read(&pgio, inode, false, + &nfs_async_read_completion_ops); + + ret = nfs_read_add_folio(&pgio, ctx, folio); + if (ret) + goto out_put; + + nfs_pageio_complete_read(&pgio); + nfs_update_delegated_atime(inode); + if (pgio.pg_error < 0) { + ret = pgio.pg_error; + goto out_put; + } + + ret = folio_wait_locked_killable(folio); + if (!folio_test_uptodate(folio) && !ret) + ret = xchg(&ctx->error, 0); + +out_put: + put_nfs_open_context(ctx); + return ret; +} + +/* + * Synchronously read a folio. + * + * This is not heavily used as most users to try an asynchronous + * large read through ->readahead first. + */ +int nfs_read_folio(struct file *file, struct folio *folio) +{ + struct inode *inode = file_inode(file); + loff_t pos = folio_pos(folio); + size_t len = folio_size(folio); + int ret; + + trace_nfs_aop_readpage(inode, pos, len); nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); - task_io_account_read(folio_size(folio)); + task_io_account_read(len); /* * Try to flush any pending writes to the file.. @@ -358,30 +392,10 @@ int nfs_read_folio(struct file *file, struct folio *folio) goto out_unlock; ret = nfs_netfs_read_folio(file, folio); - if (!ret) - goto out; - - ctx = get_nfs_open_context(nfs_file_open_context(file)); - - xchg(&ctx->error, 0); - nfs_pageio_init_read(&pgio, inode, false, - &nfs_async_read_completion_ops); - - ret = nfs_read_add_folio(&pgio, ctx, folio); if (ret) - goto out_put; - - nfs_pageio_complete_read(&pgio); - ret = pgio.pg_error < 0 ? pgio.pg_error : 0; - if (!ret) { - ret = folio_wait_locked_killable(folio); - if (!folio_test_uptodate(folio) && !ret) - ret = xchg(&ctx->error, 0); - } -out_put: - put_nfs_open_context(ctx); + ret = nfs_do_read_folio(file, folio); out: - trace_nfs_aop_readpage_done(inode, folio, ret); + trace_nfs_aop_readpage_done(inode, pos, len, ret); return ret; out_unlock: folio_unlock(folio); @@ -428,6 +442,7 @@ void nfs_readahead(struct readahead_control *ractl) } nfs_pageio_complete_read(&pgio); + nfs_update_delegated_atime(inode); put_nfs_open_context(ctx); out: diff --git a/fs/nfs/super.c b/fs/nfs/super.c index dc03f98f7616..9eea9e62afc9 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -47,6 +47,7 @@ #include <linux/vfs.h> #include <linux/inet.h> #include <linux/in6.h> +#include <linux/sched.h> #include <linux/slab.h> #include <net/ipv6.h> #include <linux/netdevice.h> @@ -72,6 +73,7 @@ #include "nfs.h" #include "netns.h" #include "sysfs.h" +#include "nfs4idmap.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -228,6 +230,7 @@ static int __nfs_list_for_each_server(struct list_head *head, ret = fn(server, data); if (ret) goto out; + cond_resched(); rcu_read_lock(); } rcu_read_unlock(); @@ -451,8 +454,12 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, { NFS_MOUNT_NONLM, ",nolock", "" }, { NFS_MOUNT_NOACL, ",noacl", "" }, { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" }, + { NFS_MOUNT_FORCE_RDIRPLUS, ",rdirplus=force", "" }, { NFS_MOUNT_UNSHARED, ",nosharecache", "" }, { NFS_MOUNT_NORESVPORT, ",noresvport", "" }, + { NFS_MOUNT_NETUNREACH_FATAL, + ",fatal_neterrors=ENETDOWN:ENETUNREACH", + ",fatal_neterrors=none" }, { 0, NULL, NULL } }; const struct proc_nfs_info *nfs_infop; @@ -549,6 +556,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, else seq_puts(m, ",local_lock=posix"); + if (nfss->flags & NFS_MOUNT_NO_ALIGNWRITE) + seq_puts(m, ",noalignwrite"); + if (nfss->flags & NFS_MOUNT_WRITE_EAGER) { if (nfss->flags & NFS_MOUNT_WRITE_WAIT) seq_puts(m, ",write=wait"); @@ -880,7 +890,15 @@ static int nfs_request_mount(struct fs_context *fc, * Now ask the mount server to map our export path * to a file handle. */ - status = nfs_mount(&request, ctx->timeo, ctx->retrans); + if ((request.protocol == XPRT_TRANSPORT_UDP) == + !(ctx->flags & NFS_MOUNT_TCP)) + /* + * NFS protocol and mount protocol are both UDP or neither UDP + * so timeouts are compatible. Use NFS timeouts for MOUNT + */ + status = nfs_mount(&request, ctx->timeo, ctx->retrans); + else + status = nfs_mount(&request, NFS_UNSPEC_TIMEO, NFS_UNSPEC_RETRANS); if (status != 0) { dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n", request.hostname, status); @@ -901,6 +919,16 @@ static struct nfs_server *nfs_try_mount_request(struct fs_context *fc) rpc_authflavor_t authlist[NFS_MAX_SECFLAVORS]; unsigned int authlist_len = ARRAY_SIZE(authlist); + /* make sure 'nolock'/'lock' override the 'local_lock' mount option */ + if (ctx->lock_status) { + if (ctx->lock_status == NFS_LOCK_NOLOCK) { + ctx->flags |= NFS_MOUNT_NONLM; + ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); + } else { + ctx->flags &= ~NFS_MOUNT_NONLM; + ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); + } + } status = nfs_request_mount(fc, ctx->mntfh, authlist, &authlist_len); if (status) return ERR_PTR(status); diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 0e27a2e4e68b..58146e935402 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -32,47 +32,39 @@ static int nfs_symlink_filler(struct file *file, struct folio *folio) int error; error = NFS_PROTO(inode)->readlink(inode, &folio->page, 0, PAGE_SIZE); - if (error < 0) - goto error; - folio_mark_uptodate(folio); - folio_unlock(folio); - return 0; - -error: - folio_set_error(folio); - folio_unlock(folio); - return -EIO; + folio_end_read(folio, error == 0); + return error; } static const char *nfs_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { - struct page *page; + struct folio *folio; void *err; if (!dentry) { err = ERR_PTR(nfs_revalidate_mapping_rcu(inode)); if (err) return err; - page = find_get_page(inode->i_mapping, 0); - if (!page) + folio = filemap_get_folio(inode->i_mapping, 0); + if (IS_ERR(folio)) return ERR_PTR(-ECHILD); - if (!PageUptodate(page)) { - put_page(page); + if (!folio_test_uptodate(folio)) { + folio_put(folio); return ERR_PTR(-ECHILD); } } else { err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping)); if (err) return err; - page = read_cache_page(&inode->i_data, 0, nfs_symlink_filler, + folio = read_cache_folio(&inode->i_data, 0, nfs_symlink_filler, NULL); - if (IS_ERR(page)) - return ERR_CAST(page); + if (IS_ERR(folio)) + return ERR_CAST(folio); } - set_delayed_call(done, page_put_link, page); - return page_address(page); + set_delayed_call(done, page_put_link, folio); + return folio_address(folio); } /* diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index e645be1a3381..f579df0e8d67 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -14,7 +14,7 @@ static struct ctl_table_header *nfs_callback_sysctl_table; -static struct ctl_table nfs_cb_sysctls[] = { +static const struct ctl_table nfs_cb_sysctls[] = { { .procname = "nfs_mountpoint_timeout", .data = &nfs_mountpoint_expiry_timeout, diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index bf378ecd5d9f..37cb2b776435 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -14,6 +14,7 @@ #include <linux/rcupdate.h> #include <linux/lockd/lockd.h> +#include "internal.h" #include "nfs4_fs.h" #include "netns.h" #include "sysfs.h" @@ -228,6 +229,25 @@ static void shutdown_client(struct rpc_clnt *clnt) rpc_cancel_tasks(clnt, -EIO, shutdown_match_client, NULL); } +/* + * Shut down the nfs_client only once all the superblocks + * have been shut down. + */ +static void shutdown_nfs_client(struct nfs_client *clp) +{ + struct nfs_server *server; + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + if (!(server->flags & NFS_MOUNT_SHUTDOWN)) { + rcu_read_unlock(); + return; + } + } + rcu_read_unlock(); + nfs_mark_client_ready(clp, -EIO); + shutdown_client(clp->cl_rpcclient); +} + static ssize_t shutdown_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -259,7 +279,6 @@ shutdown_store(struct kobject *kobj, struct kobj_attribute *attr, server->flags |= NFS_MOUNT_SHUTDOWN; shutdown_client(server->client); - shutdown_client(server->nfs_client->cl_rpcclient); if (!IS_ERR(server->client_acl)) shutdown_client(server->client_acl); @@ -267,11 +286,44 @@ shutdown_store(struct kobject *kobj, struct kobj_attribute *attr, if (server->nlm_host) shutdown_client(server->nlm_host->h_rpcclnt); out: + shutdown_nfs_client(server->nfs_client); return count; } static struct kobj_attribute nfs_sysfs_attr_shutdown = __ATTR_RW(shutdown); +#if IS_ENABLED(CONFIG_NFS_V4_1) +static ssize_t +implid_domain_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct nfs_server *server = container_of(kobj, struct nfs_server, kobj); + struct nfs41_impl_id *impl_id = server->nfs_client->cl_implid; + + if (!impl_id || strlen(impl_id->domain) == 0) + return 0; //sysfs_emit(buf, ""); + return sysfs_emit(buf, "%s\n", impl_id->domain); +} + +static struct kobj_attribute nfs_sysfs_attr_implid_domain = __ATTR_RO(implid_domain); + + +static ssize_t +implid_name_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct nfs_server *server = container_of(kobj, struct nfs_server, kobj); + struct nfs41_impl_id *impl_id = server->nfs_client->cl_implid; + + if (!impl_id || strlen(impl_id->name) == 0) + return 0; //sysfs_emit(buf, ""); + return sysfs_emit(buf, "%s\n", impl_id->name); +} + +static struct kobj_attribute nfs_sysfs_attr_implid_name = __ATTR_RO(implid_name); + +#endif /* IS_ENABLED(CONFIG_NFS_V4_1) */ + #define RPC_CLIENT_NAME_SIZE 64 void nfs_sysfs_link_rpc_client(struct nfs_server *server, @@ -280,9 +332,9 @@ void nfs_sysfs_link_rpc_client(struct nfs_server *server, char name[RPC_CLIENT_NAME_SIZE]; int ret; - strcpy(name, clnt->cl_program->name); - strcat(name, uniq ? uniq : ""); - strcat(name, "_client"); + strscpy(name, clnt->cl_program->name, sizeof(name)); + strncat(name, uniq ? uniq : "", sizeof(name) - strlen(name) - 1); + strncat(name, "_client", sizeof(name) - strlen(name) - 1); ret = sysfs_create_link_nowarn(&server->kobj, &clnt->cl_sysfs->kobject, name); @@ -309,6 +361,32 @@ static struct kobj_type nfs_sb_ktype = { .child_ns_type = nfs_netns_object_child_ns_type, }; +#if IS_ENABLED(CONFIG_NFS_V4_1) +static void nfs_sysfs_add_nfsv41_server(struct nfs_server *server) +{ + int ret; + + if (!server->nfs_client->cl_implid) + return; + + ret = sysfs_create_file_ns(&server->kobj, &nfs_sysfs_attr_implid_domain.attr, + nfs_netns_server_namespace(&server->kobj)); + if (ret < 0) + pr_warn("NFS: sysfs_create_file_ns for server-%d failed (%d)\n", + server->s_sysfs_id, ret); + + ret = sysfs_create_file_ns(&server->kobj, &nfs_sysfs_attr_implid_name.attr, + nfs_netns_server_namespace(&server->kobj)); + if (ret < 0) + pr_warn("NFS: sysfs_create_file_ns for server-%d failed (%d)\n", + server->s_sysfs_id, ret); +} +#else /* CONFIG_NFS_V4_1 */ +static inline void nfs_sysfs_add_nfsv41_server(struct nfs_server *server) +{ +} +#endif /* CONFIG_NFS_V4_1 */ + void nfs_sysfs_add_server(struct nfs_server *server) { int ret; @@ -325,6 +403,8 @@ void nfs_sysfs_add_server(struct nfs_server *server) if (ret < 0) pr_warn("NFS: sysfs_create_file_ns for server-%d failed (%d)\n", server->s_sysfs_id, ret); + + nfs_sysfs_add_nfsv41_server(server); } EXPORT_SYMBOL_GPL(nfs_sysfs_add_server); diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 0110299643a2..b55467911648 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -232,6 +232,8 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode) dentry->d_fsdata = NULL; spin_unlock(&dentry->d_lock); + NFS_PROTO(inode)->return_delegation(inode); + if (NFS_STALE(inode) || !nfs_call_unlink(dentry, inode, data)) nfs_free_unlinkdata(data); } @@ -462,18 +464,17 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) sdentry = NULL; do { - int slen; dput(sdentry); sillycounter++; - slen = scnprintf(silly, sizeof(silly), - SILLYNAME_PREFIX "%0*llx%0*x", - SILLYNAME_FILEID_LEN, fileid, - SILLYNAME_COUNTER_LEN, sillycounter); + scnprintf(silly, sizeof(silly), + SILLYNAME_PREFIX "%0*llx%0*x", + SILLYNAME_FILEID_LEN, fileid, + SILLYNAME_COUNTER_LEN, sillycounter); dfprintk(VFS, "NFS: trying to rename %pd to %s\n", dentry, silly); - sdentry = lookup_one_len(silly, dentry->d_parent, slen); + sdentry = lookup_noperm(&QSTR(silly), dentry->d_parent); /* * N.B. Better to return EBUSY here ... it could be * dangerous to delete the file while it's in use. diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5de85d725fb9..23df8b214474 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -63,9 +63,6 @@ static void nfs_clear_request_commit(struct nfs_commit_info *cinfo, struct nfs_page *req); static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, struct inode *inode); -static struct nfs_page * -nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, - struct folio *folio); static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; @@ -172,28 +169,23 @@ nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode) return 0; } -static struct nfs_page *nfs_folio_private_request(struct folio *folio) -{ - return folio_get_private(folio); -} - /** - * nfs_folio_find_private_request - find head request associated with a folio + * nfs_folio_find_head_request - find head request associated with a folio * @folio: pointer to folio * * must be called while holding the inode lock. * * returns matching head request with reference held, or NULL if not found. */ -static struct nfs_page *nfs_folio_find_private_request(struct folio *folio) +static struct nfs_page *nfs_folio_find_head_request(struct folio *folio) { - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; struct nfs_page *req; if (!folio_test_private(folio)) return NULL; spin_lock(&mapping->i_private_lock); - req = nfs_folio_private_request(folio); + req = folio->private; if (req) { WARN_ON_ONCE(req->wb_head != req); kref_get(&req->wb_kref); @@ -202,86 +194,20 @@ static struct nfs_page *nfs_folio_find_private_request(struct folio *folio) return req; } -static struct nfs_page *nfs_folio_find_swap_request(struct folio *folio) -{ - struct inode *inode = folio_file_mapping(folio)->host; - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_page *req = NULL; - if (!folio_test_swapcache(folio)) - return NULL; - mutex_lock(&nfsi->commit_mutex); - if (folio_test_swapcache(folio)) { - req = nfs_page_search_commits_for_head_request_locked(nfsi, - folio); - if (req) { - WARN_ON_ONCE(req->wb_head != req); - kref_get(&req->wb_kref); - } - } - mutex_unlock(&nfsi->commit_mutex); - return req; -} - -/** - * nfs_folio_find_head_request - find head request associated with a folio - * @folio: pointer to folio - * - * returns matching head request with reference held, or NULL if not found. - */ -static struct nfs_page *nfs_folio_find_head_request(struct folio *folio) -{ - struct nfs_page *req; - - req = nfs_folio_find_private_request(folio); - if (!req) - req = nfs_folio_find_swap_request(folio); - return req; -} - -static struct nfs_page *nfs_folio_find_and_lock_request(struct folio *folio) -{ - struct inode *inode = folio_file_mapping(folio)->host; - struct nfs_page *req, *head; - int ret; - - for (;;) { - req = nfs_folio_find_head_request(folio); - if (!req) - return req; - head = nfs_page_group_lock_head(req); - if (head != req) - nfs_release_request(req); - if (IS_ERR(head)) - return head; - ret = nfs_cancel_remove_inode(head, inode); - if (ret < 0) { - nfs_unlock_and_release_request(head); - return ERR_PTR(ret); - } - /* Ensure that nobody removed the request before we locked it */ - if (head == nfs_folio_private_request(folio)) - break; - if (folio_test_swapcache(folio)) - break; - nfs_unlock_and_release_request(head); - } - return head; -} - /* Adjust the file length if we're writing beyond the end */ static void nfs_grow_file(struct folio *folio, unsigned int offset, unsigned int count) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; loff_t end, i_size; pgoff_t end_index; spin_lock(&inode->i_lock); i_size = i_size_read(inode); end_index = ((i_size - 1) >> folio_shift(folio)) << folio_order(folio); - if (i_size > 0 && folio_index(folio) < end_index) + if (i_size > 0 && folio->index < end_index) goto out; - end = folio_file_pos(folio) + (loff_t)offset + (loff_t)count; + end = folio_pos(folio) + (loff_t)offset + (loff_t)count; if (i_size >= end) goto out; trace_nfs_size_grow(inode, end); @@ -289,6 +215,8 @@ static void nfs_grow_file(struct folio *folio, unsigned int offset, NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); out: + /* Atomically update timestamps if they are delegated to us. */ + nfs_update_delegated_mtime_locked(inode); spin_unlock(&inode->i_lock); nfs_fscache_invalidate(inode, 0); } @@ -309,9 +237,8 @@ static void nfs_set_pageerror(struct address_space *mapping) static void nfs_mapping_set_error(struct folio *folio, int error) { - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; - folio_set_error(folio); filemap_set_wb_err(mapping, error); if (mapping->host) errseq_set(&mapping->host->i_sb->s_wb_err, @@ -410,7 +337,7 @@ int nfs_congestion_kb; static void nfs_folio_set_writeback(struct folio *folio) { - struct nfs_server *nfss = NFS_SERVER(folio_file_mapping(folio)->host); + struct nfs_server *nfss = NFS_SERVER(folio->mapping->host); folio_start_writeback(folio); if (atomic_long_inc_return(&nfss->writeback) > NFS_CONGESTION_ON_THRESH) @@ -419,12 +346,14 @@ static void nfs_folio_set_writeback(struct folio *folio) static void nfs_folio_end_writeback(struct folio *folio) { - struct nfs_server *nfss = NFS_SERVER(folio_file_mapping(folio)->host); + struct nfs_server *nfss = NFS_SERVER(folio->mapping->host); folio_end_writeback(folio); if (atomic_long_dec_return(&nfss->writeback) < - NFS_CONGESTION_OFF_THRESH) + NFS_CONGESTION_OFF_THRESH) { nfss->write_congested = 0; + wake_up_all(&nfss->write_congestion_wait); + } } static void nfs_page_end_writeback(struct nfs_page *req) @@ -548,6 +477,74 @@ void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, nfs_destroy_unlinked_subrequests(destroy_list, head, inode); } +/** + * nfs_wait_on_request - Wait for a request to complete. + * @req: request to wait upon. + * + * Interruptible by fatal signals only. + * The user is responsible for holding a count on the request. + */ +static int nfs_wait_on_request(struct nfs_page *req) +{ + if (!test_bit(PG_BUSY, &req->wb_flags)) + return 0; + set_bit(PG_CONTENDED2, &req->wb_flags); + smp_mb__after_atomic(); + return wait_on_bit_io(&req->wb_flags, PG_BUSY, + TASK_UNINTERRUPTIBLE); +} + +/* + * nfs_unroll_locks - unlock all newly locked reqs and wait on @req + * @head: head request of page group, must be holding head lock + * @req: request that couldn't lock and needs to wait on the req bit lock + * + * This is a helper function for nfs_lock_and_join_requests + * returns 0 on success, < 0 on error. + */ +static void +nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req) +{ + struct nfs_page *tmp; + + /* relinquish all the locks successfully grabbed this run */ + for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) { + if (!kref_read(&tmp->wb_kref)) + continue; + nfs_unlock_and_release_request(tmp); + } +} + +/* + * nfs_page_group_lock_subreq - try to lock a subrequest + * @head: head request of page group + * @subreq: request to lock + * + * This is a helper function for nfs_lock_and_join_requests which + * must be called with the head request and page group both locked. + * On error, it returns with the page group unlocked. + */ +static int +nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq) +{ + int ret; + + if (!kref_get_unless_zero(&subreq->wb_kref)) + return 0; + while (!nfs_lock_request(subreq)) { + nfs_page_group_unlock(head); + ret = nfs_wait_on_request(subreq); + if (!ret) + ret = nfs_page_group_lock(head); + if (ret < 0) { + nfs_unroll_locks(head, subreq); + nfs_release_request(subreq); + return ret; + } + } + return 0; +} + /* * nfs_lock_and_join_requests - join all subreqs to the head req * @folio: the folio used to lookup the "page group" of nfs_page structures @@ -565,31 +562,61 @@ void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, */ static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio) { - struct inode *inode = folio_file_mapping(folio)->host; - struct nfs_page *head; + struct inode *inode = folio->mapping->host; + struct nfs_page *head, *subreq; struct nfs_commit_info cinfo; int ret; - nfs_init_cinfo_from_inode(&cinfo, inode); /* * A reference is taken only on the head request which acts as a * reference to the whole page group - the group will not be destroyed * until the head reference is released. */ - head = nfs_folio_find_and_lock_request(folio); - if (IS_ERR_OR_NULL(head)) - return head; +retry: + head = nfs_folio_find_head_request(folio); + if (!head) + return NULL; - /* lock each request in the page group */ - ret = nfs_page_group_lock_subrequests(head); - if (ret < 0) { + while (!nfs_lock_request(head)) { + ret = nfs_wait_on_request(head); + if (ret < 0) { + nfs_release_request(head); + return ERR_PTR(ret); + } + } + + /* Ensure that nobody removed the request before we locked it */ + if (head != folio->private) { nfs_unlock_and_release_request(head); - return ERR_PTR(ret); + goto retry; } - nfs_join_page_group(head, &cinfo, inode); + ret = nfs_cancel_remove_inode(head, inode); + if (ret < 0) + goto out_unlock; + + ret = nfs_page_group_lock(head); + if (ret < 0) + goto out_unlock; + /* lock each request in the page group */ + for (subreq = head->wb_this_page; + subreq != head; + subreq = subreq->wb_this_page) { + ret = nfs_page_group_lock_subreq(head, subreq); + if (ret < 0) + goto out_unlock; + } + + nfs_page_group_unlock(head); + + nfs_init_cinfo_from_inode(&cinfo, inode); + nfs_join_page_group(head, &cinfo, inode); return head; + +out_unlock: + nfs_unlock_and_release_request(head); + return ERR_PTR(ret); } static void nfs_write_error(struct nfs_page *req, int error) @@ -641,7 +668,7 @@ static int nfs_page_async_flush(struct folio *folio, nfs_redirty_request(req); pgio->pg_error = 0; } else - nfs_add_stats(folio_file_mapping(folio)->host, + nfs_add_stats(folio->mapping->host, NFSIOS_WRITEPAGES, 1); out: return ret; @@ -653,7 +680,7 @@ out_launder: static int nfs_do_writepage(struct folio *folio, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) { - nfs_pageio_cond_complete(pgio, folio_index(folio)); + nfs_pageio_cond_complete(pgio, folio->index); return nfs_page_async_flush(folio, wbc, pgio); } @@ -664,7 +691,7 @@ static int nfs_writepage_locked(struct folio *folio, struct writeback_control *wbc) { struct nfs_pageio_descriptor pgio; - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; int err; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); @@ -698,12 +725,17 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) struct nfs_pageio_descriptor pgio; struct nfs_io_completion *ioc = NULL; unsigned int mntflags = NFS_SERVER(inode)->flags; + struct nfs_server *nfss = NFS_SERVER(inode); int priority = 0; int err; - if (wbc->sync_mode == WB_SYNC_NONE && - NFS_SERVER(inode)->write_congested) - return 0; + /* Wait with writeback until write congestion eases */ + if (wbc->sync_mode == WB_SYNC_NONE && nfss->write_congested) { + err = wait_event_killable(nfss->write_congestion_wait, + nfss->write_congested == 0); + if (err) + return err; + } nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); @@ -742,24 +774,17 @@ out_err: static void nfs_inode_add_request(struct nfs_page *req) { struct folio *folio = nfs_page_to_folio(req); - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; struct nfs_inode *nfsi = NFS_I(mapping->host); WARN_ON_ONCE(req->wb_this_page != req); /* Lock the request! */ nfs_lock_request(req); - - /* - * Swap-space should not get truncated. Hence no need to plug the race - * with invalidate/truncate. - */ spin_lock(&mapping->i_private_lock); - if (likely(!folio_test_swapcache(folio))) { - set_bit(PG_MAPPED, &req->wb_flags); - folio_set_private(folio); - folio->private = req; - } + set_bit(PG_MAPPED, &req->wb_flags); + folio_set_private(folio); + folio->private = req; spin_unlock(&mapping->i_private_lock); atomic_long_inc(&nfsi->nrequests); /* this a head request for a page group - mark it as having an @@ -779,10 +804,10 @@ static void nfs_inode_remove_request(struct nfs_page *req) if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { struct folio *folio = nfs_page_to_folio(req->wb_head); - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; spin_lock(&mapping->i_private_lock); - if (likely(folio && !folio_test_swapcache(folio))) { + if (likely(folio)) { folio->private = NULL; folio_clear_private(folio); clear_bit(PG_MAPPED, &req->wb_head->wb_flags); @@ -803,38 +828,6 @@ static void nfs_mark_request_dirty(struct nfs_page *req) filemap_dirty_folio(folio_mapping(folio), folio); } -/* - * nfs_page_search_commits_for_head_request_locked - * - * Search through commit lists on @inode for the head request for @folio. - * Must be called while holding the inode (which is cinfo) lock. - * - * Returns the head request if found, or NULL if not found. - */ -static struct nfs_page * -nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, - struct folio *folio) -{ - struct nfs_page *freq, *t; - struct nfs_commit_info cinfo; - struct inode *inode = &nfsi->vfs_inode; - - nfs_init_cinfo_from_inode(&cinfo, inode); - - /* search through pnfs commit lists */ - freq = pnfs_search_commit_reqs(inode, &cinfo, folio); - if (freq) - return freq->wb_head; - - /* Linearly search the commit list for the correct request */ - list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) { - if (nfs_page_to_folio(freq) == folio) - return freq->wb_head; - } - - return NULL; -} - /** * nfs_request_add_commit_list_locked - add request to a commit list * @req: pointer to a struct nfs_page @@ -941,7 +934,7 @@ static void nfs_folio_clear_commit(struct folio *folio) long nr = folio_nr_pages(folio); node_stat_mod_folio(folio, NR_WRITEBACK, -nr); - wb_stat_mod(&inode_to_bdi(folio_file_mapping(folio)->host)->wb, + wb_stat_mod(&inode_to_bdi(folio->mapping->host)->wb, WB_WRITEBACK, -nr); } } @@ -1126,7 +1119,7 @@ out_flushme: */ nfs_mark_request_dirty(req); nfs_unlock_and_release_request(req); - error = nfs_wb_folio(folio_file_mapping(folio)->host, folio); + error = nfs_wb_folio(folio->mapping->host, folio); return (error < 0) ? ERR_PTR(error) : NULL; } @@ -1202,7 +1195,7 @@ int nfs_flush_incompatible(struct file *file, struct folio *folio) nfs_release_request(req); if (!do_flush) return 0; - status = nfs_wb_folio(folio_file_mapping(folio)->host, folio); + status = nfs_wb_folio(folio->mapping->host, folio); } while (status == 0); return status; } @@ -1276,7 +1269,7 @@ out: */ static bool nfs_folio_write_uptodate(struct folio *folio, unsigned int pagelen) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; struct nfs_inode *nfsi = NFS_I(inode); if (nfs_have_delegated_attributes(inode)) @@ -1315,12 +1308,15 @@ static int nfs_can_extend_write(struct file *file, struct folio *folio, struct file_lock_context *flctx = locks_inode_context(inode); struct file_lock *fl; int ret; + unsigned int mntflags = NFS_SERVER(inode)->flags; + if (mntflags & NFS_MOUNT_NO_ALIGNWRITE) + return 0; if (file->f_flags & O_DSYNC) return 0; if (!nfs_folio_write_uptodate(folio, pagelen)) return 0; - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) + if (nfs_have_write_delegation(inode)) return 1; if (!flctx || (list_empty_careful(&flctx->flc_flock) && list_empty_careful(&flctx->flc_posix))) @@ -1354,7 +1350,7 @@ int nfs_update_folio(struct file *file, struct folio *folio, unsigned int offset, unsigned int count) { struct nfs_open_context *ctx = nfs_file_open_context(file); - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; struct inode *inode = mapping->host; unsigned int pagelen = nfs_folio_length(folio); int status = 0; @@ -1362,14 +1358,18 @@ int nfs_update_folio(struct file *file, struct folio *folio, nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); dprintk("NFS: nfs_update_folio(%pD2 %d@%lld)\n", file, count, - (long long)(folio_file_pos(folio) + offset)); + (long long)(folio_pos(folio) + offset)); if (!count) goto out; if (nfs_can_extend_write(file, folio, pagelen)) { - count = max(count + offset, pagelen); - offset = 0; + unsigned int end = count + offset; + + offset = round_down(offset, PAGE_SIZE); + if (end < pagelen) + end = min(round_up(end, PAGE_SIZE), pagelen); + count = end - offset; } status = nfs_writepage_setup(ctx, folio, offset, count); @@ -1514,6 +1514,13 @@ void nfs_writeback_update_inode(struct nfs_pgio_header *hdr) struct nfs_fattr *fattr = &hdr->fattr; struct inode *inode = hdr->inode; + if (nfs_have_delegated_mtime(inode)) { + spin_lock(&inode->i_lock); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS); + spin_unlock(&inode->i_lock); + return; + } + spin_lock(&inode->i_lock); nfs_writeback_check_extend(hdr, fattr); nfs_post_op_update_inode_force_wcc_locked(inode, fattr); @@ -1670,7 +1677,8 @@ EXPORT_SYMBOL_GPL(nfs_commitdata_release); int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, const struct nfs_rpc_ops *nfs_ops, const struct rpc_call_ops *call_ops, - int how, int flags) + int how, int flags, + struct nfsd_file *localio) { struct rpc_task *task; int priority = flush_task_priority(how); @@ -1699,6 +1707,9 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, dprintk("NFS: initiated commit call\n"); + if (localio) + return nfs_local_commit(localio, data, call_ops, how); + task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -1798,6 +1809,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, struct nfs_commit_info *cinfo) { struct nfs_commit_data *data; + struct nfsd_file *localio; unsigned short task_flags = 0; /* another commit raced with us */ @@ -1814,9 +1826,13 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, nfs_init_commit(data, head, NULL, cinfo); if (NFS_SERVER(inode)->nfs_client->cl_minorversion) task_flags = RPC_TASK_MOVEABLE; + + localio = nfs_local_open_fh(NFS_SERVER(inode)->nfs_client, data->cred, + data->args.fh, &data->context->nfl, + data->context->mode); return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), data->mds_ops, how, - RPC_TASK_CRED_NOREF | task_flags); + RPC_TASK_CRED_NOREF | task_flags, localio); } /* @@ -1837,7 +1853,6 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) struct nfs_page *req; int status = data->task.tk_status; struct nfs_commit_info cinfo; - struct nfs_server *nfss; struct folio *folio; while (!list_empty(&data->pages)) { @@ -1880,9 +1895,6 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) /* Latency breaker */ cond_resched(); } - nfss = NFS_SERVER(data->inode); - if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) - nfss->write_congested = 0; nfs_init_cinfo(&cinfo, data->inode, data->dreq); nfs_commit_end(cinfo.mds); @@ -2073,17 +2085,17 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio) */ int nfs_wb_folio(struct inode *inode, struct folio *folio) { - loff_t range_start = folio_file_pos(folio); - loff_t range_end = range_start + (loff_t)folio_size(folio) - 1; + loff_t range_start = folio_pos(folio); + size_t len = folio_size(folio); struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = 0, .range_start = range_start, - .range_end = range_end, + .range_end = range_start + len - 1, }; int ret; - trace_nfs_writeback_folio(inode, folio); + trace_nfs_writeback_folio(inode, range_start, len); for (;;) { folio_wait_writeback(folio); @@ -2101,7 +2113,7 @@ int nfs_wb_folio(struct inode *inode, struct folio *folio) goto out_error; } out_error: - trace_nfs_writeback_folio_done(inode, folio, ret); + trace_nfs_writeback_folio_done(inode, range_start, len, ret); return ret; } @@ -2120,10 +2132,10 @@ int nfs_migrate_folio(struct address_space *mapping, struct folio *dst, if (folio_test_private(src)) return -EBUSY; - if (folio_test_fscache(src)) { + if (folio_test_private_2(src)) { /* [DEPRECATED] */ if (mode == MIGRATE_ASYNC) return -EBUSY; - folio_wait_fscache(src); + folio_wait_private_2(src); } return migrate_folio(mapping, dst, src, mode); |