summaryrefslogtreecommitdiff
path: root/fs/nfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs')
-rw-r--r--fs/nfs/Kconfig10
-rw-r--r--fs/nfs/Makefile1
-rw-r--r--fs/nfs/blocklayout/blocklayout.c38
-rw-r--r--fs/nfs/blocklayout/blocklayout.h9
-rw-r--r--fs/nfs/blocklayout/dev.c114
-rw-r--r--fs/nfs/callback.c6
-rw-r--r--fs/nfs/callback.h5
-rw-r--r--fs/nfs/callback_proc.c21
-rw-r--r--fs/nfs/callback_xdr.c48
-rw-r--r--fs/nfs/client.c104
-rw-r--r--fs/nfs/delegation.c190
-rw-r--r--fs/nfs/delegation.h46
-rw-r--r--fs/nfs/dir.c236
-rw-r--r--fs/nfs/direct.c49
-rw-r--r--fs/nfs/export.c3
-rw-r--r--fs/nfs/file.c53
-rw-r--r--fs/nfs/filelayout/filelayout.c31
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c6
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c128
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.h1
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c12
-rw-r--r--fs/nfs/fs_context.c96
-rw-r--r--fs/nfs/fscache.c39
-rw-r--r--fs/nfs/fscache.h12
-rw-r--r--fs/nfs/getroot.c2
-rw-r--r--fs/nfs/inode.c180
-rw-r--r--fs/nfs/internal.h114
-rw-r--r--fs/nfs/io.c44
-rw-r--r--fs/nfs/iostat.h9
-rw-r--r--fs/nfs/localio.c867
-rw-r--r--fs/nfs/mount_clnt.c5
-rw-r--r--fs/nfs/namespace.c5
-rw-r--r--fs/nfs/netns.h6
-rw-r--r--fs/nfs/nfs.h4
-rw-r--r--fs/nfs/nfs2super.c1
-rw-r--r--fs/nfs/nfs2xdr.c70
-rw-r--r--fs/nfs/nfs3acl.c2
-rw-r--r--fs/nfs/nfs3client.c2
-rw-r--r--fs/nfs/nfs3proc.c93
-rw-r--r--fs/nfs/nfs3super.c1
-rw-r--r--fs/nfs/nfs3xdr.c108
-rw-r--r--fs/nfs/nfs42proc.c198
-rw-r--r--fs/nfs/nfs42xattr.c4
-rw-r--r--fs/nfs/nfs42xdr.c90
-rw-r--r--fs/nfs/nfs4_fs.h5
-rw-r--r--fs/nfs/nfs4client.c13
-rw-r--r--fs/nfs/nfs4proc.c497
-rw-r--r--fs/nfs/nfs4session.h4
-rw-r--r--fs/nfs/nfs4state.c65
-rw-r--r--fs/nfs/nfs4super.c1
-rw-r--r--fs/nfs/nfs4sysctl.c2
-rw-r--r--fs/nfs/nfs4trace.c7
-rw-r--r--fs/nfs/nfs4trace.h175
-rw-r--r--fs/nfs/nfs4xdr.c258
-rw-r--r--fs/nfs/nfstrace.h106
-rw-r--r--fs/nfs/pagelist.c139
-rw-r--r--fs/nfs/pnfs.c278
-rw-r--r--fs/nfs/pnfs.h60
-rw-r--r--fs/nfs/pnfs_dev.c3
-rw-r--r--fs/nfs/pnfs_nfs.c81
-rw-r--r--fs/nfs/proc.c29
-rw-r--r--fs/nfs/read.c83
-rw-r--r--fs/nfs/super.c30
-rw-r--r--fs/nfs/symlink.c32
-rw-r--r--fs/nfs/sysctl.c2
-rw-r--r--fs/nfs/sysfs.c88
-rw-r--r--fs/nfs/unlink.c13
-rw-r--r--fs/nfs/write.c352
68 files changed, 3814 insertions, 1572 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index f7e32d76e34d..07932ce9246c 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -2,8 +2,10 @@
config NFS_FS
tristate "NFS client support"
depends on INET && FILE_LOCKING && MULTIUSER
+ select CRC32
select LOCKD
select SUNRPC
+ select NFS_COMMON
select NFS_ACL_SUPPORT if NFS_V3_ACL
help
Choose Y here if you want to access files residing on other
@@ -33,12 +35,12 @@ config NFS_FS
config NFS_V2
tristate "NFS client support for NFS version 2"
depends on NFS_FS
- default y
+ default n
help
This option enables support for version 2 of the NFS protocol
(RFC 1094) in the kernel's NFS client.
- If unsure, say Y.
+ If unsure, say N.
config NFS_V3
tristate "NFS client support for NFS version 3"
@@ -169,7 +171,8 @@ config ROOT_NFS
config NFS_FSCACHE
bool "Provide NFS client caching support"
- depends on NFS_FS=m && NETFS_SUPPORT || NFS_FS=y && NETFS_SUPPORT=y
+ depends on NFS_FS
+ select NETFS_SUPPORT
select FSCACHE
help
Say Y here if you want NFS data to be cached locally on disc through
@@ -194,7 +197,6 @@ config NFS_USE_KERNEL_DNS
config NFS_DEBUG
bool
depends on NFS_FS && SUNRPC_DEBUG
- select CRC32
default y
config NFS_DISABLE_UDP_SUPPORT
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 5f6db37f461e..9fb2f2cac87e 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -13,6 +13,7 @@ nfs-y := client.o dir.o file.o getroot.o inode.o super.o \
nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
nfs-$(CONFIG_SYSCTL) += sysctl.o
nfs-$(CONFIG_NFS_FSCACHE) += fscache.o
+nfs-$(CONFIG_NFS_LOCALIO) += localio.o
obj-$(CONFIG_NFS_V2) += nfsv2.o
nfsv2-y := nfs2super.o proc.o nfs2xdr.o
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 6be13e0ec170..47189476b553 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -564,25 +564,45 @@ bl_find_get_deviceid(struct nfs_server *server,
gfp_t gfp_mask)
{
struct nfs4_deviceid_node *node;
- unsigned long start, end;
+ int err = -ENODEV;
retry:
node = nfs4_find_get_deviceid(server, id, cred, gfp_mask);
if (!node)
return ERR_PTR(-ENODEV);
- if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0)
- return node;
+ /*
+ * Devices that are marked unavailable are left in the cache with a
+ * timeout to avoid sending GETDEVINFO after every LAYOUTGET, or
+ * constantly attempting to register the device. Once marked as
+ * unavailable they must be deleted and never reused.
+ */
+ if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) {
+ unsigned long end = jiffies;
+ unsigned long start = end - PNFS_DEVICE_RETRY_TIMEOUT;
+
+ if (!time_in_range(node->timestamp_unavailable, start, end)) {
+ /* Uncork subsequent GETDEVINFO operations for this device */
+ nfs4_delete_deviceid(node->ld, node->nfs_client, id);
+ goto retry;
+ }
+ goto out_put;
+ }
- end = jiffies;
- start = end - PNFS_DEVICE_RETRY_TIMEOUT;
- if (!time_in_range(node->timestamp_unavailable, start, end)) {
- nfs4_delete_deviceid(node->ld, node->nfs_client, id);
- goto retry;
+ if (!bl_register_dev(container_of(node, struct pnfs_block_dev, node))) {
+ /*
+ * If we cannot register, treat this device as transient:
+ * Make a negative cache entry for the device
+ */
+ nfs4_mark_deviceid_unavailable(node);
+ goto out_put;
}
+ return node;
+
+out_put:
nfs4_put_deviceid_node(node);
- return ERR_PTR(-ENODEV);
+ return ERR_PTR(err);
}
static int
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index f1eeb4914199..6da40ca19570 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -104,20 +104,26 @@ struct pnfs_block_dev {
u64 start;
u64 len;
+ enum pnfs_block_volume_type type;
u32 nr_children;
struct pnfs_block_dev *children;
u64 chunk_size;
struct file *bdev_file;
u64 disk_offset;
+ unsigned long flags;
u64 pr_key;
- bool pr_registered;
bool (*map)(struct pnfs_block_dev *dev, u64 offset,
struct pnfs_block_dev_map *map);
};
+/* pnfs_block_dev flag bits */
+enum {
+ PNFS_BDEV_REGISTERED = 0,
+};
+
/* sector_t fields are all in 512-byte sectors */
struct pnfs_block_extent {
union {
@@ -172,6 +178,7 @@ struct bl_msg_hdr {
#define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */
/* dev.c */
+bool bl_register_dev(struct pnfs_block_dev *d);
struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server,
struct pnfs_device *pdev, gfp_t gfp_mask);
void bl_free_deviceid_node(struct nfs4_deviceid_node *d);
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index 93ef7f864980..cab8809f0e0f 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -10,12 +10,81 @@
#include <linux/pr.h>
#include "blocklayout.h"
+#include "../nfs4trace.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
+static void bl_unregister_scsi(struct pnfs_block_dev *dev)
+{
+ struct block_device *bdev = file_bdev(dev->bdev_file);
+ const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
+ int status;
+
+ status = ops->pr_register(bdev, dev->pr_key, 0, false);
+ if (status)
+ trace_bl_pr_key_unreg_err(bdev, dev->pr_key, status);
+ else
+ trace_bl_pr_key_unreg(bdev, dev->pr_key);
+}
+
+static bool bl_register_scsi(struct pnfs_block_dev *dev)
+{
+ struct block_device *bdev = file_bdev(dev->bdev_file);
+ const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
+ int status;
+
+ if (test_and_set_bit(PNFS_BDEV_REGISTERED, &dev->flags))
+ return true;
+
+ status = ops->pr_register(bdev, 0, dev->pr_key, true);
+ if (status) {
+ trace_bl_pr_key_reg_err(bdev, dev->pr_key, status);
+ return false;
+ }
+ trace_bl_pr_key_reg(bdev, dev->pr_key);
+ return true;
+}
+
+static void bl_unregister_dev(struct pnfs_block_dev *dev)
+{
+ u32 i;
+
+ if (dev->nr_children) {
+ for (i = 0; i < dev->nr_children; i++)
+ bl_unregister_dev(&dev->children[i]);
+ return;
+ }
+
+ if (dev->type == PNFS_BLOCK_VOLUME_SCSI &&
+ test_and_clear_bit(PNFS_BDEV_REGISTERED, &dev->flags))
+ bl_unregister_scsi(dev);
+}
+
+bool bl_register_dev(struct pnfs_block_dev *dev)
+{
+ u32 i;
+
+ if (dev->nr_children) {
+ for (i = 0; i < dev->nr_children; i++) {
+ if (!bl_register_dev(&dev->children[i])) {
+ while (i > 0)
+ bl_unregister_dev(&dev->children[--i]);
+ return false;
+ }
+ }
+ return true;
+ }
+
+ if (dev->type == PNFS_BLOCK_VOLUME_SCSI)
+ return bl_register_scsi(dev);
+ return true;
+}
+
static void
bl_free_device(struct pnfs_block_dev *dev)
{
+ bl_unregister_dev(dev);
+
if (dev->nr_children) {
int i;
@@ -23,17 +92,6 @@ bl_free_device(struct pnfs_block_dev *dev)
bl_free_device(&dev->children[i]);
kfree(dev->children);
} else {
- if (dev->pr_registered) {
- const struct pr_ops *ops =
- file_bdev(dev->bdev_file)->bd_disk->fops->pr_ops;
- int error;
-
- error = ops->pr_register(file_bdev(dev->bdev_file),
- dev->pr_key, 0, false);
- if (error)
- pr_err("failed to unregister PR key.\n");
- }
-
if (dev->bdev_file)
fput(dev->bdev_file);
}
@@ -314,7 +372,7 @@ bl_open_path(struct pnfs_block_volume *v, const char *prefix)
bdev_file = bdev_file_open_by_path(devname, BLK_OPEN_READ | BLK_OPEN_WRITE,
NULL, NULL);
if (IS_ERR(bdev_file)) {
- pr_warn("pNFS: failed to open device %s (%ld)\n",
+ dprintk("failed to open device %s (%ld)\n",
devname, PTR_ERR(bdev_file));
}
@@ -327,8 +385,9 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
- struct file *bdev_file;
+ struct block_device *bdev;
const struct pr_ops *ops;
+ struct file *bdev_file;
int error;
if (!bl_validate_designator(v))
@@ -344,35 +403,30 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
if (IS_ERR(bdev_file))
bdev_file = bl_open_path(v, "wwn-0x");
if (IS_ERR(bdev_file))
+ bdev_file = bl_open_path(v, "nvme-eui.");
+ if (IS_ERR(bdev_file)) {
+ pr_warn("pNFS: no device found for volume %*phN\n",
+ v->scsi.designator_len, v->scsi.designator);
return PTR_ERR(bdev_file);
+ }
d->bdev_file = bdev_file;
+ bdev = file_bdev(bdev_file);
- d->len = bdev_nr_bytes(file_bdev(d->bdev_file));
+ d->len = bdev_nr_bytes(bdev);
d->map = bl_map_simple;
d->pr_key = v->scsi.pr_key;
if (d->len == 0)
return -ENODEV;
- pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
- file_bdev(d->bdev_file)->bd_disk->disk_name, d->pr_key);
-
- ops = file_bdev(d->bdev_file)->bd_disk->fops->pr_ops;
+ ops = bdev->bd_disk->fops->pr_ops;
if (!ops) {
pr_err("pNFS: block device %s does not support reservations.",
- file_bdev(d->bdev_file)->bd_disk->disk_name);
+ bdev->bd_disk->disk_name);
error = -EINVAL;
goto out_blkdev_put;
}
- error = ops->pr_register(file_bdev(d->bdev_file), 0, d->pr_key, true);
- if (error) {
- pr_err("pNFS: failed to register key for block device %s.",
- file_bdev(d->bdev_file)->bd_disk->disk_name);
- goto out_blkdev_put;
- }
-
- d->pr_registered = true;
return 0;
out_blkdev_put:
@@ -458,7 +512,9 @@ static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
- switch (volumes[idx].type) {
+ d->type = volumes[idx].type;
+
+ switch (d->type) {
case PNFS_BLOCK_VOLUME_SIMPLE:
return bl_parse_simple(server, d, volumes, idx, gfp_mask);
case PNFS_BLOCK_VOLUME_SLICE:
@@ -470,7 +526,7 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
case PNFS_BLOCK_VOLUME_SCSI:
return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
default:
- dprintk("unsupported volume type: %d\n", volumes[idx].type);
+ dprintk("unsupported volume type: %d\n", d->type);
return -EIO;
}
}
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 8adfcd4c8c1a..86bdc7d23fb9 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -76,6 +76,8 @@ nfs4_callback_svc(void *vrqstp)
{
struct svc_rqst *rqstp = vrqstp;
+ svc_thread_init_status(rqstp, 0);
+
set_freezable();
while (!svc_thread_should_stop(rqstp))
@@ -209,10 +211,6 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
return ERR_PTR(-ENOMEM);
}
cb_info->serv = serv;
- /* As there is only one thread we need to over-ride the
- * default maximum of 80 connections
- */
- serv->sv_maxconn = 1024;
dprintk("nfs_callback_create_svc: service created\n");
return serv;
}
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 650758ee0d5f..154a6ed1299f 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -46,14 +46,15 @@ struct cb_compound_hdr_res {
struct cb_getattrargs {
struct nfs_fh fh;
- uint32_t bitmap[2];
+ uint32_t bitmap[3];
};
struct cb_getattrres {
__be32 status;
- uint32_t bitmap[2];
+ uint32_t bitmap[3];
uint64_t size;
uint64_t change_attr;
+ struct timespec64 atime;
struct timespec64 ctime;
struct timespec64 mtime;
};
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 76cea34477ae..8397c43358bd 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -37,7 +37,7 @@ __be32 nfs4_callback_getattr(void *argp, void *resp,
if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
goto out;
- res->bitmap[0] = res->bitmap[1] = 0;
+ memset(res->bitmap, 0, sizeof(res->bitmap));
res->status = htonl(NFS4ERR_BADHANDLE);
dprintk_rcu("NFS: GETATTR callback request from %s\n",
@@ -59,12 +59,16 @@ __be32 nfs4_callback_getattr(void *argp, void *resp,
res->change_attr = delegation->change_attr;
if (nfs_have_writebacks(inode))
res->change_attr++;
+ res->atime = inode_get_atime(inode);
res->ctime = inode_get_ctime(inode);
res->mtime = inode_get_mtime(inode);
- res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) &
- args->bitmap[0];
- res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) &
- args->bitmap[1];
+ res->bitmap[0] = (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE) &
+ args->bitmap[0];
+ res->bitmap[1] = (FATTR4_WORD1_TIME_ACCESS |
+ FATTR4_WORD1_TIME_METADATA |
+ FATTR4_WORD1_TIME_MODIFY) & args->bitmap[1];
+ res->bitmap[2] = (FATTR4_WORD2_TIME_DELEG_ACCESS |
+ FATTR4_WORD2_TIME_DELEG_MODIFY) & args->bitmap[2];
res->status = 0;
out_iput:
rcu_read_unlock();
@@ -319,9 +323,10 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
int stat;
if (args->cbl_recall_type == RETURN_FSID)
- stat = pnfs_destroy_layouts_byfsid(clp, &args->cbl_fsid, true);
+ stat = pnfs_layout_destroy_byfsid(clp, &args->cbl_fsid,
+ PNFS_LAYOUT_BULK_RETURN);
else
- stat = pnfs_destroy_layouts_byclid(clp, true);
+ stat = pnfs_layout_destroy_byclid(clp, PNFS_LAYOUT_BULK_RETURN);
if (stat != 0)
return NFS4ERR_DELAY;
return NFS4ERR_NOMATCHING_LAYOUT;
@@ -713,7 +718,7 @@ __be32 nfs4_callback_offload(void *data, void *dummy,
copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_KERNEL);
if (!copy)
- return htonl(NFS4ERR_SERVERFAULT);
+ return cpu_to_be32(NFS4ERR_DELAY);
spin_lock(&cps->clp->cl_lock);
rcu_read_lock();
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 9369488f2ed4..4254ba3ee7c5 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -25,8 +25,9 @@
#define CB_OP_GETATTR_BITMAP_MAXSZ (4 * 4) // bitmap length, 3 bitmaps
#define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \
CB_OP_GETATTR_BITMAP_MAXSZ + \
- /* change, size, ctime, mtime */\
- (2 + 2 + 3 + 3) * 4)
+ /* change, size, atime, ctime,
+ * mtime, deleg_atime, deleg_mtime */\
+ (2 + 2 + 3 + 3 + 3 + 3 + 3) * 4)
#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
#if defined(CONFIG_NFS_V4_1)
@@ -117,7 +118,9 @@ static __be32 decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
if (likely(attrlen > 0))
bitmap[0] = ntohl(*p++);
if (attrlen > 1)
- bitmap[1] = ntohl(*p);
+ bitmap[1] = ntohl(*p++);
+ if (attrlen > 2)
+ bitmap[2] = ntohl(*p);
return 0;
}
@@ -372,6 +375,8 @@ static __be32 decode_rc_list(struct xdr_stream *xdr,
rc_list->rcl_nrefcalls = ntohl(*p++);
if (rc_list->rcl_nrefcalls) {
+ if (unlikely(rc_list->rcl_nrefcalls > xdr->buf->len))
+ goto out;
p = xdr_inline_decode(xdr,
rc_list->rcl_nrefcalls * 2 * sizeof(uint32_t));
if (unlikely(p == NULL))
@@ -445,7 +450,7 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp,
void *argp)
{
struct cb_recallanyargs *args = argp;
- uint32_t bitmap[2];
+ uint32_t bitmap[3];
__be32 *p, status;
p = xdr_inline_decode(xdr, 4);
@@ -635,6 +640,13 @@ static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec64 *
return 0;
}
+static __be32 encode_attr_atime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time)
+{
+ if (!(bitmap[1] & FATTR4_WORD1_TIME_ACCESS))
+ return 0;
+ return encode_attr_time(xdr,time);
+}
+
static __be32 encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time)
{
if (!(bitmap[1] & FATTR4_WORD1_TIME_METADATA))
@@ -649,6 +661,24 @@ static __be32 encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap,
return encode_attr_time(xdr,time);
}
+static __be32 encode_attr_delegatime(struct xdr_stream *xdr,
+ const uint32_t *bitmap,
+ const struct timespec64 *time)
+{
+ if (!(bitmap[2] & FATTR4_WORD2_TIME_DELEG_ACCESS))
+ return 0;
+ return encode_attr_time(xdr,time);
+}
+
+static __be32 encode_attr_delegmtime(struct xdr_stream *xdr,
+ const uint32_t *bitmap,
+ const struct timespec64 *time)
+{
+ if (!(bitmap[2] & FATTR4_WORD2_TIME_DELEG_MODIFY))
+ return 0;
+ return encode_attr_time(xdr,time);
+}
+
static __be32 encode_compound_hdr_res(struct xdr_stream *xdr, struct cb_compound_hdr_res *hdr)
{
__be32 status;
@@ -699,10 +729,19 @@ static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr,
status = encode_attr_size(xdr, res->bitmap, res->size);
if (unlikely(status != 0))
goto out;
+ status = encode_attr_atime(xdr, res->bitmap, &res->atime);
+ if (unlikely(status != 0))
+ goto out;
status = encode_attr_ctime(xdr, res->bitmap, &res->ctime);
if (unlikely(status != 0))
goto out;
status = encode_attr_mtime(xdr, res->bitmap, &res->mtime);
+ if (unlikely(status != 0))
+ goto out;
+ status = encode_attr_delegatime(xdr, res->bitmap, &res->atime);
+ if (unlikely(status != 0))
+ goto out;
+ status = encode_attr_delegmtime(xdr, res->bitmap, &res->mtime);
*savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1)));
out:
return status;
@@ -945,6 +984,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
nfs_put_client(cps.clp);
goto out_invalidcred;
}
+ svc_xprt_set_valid(rqstp->rq_xprt);
}
cps.minorversion = hdr_arg.minorversion;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index de77848ae654..6d63b958c4bb 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -38,7 +38,7 @@
#include <linux/sunrpc/bc_xprt.h>
#include <linux/nsproxy.h>
#include <linux/pid_namespace.h>
-
+#include <linux/nfslocalio.h>
#include "nfs4_fs.h"
#include "callback.h"
@@ -55,9 +55,13 @@
#define NFSDBG_FACILITY NFSDBG_CLIENT
static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
-static DEFINE_SPINLOCK(nfs_version_lock);
-static DEFINE_MUTEX(nfs_version_mutex);
-static LIST_HEAD(nfs_versions);
+static DEFINE_RWLOCK(nfs_version_lock);
+
+static struct nfs_subversion *nfs_version_mods[5] = {
+ [2] = NULL,
+ [3] = NULL,
+ [4] = NULL,
+};
/*
* RPC cruft for NFS
@@ -76,38 +80,38 @@ const struct rpc_program nfs_program = {
.pipe_dir_name = NFS_PIPE_DIRNAME,
};
-static struct nfs_subversion *find_nfs_version(unsigned int version)
+static struct nfs_subversion *__find_nfs_version(unsigned int version)
{
struct nfs_subversion *nfs;
- spin_lock(&nfs_version_lock);
- list_for_each_entry(nfs, &nfs_versions, list) {
- if (nfs->rpc_ops->version == version) {
- spin_unlock(&nfs_version_lock);
- return nfs;
- }
- }
-
- spin_unlock(&nfs_version_lock);
- return ERR_PTR(-EPROTONOSUPPORT);
+ read_lock(&nfs_version_lock);
+ nfs = nfs_version_mods[version];
+ read_unlock(&nfs_version_lock);
+ return nfs;
}
-struct nfs_subversion *get_nfs_version(unsigned int version)
+struct nfs_subversion *find_nfs_version(unsigned int version)
{
- struct nfs_subversion *nfs = find_nfs_version(version);
+ struct nfs_subversion *nfs = __find_nfs_version(version);
- if (IS_ERR(nfs)) {
- mutex_lock(&nfs_version_mutex);
- request_module("nfsv%d", version);
- nfs = find_nfs_version(version);
- mutex_unlock(&nfs_version_mutex);
- }
+ if (!nfs && request_module("nfsv%d", version) == 0)
+ nfs = __find_nfs_version(version);
+
+ if (!nfs)
+ return ERR_PTR(-EPROTONOSUPPORT);
- if (!IS_ERR(nfs) && !try_module_get(nfs->owner))
+ if (!get_nfs_version(nfs))
return ERR_PTR(-EAGAIN);
+
return nfs;
}
+int get_nfs_version(struct nfs_subversion *nfs)
+{
+ return try_module_get(nfs->owner);
+}
+EXPORT_SYMBOL_GPL(get_nfs_version);
+
void put_nfs_version(struct nfs_subversion *nfs)
{
module_put(nfs->owner);
@@ -115,23 +119,23 @@ void put_nfs_version(struct nfs_subversion *nfs)
void register_nfs_version(struct nfs_subversion *nfs)
{
- spin_lock(&nfs_version_lock);
+ write_lock(&nfs_version_lock);
- list_add(&nfs->list, &nfs_versions);
+ nfs_version_mods[nfs->rpc_ops->version] = nfs;
nfs_version[nfs->rpc_ops->version] = nfs->rpc_vers;
- spin_unlock(&nfs_version_lock);
+ write_unlock(&nfs_version_lock);
}
EXPORT_SYMBOL_GPL(register_nfs_version);
void unregister_nfs_version(struct nfs_subversion *nfs)
{
- spin_lock(&nfs_version_lock);
+ write_lock(&nfs_version_lock);
nfs_version[nfs->rpc_ops->version] = NULL;
- list_del(&nfs->list);
+ nfs_version_mods[nfs->rpc_ops->version] = NULL;
- spin_unlock(&nfs_version_lock);
+ write_unlock(&nfs_version_lock);
}
EXPORT_SYMBOL_GPL(unregister_nfs_version);
@@ -151,7 +155,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
clp->cl_minorversion = cl_init->minorversion;
clp->cl_nfs_mod = cl_init->nfs_mod;
- if (!try_module_get(clp->cl_nfs_mod->owner))
+ if (!get_nfs_version(clp->cl_nfs_mod))
goto error_dealloc;
clp->rpc_ops = clp->cl_nfs_mod->rpc_ops;
@@ -178,6 +182,13 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
clp->cl_max_connect = cl_init->max_connect ? cl_init->max_connect : 1;
clp->cl_net = get_net(cl_init->net);
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ seqlock_init(&clp->cl_boot_lock);
+ ktime_get_real_ts64(&clp->cl_nfssvc_boot);
+ nfs_uuid_init(&clp->cl_uuid);
+ INIT_WORK(&clp->cl_local_probe_work, nfs_local_probe_async_work);
+#endif /* CONFIG_NFS_LOCALIO */
+
clp->cl_principal = "*";
clp->cl_xprtsec = cl_init->xprtsec;
return clp;
@@ -233,6 +244,8 @@ static void pnfs_init_server(struct nfs_server *server)
*/
void nfs_free_client(struct nfs_client *clp)
{
+ nfs_localio_disable_client(clp);
+
/* -EIO all pending I/O */
if (!IS_ERR(clp->cl_rpcclient))
rpc_shutdown_client(clp->cl_rpcclient);
@@ -424,7 +437,10 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
list_add_tail(&new->cl_share_link,
&nn->nfs_client_list);
spin_unlock(&nn->nfs_client_lock);
- return rpc_ops->init_client(new, cl_init);
+ new = rpc_ops->init_client(new, cl_init);
+ if (!IS_ERR(new))
+ nfs_local_probe(new);
+ return new;
}
spin_unlock(&nn->nfs_client_lock);
@@ -530,6 +546,8 @@ int nfs_create_rpc_client(struct nfs_client *clp,
args.flags |= RPC_CLNT_CREATE_NOPING;
if (test_bit(NFS_CS_REUSEPORT, &clp->cl_flags))
args.flags |= RPC_CLNT_CREATE_REUSEPORT;
+ if (test_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags))
+ args.flags |= RPC_CLNT_CREATE_NETUNREACH_FATAL;
if (!IS_ERR(clp->cl_rpcclient))
return 0;
@@ -693,6 +711,9 @@ static int nfs_init_server(struct nfs_server *server,
if (ctx->flags & NFS_MOUNT_NORESVPORT)
set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+ if (ctx->flags & NFS_MOUNT_NETUNREACH_FATAL)
+ __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags);
+
/* Allocate or find a client reference we can use */
clp = nfs_get_client(&cl_init);
if (IS_ERR(clp))
@@ -983,6 +1004,7 @@ struct nfs_server *nfs_alloc_server(void)
INIT_LIST_HEAD(&server->layouts);
INIT_LIST_HEAD(&server->state_owners_lru);
INIT_LIST_HEAD(&server->ss_copies);
+ INIT_LIST_HEAD(&server->ss_src_copies);
atomic_set(&server->active, 0);
@@ -994,8 +1016,11 @@ struct nfs_server *nfs_alloc_server(void)
server->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED;
- ida_init(&server->openowner_id);
- ida_init(&server->lockowner_id);
+ init_waitqueue_head(&server->write_congestion_wait);
+ atomic_long_set(&server->writeback, 0);
+
+ atomic64_set(&server->owner_ctr, 0);
+
pnfs_init_server(server);
rpc_init_wait_queue(&server->uoc_rpcwaitq, "NFS UOC");
@@ -1034,8 +1059,6 @@ void nfs_free_server(struct nfs_server *server)
}
ida_free(&s_sysfs_ids, server->s_sysfs_id);
- ida_destroy(&server->lockowner_id);
- ida_destroy(&server->openowner_id);
put_cred(server->cred);
nfs_release_automount_timer();
call_rcu(&server->rcu, delayed_free);
@@ -1082,6 +1105,8 @@ struct nfs_server *nfs_create_server(struct fs_context *fc)
if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
server->namelen = NFS2_MAXNAMLEN;
}
+ /* Linux 'subtree_check' borkenness mandates this setting */
+ server->fh_expire_type = NFS_FH_VOL_RENAME;
if (!(fattr->valid & NFS_ATTR_FATTR)) {
error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh,
@@ -1177,6 +1202,10 @@ void nfs_clients_init(struct net *net)
#if IS_ENABLED(CONFIG_NFS_V4)
idr_init(&nn->cb_ident_idr);
#endif
+#if IS_ENABLED(CONFIG_NFS_V4_1)
+ INIT_LIST_HEAD(&nn->nfs4_data_server_cache);
+ spin_lock_init(&nn->nfs4_data_server_lock);
+#endif
spin_lock_init(&nn->nfs_client_lock);
nn->boot_time = ktime_get_real();
memset(&nn->rpcstats, 0, sizeof(nn->rpcstats));
@@ -1193,6 +1222,9 @@ void nfs_clients_exit(struct net *net)
nfs_cleanup_cb_ident_idr(net);
WARN_ON_ONCE(!list_empty(&nn->nfs_client_list));
WARN_ON_ONCE(!list_empty(&nn->nfs_volume_list));
+#if IS_ENABLED(CONFIG_NFS_V4_1)
+ WARN_ON_ONCE(!list_empty(&nn->nfs4_data_server_cache));
+#endif
}
#ifdef CONFIG_PROC_FS
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 6bace5fece04..8bdbc4dca89c 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -79,14 +79,14 @@ static void nfs_mark_return_delegation(struct nfs_server *server,
struct nfs_delegation *delegation)
{
set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
+ set_bit(NFS4SERV_DELEGRETURN, &server->delegation_flags);
set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
}
-static bool
-nfs4_is_valid_delegation(const struct nfs_delegation *delegation,
- fmode_t flags)
+static bool nfs4_is_valid_delegation(const struct nfs_delegation *delegation,
+ fmode_t type)
{
- if (delegation != NULL && (delegation->type & flags) == flags &&
+ if (delegation != NULL && (delegation->type & type) == type &&
!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) &&
!test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
return true;
@@ -103,19 +103,22 @@ struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode)
return NULL;
}
-static int
-nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
+static int nfs4_do_check_delegation(struct inode *inode, fmode_t type,
+ int flags, bool mark)
{
struct nfs_delegation *delegation;
int ret = 0;
- flags &= FMODE_READ|FMODE_WRITE;
+ type &= FMODE_READ|FMODE_WRITE;
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
- if (nfs4_is_valid_delegation(delegation, flags)) {
+ if (nfs4_is_valid_delegation(delegation, type)) {
if (mark)
nfs_mark_delegation_referenced(delegation);
ret = 1;
+ if ((flags & NFS_DELEGATION_FLAG_TIME) &&
+ !test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags))
+ ret = 0;
}
rcu_read_unlock();
return ret;
@@ -124,22 +127,23 @@ nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
* nfs4_have_delegation - check if inode has a delegation, mark it
* NFS_DELEGATION_REFERENCED if there is one.
* @inode: inode to check
- * @flags: delegation types to check for
+ * @type: delegation types to check for
+ * @flags: various modifiers
*
* Returns one if inode has the indicated delegation, otherwise zero.
*/
-int nfs4_have_delegation(struct inode *inode, fmode_t flags)
+int nfs4_have_delegation(struct inode *inode, fmode_t type, int flags)
{
- return nfs4_do_check_delegation(inode, flags, true);
+ return nfs4_do_check_delegation(inode, type, flags, true);
}
/*
* nfs4_check_delegation - check if inode has a delegation, do not mark
* NFS_DELEGATION_REFERENCED if it has one.
*/
-int nfs4_check_delegation(struct inode *inode, fmode_t flags)
+int nfs4_check_delegation(struct inode *inode, fmode_t type)
{
- return nfs4_do_check_delegation(inode, flags, false);
+ return nfs4_do_check_delegation(inode, type, 0, false);
}
static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid)
@@ -221,11 +225,12 @@ again:
* @type: delegation type
* @stateid: delegation stateid
* @pagemod_limit: write delegation "space_limit"
+ * @deleg_type: raw delegation type
*
*/
void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
fmode_t type, const nfs4_stateid *stateid,
- unsigned long pagemod_limit)
+ unsigned long pagemod_limit, u32 deleg_type)
{
struct nfs_delegation *delegation;
const struct cred *oldcred = NULL;
@@ -239,6 +244,14 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
delegation->pagemod_limit = pagemod_limit;
oldcred = delegation->cred;
delegation->cred = get_cred(cred);
+ switch (deleg_type) {
+ case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
+ case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+ set_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags);
+ break;
+ default:
+ clear_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags);
+ }
clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
if (test_and_clear_bit(NFS_DELEGATION_REVOKED,
&delegation->flags))
@@ -250,11 +263,13 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
} else {
rcu_read_unlock();
nfs_inode_set_delegation(inode, cred, type, stateid,
- pagemod_limit);
+ pagemod_limit, deleg_type);
}
}
-static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
+static int nfs_do_return_delegation(struct inode *inode,
+ struct nfs_delegation *delegation,
+ int issync)
{
const struct cred *cred;
int res = 0;
@@ -263,9 +278,8 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
spin_lock(&delegation->lock);
cred = get_cred(delegation->cred);
spin_unlock(&delegation->lock);
- res = nfs4_proc_delegreturn(inode, cred,
- &delegation->stateid,
- issync);
+ res = nfs4_proc_delegreturn(inode, cred, &delegation->stateid,
+ delegation, issync);
put_cred(cred);
}
return res;
@@ -293,7 +307,8 @@ nfs_start_delegation_return_locked(struct nfs_inode *nfsi)
if (delegation == NULL)
goto out;
spin_lock(&delegation->lock);
- if (!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
+ if (delegation->inode &&
+ !test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
clear_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags);
/* Refcount matched in nfs_end_delegation_return() */
ret = nfs_get_delegation(delegation);
@@ -317,14 +332,16 @@ nfs_start_delegation_return(struct nfs_inode *nfsi)
}
static void nfs_abort_delegation_return(struct nfs_delegation *delegation,
- struct nfs_client *clp, int err)
+ struct nfs_server *server, int err)
{
-
spin_lock(&delegation->lock);
clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
if (err == -EAGAIN) {
set_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags);
- set_bit(NFS4CLNT_DELEGRETURN_DELAYED, &clp->cl_state);
+ set_bit(NFS4SERV_DELEGRETURN_DELAYED,
+ &server->delegation_flags);
+ set_bit(NFS4CLNT_DELEGRETURN_DELAYED,
+ &server->nfs_client->cl_state);
}
spin_unlock(&delegation->lock);
}
@@ -418,13 +435,13 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation,
* @type: delegation type
* @stateid: delegation stateid
* @pagemod_limit: write delegation "space_limit"
+ * @deleg_type: raw delegation type
*
* Returns zero on success, or a negative errno value.
*/
int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
- fmode_t type,
- const nfs4_stateid *stateid,
- unsigned long pagemod_limit)
+ fmode_t type, const nfs4_stateid *stateid,
+ unsigned long pagemod_limit, u32 deleg_type)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_client *clp = server->nfs_client;
@@ -444,6 +461,11 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
delegation->cred = get_cred(cred);
delegation->inode = inode;
delegation->flags = 1<<NFS_DELEGATION_REFERENCED;
+ switch (deleg_type) {
+ case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
+ case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+ delegation->flags |= BIT(NFS_DELEGATION_DELEGTIME);
+ }
delegation->test_gen = 0;
spin_lock_init(&delegation->lock);
@@ -508,6 +530,11 @@ add_new:
atomic_long_inc(&nfs_active_delegations);
trace_nfs4_set_delegation(inode, type);
+
+ /* If we hold writebacks and have delegated mtime then update */
+ if (deleg_type == NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG &&
+ nfs_have_writebacks(inode))
+ nfs_update_delegated_mtime(inode);
out:
spin_unlock(&clp->cl_lock);
if (delegation != NULL)
@@ -524,7 +551,7 @@ out:
*/
static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation *delegation, int issync)
{
- struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ struct nfs_server *server = NFS_SERVER(inode);
unsigned int mode = O_WRONLY | O_RDWR;
int err = 0;
@@ -546,11 +573,11 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation
/*
* Guard against state recovery
*/
- err = nfs4_wait_clnt_recover(clp);
+ err = nfs4_wait_clnt_recover(server->nfs_client);
}
if (err) {
- nfs_abort_delegation_return(delegation, clp, err);
+ nfs_abort_delegation_return(delegation, server, err);
goto out;
}
@@ -567,17 +594,6 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
ret = true;
- else if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags)) {
- struct inode *inode;
-
- spin_lock(&delegation->lock);
- inode = delegation->inode;
- if (inode && list_empty(&NFS_I(inode)->open_files))
- ret = true;
- spin_unlock(&delegation->lock);
- }
- if (ret)
- clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) ||
test_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags) ||
test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
@@ -596,6 +612,9 @@ static int nfs_server_return_marked_delegations(struct nfs_server *server,
struct nfs_delegation *place_holder_deleg = NULL;
int err = 0;
+ if (!test_and_clear_bit(NFS4SERV_DELEGRETURN,
+ &server->delegation_flags))
+ return 0;
restart:
/*
* To avoid quadratic looping we hold a reference
@@ -624,6 +643,9 @@ restart:
prev = delegation;
continue;
}
+ inode = nfs_delegation_grab_inode(delegation);
+ if (inode == NULL)
+ continue;
if (prev) {
struct inode *tmp = nfs_delegation_grab_inode(prev);
@@ -634,12 +656,6 @@ restart:
}
}
- inode = nfs_delegation_grab_inode(delegation);
- if (inode == NULL) {
- rcu_read_unlock();
- iput(to_put);
- goto restart;
- }
delegation = nfs_start_delegation_return_locked(NFS_I(inode));
rcu_read_unlock();
@@ -650,6 +666,7 @@ restart:
cond_resched();
if (!err)
goto restart;
+ set_bit(NFS4SERV_DELEGRETURN, &server->delegation_flags);
set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
goto out;
}
@@ -664,6 +681,9 @@ static bool nfs_server_clear_delayed_delegations(struct nfs_server *server)
struct nfs_delegation *d;
bool ret = false;
+ if (!test_and_clear_bit(NFS4SERV_DELEGRETURN_DELAYED,
+ &server->delegation_flags))
+ goto out;
list_for_each_entry_rcu (d, &server->delegations, super_list) {
if (!test_bit(NFS_DELEGATION_RETURN_DELAYED, &d->flags))
continue;
@@ -671,6 +691,7 @@ static bool nfs_server_clear_delayed_delegations(struct nfs_server *server)
clear_bit(NFS_DELEGATION_RETURN_DELAYED, &d->flags);
ret = true;
}
+out:
return ret;
}
@@ -761,6 +782,43 @@ int nfs4_inode_return_delegation(struct inode *inode)
}
/**
+ * nfs4_inode_set_return_delegation_on_close - asynchronously return a delegation
+ * @inode: inode to process
+ *
+ * This routine is called to request that the delegation be returned as soon
+ * as the file is closed. If the file is already closed, the delegation is
+ * immediately returned.
+ */
+void nfs4_inode_set_return_delegation_on_close(struct inode *inode)
+{
+ struct nfs_delegation *delegation;
+ struct nfs_delegation *ret = NULL;
+
+ if (!inode)
+ return;
+ rcu_read_lock();
+ delegation = nfs4_get_valid_delegation(inode);
+ if (!delegation)
+ goto out;
+ spin_lock(&delegation->lock);
+ if (!delegation->inode)
+ goto out_unlock;
+ if (list_empty(&NFS_I(inode)->open_files) &&
+ !test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
+ /* Refcount matched in nfs_end_delegation_return() */
+ ret = nfs_get_delegation(delegation);
+ } else
+ set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
+out_unlock:
+ spin_unlock(&delegation->lock);
+ if (ret)
+ nfs_clear_verifier_delegated(inode);
+out:
+ rcu_read_unlock();
+ nfs_end_delegation_return(inode, ret, 0);
+}
+
+/**
* nfs4_inode_return_delegation_on_close - asynchronously return a delegation
* @inode: inode to process
*
@@ -821,11 +879,25 @@ int nfs4_inode_make_writeable(struct inode *inode)
return nfs4_inode_return_delegation(inode);
}
-static void nfs_mark_return_if_closed_delegation(struct nfs_server *server,
- struct nfs_delegation *delegation)
+static void
+nfs_mark_return_if_closed_delegation(struct nfs_server *server,
+ struct nfs_delegation *delegation)
{
- set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
- set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
+ struct inode *inode;
+
+ if (test_bit(NFS_DELEGATION_RETURN, &delegation->flags) ||
+ test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags))
+ return;
+ spin_lock(&delegation->lock);
+ inode = delegation->inode;
+ if (!inode)
+ goto out;
+ if (list_empty(&NFS_I(inode)->open_files))
+ nfs_mark_return_delegation(server, delegation);
+ else
+ set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
+out:
+ spin_unlock(&delegation->lock);
}
static bool nfs_server_mark_return_all_delegations(struct nfs_server *server)
@@ -981,6 +1053,11 @@ void nfs_delegation_mark_returned(struct inode *inode,
}
nfs_mark_delegation_revoked(delegation);
+ clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
+ spin_unlock(&delegation->lock);
+ if (nfs_detach_delegation(NFS_I(inode), delegation, NFS_SERVER(inode)))
+ nfs_put_delegation(delegation);
+ goto out_rcu_unlock;
out_clear_returning:
clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
@@ -1161,7 +1238,6 @@ static int nfs_server_reap_unclaimed_delegations(struct nfs_server *server,
struct inode *inode;
restart:
rcu_read_lock();
-restart_locked:
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if (test_bit(NFS_DELEGATION_INODE_FREEING,
&delegation->flags) ||
@@ -1172,7 +1248,7 @@ restart_locked:
continue;
inode = nfs_delegation_grab_inode(delegation);
if (inode == NULL)
- goto restart_locked;
+ continue;
delegation = nfs_start_delegation_return_locked(NFS_I(inode));
rcu_read_unlock();
if (delegation != NULL) {
@@ -1215,6 +1291,7 @@ static void nfs_mark_test_expired_delegation(struct nfs_server *server,
return;
clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
set_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags);
+ set_bit(NFS4SERV_DELEGATION_EXPIRED, &server->delegation_flags);
set_bit(NFS4CLNT_DELEGATION_EXPIRED, &server->nfs_client->cl_state);
}
@@ -1293,9 +1370,11 @@ static int nfs_server_reap_expired_delegations(struct nfs_server *server,
nfs4_stateid stateid;
unsigned long gen = ++server->delegation_gen;
+ if (!test_and_clear_bit(NFS4SERV_DELEGATION_EXPIRED,
+ &server->delegation_flags))
+ return 0;
restart:
rcu_read_lock();
-restart_locked:
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if (test_bit(NFS_DELEGATION_INODE_FREEING,
&delegation->flags) ||
@@ -1307,7 +1386,7 @@ restart_locked:
continue;
inode = nfs_delegation_grab_inode(delegation);
if (inode == NULL)
- goto restart_locked;
+ continue;
spin_lock(&delegation->lock);
cred = get_cred_rcu(delegation->cred);
nfs4_stateid_copy(&stateid, &delegation->stateid);
@@ -1323,6 +1402,9 @@ restart_locked:
goto restart;
}
nfs_inode_mark_test_expired_delegation(server,inode);
+ set_bit(NFS4SERV_DELEGATION_EXPIRED, &server->delegation_flags);
+ set_bit(NFS4CLNT_DELEGATION_EXPIRED,
+ &server->nfs_client->cl_state);
iput(inode);
return -EAGAIN;
}
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index a6f495d012cf..8ff5ab9c5c25 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -38,14 +38,18 @@ enum {
NFS_DELEGATION_TEST_EXPIRED,
NFS_DELEGATION_INODE_FREEING,
NFS_DELEGATION_RETURN_DELAYED,
+ NFS_DELEGATION_DELEGTIME,
};
int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
- fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit);
+ fmode_t type, const nfs4_stateid *stateid,
+ unsigned long pagemod_limit, u32 deleg_type);
void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
- fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit);
+ fmode_t type, const nfs4_stateid *stateid,
+ unsigned long pagemod_limit, u32 deleg_type);
int nfs4_inode_return_delegation(struct inode *inode);
void nfs4_inode_return_delegation_on_close(struct inode *inode);
+void nfs4_inode_set_return_delegation_on_close(struct inode *inode);
int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
void nfs_inode_evict_delegation(struct inode *inode);
@@ -67,7 +71,9 @@ void nfs_test_expired_all_delegations(struct nfs_client *clp);
void nfs_reap_expired_delegations(struct nfs_client *clp);
/* NFSv4 delegation-related procedures */
-int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync);
+int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
+ const nfs4_stateid *stateid,
+ struct nfs_delegation *delegation, int issync);
int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid);
bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, const struct cred **cred);
@@ -75,8 +81,8 @@ bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode);
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
-int nfs4_have_delegation(struct inode *inode, fmode_t flags);
-int nfs4_check_delegation(struct inode *inode, fmode_t flags);
+int nfs4_have_delegation(struct inode *inode, fmode_t type, int flags);
+int nfs4_check_delegation(struct inode *inode, fmode_t type);
bool nfs4_delegation_flush_on_close(const struct inode *inode);
void nfs_inode_find_delegation_state_and_recover(struct inode *inode,
const nfs4_stateid *stateid);
@@ -84,9 +90,37 @@ int nfs4_inode_make_writeable(struct inode *inode);
#endif
+#define NFS_DELEGATION_FLAG_TIME BIT(1)
+
+void nfs_update_delegated_atime(struct inode *inode);
+void nfs_update_delegated_mtime(struct inode *inode);
+void nfs_update_delegated_mtime_locked(struct inode *inode);
+
+static inline int nfs_have_read_or_write_delegation(struct inode *inode)
+{
+ return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0);
+}
+
+static inline int nfs_have_write_delegation(struct inode *inode)
+{
+ return NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE, 0);
+}
+
static inline int nfs_have_delegated_attributes(struct inode *inode)
{
- return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ);
+ return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0);
+}
+
+static inline int nfs_have_delegated_atime(struct inode *inode)
+{
+ return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ,
+ NFS_DELEGATION_FLAG_TIME);
+}
+
+static inline int nfs_have_delegated_mtime(struct inode *inode)
+{
+ return NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE,
+ NFS_DELEGATION_FLAG_TIME);
}
#endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ac505671efbd..d0e0b435a843 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -56,6 +56,8 @@ static int nfs_readdir(struct file *, struct dir_context *);
static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
static loff_t nfs_llseek_dir(struct file *, loff_t, int);
static void nfs_readdir_clear_array(struct folio *);
+static int nfs_do_create(struct inode *dir, struct dentry *dentry,
+ umode_t mode, int open_flags);
const struct file_operations nfs_dir_operations = {
.llseek = nfs_llseek_dir,
@@ -149,7 +151,7 @@ struct nfs_cache_array {
unsigned char folio_full : 1,
folio_is_eof : 1,
cookies_are_ordered : 1;
- struct nfs_cache_array_entry array[];
+ struct nfs_cache_array_entry array[] __counted_by(size);
};
struct nfs_readdir_descriptor {
@@ -326,7 +328,8 @@ static int nfs_readdir_folio_array_append(struct folio *folio,
goto out;
}
- cache_entry = &array->array[array->size];
+ array->size++;
+ cache_entry = &array->array[array->size - 1];
cache_entry->cookie = array->last_cookie;
cache_entry->ino = entry->ino;
cache_entry->d_type = entry->d_type;
@@ -335,7 +338,6 @@ static int nfs_readdir_folio_array_append(struct folio *folio,
array->last_cookie = entry->cookie;
if (array->last_cookie <= cache_entry->cookie)
array->cookies_are_ordered = 0;
- array->size++;
if (entry->eof != 0)
nfs_readdir_array_set_eof(array);
out:
@@ -664,6 +666,8 @@ static bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx,
{
if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
return false;
+ if (NFS_SERVER(dir)->flags & NFS_MOUNT_FORCE_RDIRPLUS)
+ return true;
if (ctx->pos == 0 ||
cache_hits + cache_misses > NFS_READDIR_CACHE_USAGE_THRESHOLD)
return true;
@@ -1435,7 +1439,7 @@ static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf)
if (!dir || !nfs_verify_change_attribute(dir, verf))
return;
- if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+ if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0))
nfs_set_verifier_delegated(&verf);
dentry->d_time = verf;
}
@@ -1530,7 +1534,8 @@ static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags)
{
if (NFS_PROTO(dir)->version == 2)
return 0;
- return flags & LOOKUP_EXCL;
+ return (flags & (LOOKUP_CREATE | LOOKUP_EXCL)) ==
+ (LOOKUP_CREATE | LOOKUP_EXCL);
}
/*
@@ -1625,7 +1630,16 @@ nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
switch (error) {
case 1:
break;
- case 0:
+ case -ETIMEDOUT:
+ if (inode && (IS_ROOT(dentry) ||
+ NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL))
+ error = 1;
+ break;
+ case -ESTALE:
+ case -ENOENT:
+ error = 0;
+ fallthrough;
+ default:
/*
* We can't d_drop the root of a disconnected tree:
* its d_hash is on the s_anon list and d_drop() would hide
@@ -1661,7 +1675,7 @@ nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry,
return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
}
-static int nfs_lookup_revalidate_dentry(struct inode *dir,
+static int nfs_lookup_revalidate_dentry(struct inode *dir, const struct qstr *name,
struct dentry *dentry,
struct inode *inode, unsigned int flags)
{
@@ -1679,19 +1693,9 @@ static int nfs_lookup_revalidate_dentry(struct inode *dir,
goto out;
dir_verifier = nfs_save_change_attribute(dir);
- ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
- if (ret < 0) {
- switch (ret) {
- case -ESTALE:
- case -ENOENT:
- ret = 0;
- break;
- case -ETIMEDOUT:
- if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
- ret = 1;
- }
+ ret = NFS_PROTO(dir)->lookup(dir, dentry, name, fhandle, fattr);
+ if (ret < 0)
goto out;
- }
/* Request help from readdirplus */
nfs_lookup_advise_force_readdirplus(dir, flags);
@@ -1731,11 +1735,11 @@ out:
* cached dentry and do a new lookup.
*/
static int
-nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
- unsigned int flags)
+nfs_do_lookup_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
struct inode *inode;
- int error;
+ int error = 0;
nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
inode = d_inode(dentry);
@@ -1774,47 +1778,59 @@ nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
if (NFS_STALE(inode))
goto out_bad;
- return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags);
+ return nfs_lookup_revalidate_dentry(dir, name, dentry, inode, flags);
out_valid:
return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
out_bad:
if (flags & LOOKUP_RCU)
return -ECHILD;
- return nfs_lookup_revalidate_done(dir, dentry, inode, 0);
+ return nfs_lookup_revalidate_done(dir, dentry, inode, error);
}
static int
-__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
- int (*reval)(struct inode *, struct dentry *, unsigned int))
+__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
{
- struct dentry *parent;
- struct inode *dir;
- int ret;
-
if (flags & LOOKUP_RCU) {
if (dentry->d_fsdata == NFS_FSDATA_BLOCKED)
return -ECHILD;
- parent = READ_ONCE(dentry->d_parent);
- dir = d_inode_rcu(parent);
- if (!dir)
- return -ECHILD;
- ret = reval(dir, dentry, flags);
- if (parent != READ_ONCE(dentry->d_parent))
- return -ECHILD;
} else {
- /* Wait for unlink to complete */
+ /* Wait for unlink to complete - see unblock_revalidate() */
wait_var_event(&dentry->d_fsdata,
- dentry->d_fsdata != NFS_FSDATA_BLOCKED);
- parent = dget_parent(dentry);
- ret = reval(d_inode(parent), dentry, flags);
- dput(parent);
+ smp_load_acquire(&dentry->d_fsdata)
+ != NFS_FSDATA_BLOCKED);
}
- return ret;
+ return 0;
}
-static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
+static int nfs_lookup_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
- return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate);
+ if (__nfs_lookup_revalidate(dentry, flags))
+ return -ECHILD;
+ return nfs_do_lookup_revalidate(dir, name, dentry, flags);
+}
+
+static void block_revalidate(struct dentry *dentry)
+{
+ /* old devname - just in case */
+ kfree(dentry->d_fsdata);
+
+ /* Any new reference that could lead to an open
+ * will take ->d_lock in lookup_open() -> d_lookup().
+ * Holding this lock ensures we cannot race with
+ * __nfs_lookup_revalidate() and removes and need
+ * for further barriers.
+ */
+ lockdep_assert_held(&dentry->d_lock);
+
+ dentry->d_fsdata = NFS_FSDATA_BLOCKED;
+}
+
+static void unblock_revalidate(struct dentry *dentry)
+{
+ /* store_release ensures wait_var_event() sees the update */
+ smp_store_release(&dentry->d_fsdata, NULL);
+ wake_up_var(&dentry->d_fsdata);
}
/*
@@ -1957,7 +1973,8 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
dir_verifier = nfs_save_change_attribute(dir);
trace_nfs_lookup_enter(dir, dentry, flags);
- error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
+ error = NFS_PROTO(dir)->lookup(dir, dentry, &dentry->d_name,
+ fhandle, fattr);
if (error == -ENOENT) {
if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
dir_verifier = inode_peek_iversion_raw(dir);
@@ -2000,7 +2017,8 @@ void nfs_d_prune_case_insensitive_aliases(struct inode *inode)
EXPORT_SYMBOL_GPL(nfs_d_prune_case_insensitive_aliases);
#if IS_ENABLED(CONFIG_NFS_V4)
-static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
+static int nfs4_lookup_revalidate(struct inode *, const struct qstr *,
+ struct dentry *, unsigned int);
const struct dentry_operations nfs4_dentry_operations = {
.d_revalidate = nfs4_lookup_revalidate,
@@ -2189,11 +2207,14 @@ no_open:
EXPORT_SYMBOL_GPL(nfs_atomic_open);
static int
-nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
- unsigned int flags)
+nfs4_lookup_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
struct inode *inode;
+ if (__nfs_lookup_revalidate(dentry, flags))
+ return -ECHILD;
+
trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
@@ -2229,19 +2250,51 @@ nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
reval_dentry:
if (flags & LOOKUP_RCU)
return -ECHILD;
- return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags);
+ return nfs_lookup_revalidate_dentry(dir, name, dentry, inode, flags);
full_reval:
- return nfs_do_lookup_revalidate(dir, dentry, flags);
+ return nfs_do_lookup_revalidate(dir, name, dentry, flags);
}
-static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
+#endif /* CONFIG_NFSV4 */
+
+int nfs_atomic_open_v23(struct inode *dir, struct dentry *dentry,
+ struct file *file, unsigned int open_flags,
+ umode_t mode)
{
- return __nfs_lookup_revalidate(dentry, flags,
- nfs4_do_lookup_revalidate);
-}
-#endif /* CONFIG_NFSV4 */
+ /* Same as look+open from lookup_open(), but with different O_TRUNC
+ * handling.
+ */
+ int error = 0;
+
+ if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
+ return -ENAMETOOLONG;
+
+ if (open_flags & O_CREAT) {
+ file->f_mode |= FMODE_CREATED;
+ error = nfs_do_create(dir, dentry, mode, open_flags);
+ if (error)
+ return error;
+ return finish_open(file, dentry, NULL);
+ } else if (d_in_lookup(dentry)) {
+ /* The only flags nfs_lookup considers are
+ * LOOKUP_EXCL and LOOKUP_RENAME_TARGET, and
+ * we want those to be zero so the lookup isn't skipped.
+ */
+ struct dentry *res = nfs_lookup(dir, dentry, 0);
+
+ d_lookup_done(dentry);
+ if (unlikely(res)) {
+ if (IS_ERR(res))
+ return PTR_ERR(res);
+ return finish_no_open(file, res);
+ }
+ }
+ return finish_no_open(file, NULL);
+
+}
+EXPORT_SYMBOL_GPL(nfs_atomic_open_v23);
struct dentry *
nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
@@ -2256,7 +2309,8 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
d_drop(dentry);
if (fhandle->size == 0) {
- error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
+ error = NFS_PROTO(dir)->lookup(dir, dentry, &dentry->d_name,
+ fhandle, fattr);
if (error)
goto out_error;
}
@@ -2303,18 +2357,23 @@ EXPORT_SYMBOL_GPL(nfs_instantiate);
* that the operation succeeded on the server, but an error in the
* reply path made it appear to have failed.
*/
-int nfs_create(struct mnt_idmap *idmap, struct inode *dir,
- struct dentry *dentry, umode_t mode, bool excl)
+static int nfs_do_create(struct inode *dir, struct dentry *dentry,
+ umode_t mode, int open_flags)
{
struct iattr attr;
- int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT;
int error;
+ open_flags |= O_CREAT;
+
dfprintk(VFS, "NFS: create(%s/%lu), %pd\n",
dir->i_sb->s_id, dir->i_ino, dentry);
attr.ia_mode = mode;
attr.ia_valid = ATTR_MODE;
+ if (open_flags & O_TRUNC) {
+ attr.ia_size = 0;
+ attr.ia_valid |= ATTR_SIZE;
+ }
trace_nfs_create_enter(dir, dentry, open_flags);
error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
@@ -2326,6 +2385,12 @@ out_err:
d_drop(dentry);
return error;
}
+
+int nfs_create(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
+{
+ return nfs_do_create(dir, dentry, mode, excl ? O_EXCL : 0);
+}
EXPORT_SYMBOL_GPL(nfs_create);
/*
@@ -2359,11 +2424,11 @@ EXPORT_SYMBOL_GPL(nfs_mknod);
/*
* See comments for nfs_proc_create regarding failed operations.
*/
-int nfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
- struct dentry *dentry, umode_t mode)
+struct dentry *nfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct iattr attr;
- int error;
+ struct dentry *ret;
dfprintk(VFS, "NFS: mkdir(%s/%lu), %pd\n",
dir->i_sb->s_id, dir->i_ino, dentry);
@@ -2372,14 +2437,9 @@ int nfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
attr.ia_mode = mode | S_IFDIR;
trace_nfs_mkdir_enter(dir, dentry);
- error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
- trace_nfs_mkdir_exit(dir, dentry, error);
- if (error != 0)
- goto out_err;
- return 0;
-out_err:
- d_drop(dentry);
- return error;
+ ret = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
+ trace_nfs_mkdir_exit(dir, dentry, PTR_ERR_OR_ZERO(ret));
+ return ret;
}
EXPORT_SYMBOL_GPL(nfs_mkdir);
@@ -2501,15 +2561,12 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
spin_unlock(&dentry->d_lock);
goto out;
}
- /* old devname */
- kfree(dentry->d_fsdata);
- dentry->d_fsdata = NFS_FSDATA_BLOCKED;
+ block_revalidate(dentry);
spin_unlock(&dentry->d_lock);
error = nfs_safe_remove(dentry);
nfs_dentry_remove_handle_error(dir, dentry, error);
- dentry->d_fsdata = NULL;
- wake_up_var(&dentry->d_fsdata);
+ unblock_revalidate(dentry);
out:
trace_nfs_unlink_exit(dir, dentry, error);
return error;
@@ -2616,8 +2673,19 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
{
struct dentry *new_dentry = data->new_dentry;
- new_dentry->d_fsdata = NULL;
- wake_up_var(&new_dentry->d_fsdata);
+ unblock_revalidate(new_dentry);
+}
+
+static bool nfs_rename_is_unsafe_cross_dir(struct dentry *old_dentry,
+ struct dentry *new_dentry)
+{
+ struct nfs_server *server = NFS_SB(old_dentry->d_sb);
+
+ if (old_dentry->d_parent != new_dentry->d_parent)
+ return false;
+ if (server->fh_expire_type & NFS_FH_RENAME_UNSAFE)
+ return !(server->fh_expire_type & NFS_FH_NOEXPIRE_WITH_OPEN);
+ return true;
}
/*
@@ -2679,11 +2747,6 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (WARN_ON(new_dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
WARN_ON(new_dentry->d_fsdata == NFS_FSDATA_BLOCKED))
goto out;
- if (new_dentry->d_fsdata) {
- /* old devname */
- kfree(new_dentry->d_fsdata);
- new_dentry->d_fsdata = NULL;
- }
spin_lock(&new_dentry->d_lock);
if (d_count(new_dentry) > 2) {
@@ -2705,18 +2768,21 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
new_dentry = dentry;
new_inode = NULL;
} else {
- new_dentry->d_fsdata = NFS_FSDATA_BLOCKED;
+ block_revalidate(new_dentry);
must_unblock = true;
spin_unlock(&new_dentry->d_lock);
}
}
- if (S_ISREG(old_inode->i_mode))
+ if (S_ISREG(old_inode->i_mode) &&
+ nfs_rename_is_unsafe_cross_dir(old_dentry, new_dentry))
nfs_sync_inode(old_inode);
task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
must_unblock ? nfs_unblock_rename : NULL);
if (IS_ERR(task)) {
+ if (must_unblock)
+ unblock_revalidate(new_dentry);
error = PTR_ERR(task);
goto out;
}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index bb2f583eb28b..48d89716193a 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -56,6 +56,7 @@
#include <linux/uaccess.h>
#include <linux/atomic.h>
+#include "delegation.h"
#include "internal.h"
#include "iostat.h"
#include "pnfs.h"
@@ -130,6 +131,20 @@ static void nfs_direct_truncate_request(struct nfs_direct_req *dreq,
dreq->count = req_start;
}
+static void nfs_direct_file_adjust_size_locked(struct inode *inode,
+ loff_t offset, size_t count)
+{
+ loff_t newsize = offset + (loff_t)count;
+ loff_t oldsize = i_size_read(inode);
+
+ if (newsize > oldsize) {
+ i_size_write(inode, newsize);
+ NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE;
+ trace_nfs_size_grow(inode, newsize);
+ nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
+ }
+}
+
/**
* nfs_swap_rw - NFS address space operation for swap I/O
* @iocb: target I/O control block
@@ -141,8 +156,6 @@ int nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
{
ssize_t ret;
- VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
-
if (iov_iter_rw(iter) == READ)
ret = nfs_file_direct_read(iocb, iter, true);
else
@@ -274,6 +287,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
nfs_direct_count_bytes(dreq, hdr);
spin_unlock(&dreq->lock);
+ nfs_update_delegated_atime(dreq->inode);
+
while (!list_empty(&hdr->pages)) {
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
struct page *page = req->wb_page;
@@ -305,6 +320,7 @@ static void nfs_read_sync_pgio_error(struct list_head *head, int error)
static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
{
get_dreq(hdr->dreq);
+ set_bit(NFS_IOHDR_ODIRECT, &hdr->flags);
}
static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
@@ -456,8 +472,16 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
if (user_backed_iter(iter))
dreq->flags = NFS_ODIRECT_SHOULD_DIRTY;
- if (!swap)
- nfs_start_io_direct(inode);
+ if (!swap) {
+ result = nfs_start_io_direct(inode);
+ if (result) {
+ /* release the reference that would usually be
+ * consumed by nfs_direct_read_schedule_iovec()
+ */
+ nfs_direct_req_release(dreq);
+ goto out_release;
+ }
+ }
NFS_I(inode)->read_io += count;
requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos);
@@ -733,7 +757,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
{
struct nfs_direct_req *dreq = hdr->dreq;
struct nfs_commit_info cinfo;
- struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+ struct inode *inode = dreq->inode;
int flags = NFS_ODIRECT_DONE;
trace_nfs_direct_write_completion(dreq);
@@ -755,7 +779,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
}
spin_unlock(&dreq->lock);
+ spin_lock(&inode->i_lock);
+ nfs_direct_file_adjust_size_locked(inode, dreq->io_start, dreq->count);
+ nfs_update_delegated_mtime_locked(dreq->inode);
+ spin_unlock(&inode->i_lock);
+
while (!list_empty(&hdr->pages)) {
+ struct nfs_page *req;
req = nfs_list_entry(hdr->pages.next);
nfs_list_remove_request(req);
@@ -1009,7 +1039,14 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
requested = nfs_direct_write_schedule_iovec(dreq, iter, pos,
FLUSH_STABLE);
} else {
- nfs_start_io_direct(inode);
+ result = nfs_start_io_direct(inode);
+ if (result) {
+ /* release the reference that would usually be
+ * consumed by nfs_direct_write_schedule_iovec()
+ */
+ nfs_direct_req_release(dreq);
+ goto out_release;
+ }
requested = nfs_direct_write_schedule_iovec(dreq, iter, pos,
FLUSH_COND_STABLE);
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index be686b8e0c54..e9c233b6fd20 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -154,5 +154,6 @@ const struct export_operations nfs_export_ops = {
EXPORT_OP_CLOSE_BEFORE_UNLINK |
EXPORT_OP_REMOTE_FS |
EXPORT_OP_NOATOMIC_ATTR |
- EXPORT_OP_FLUSH_ON_CLOSE,
+ EXPORT_OP_FLUSH_ON_CLOSE |
+ EXPORT_OP_NOLOCKS,
};
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 407c6e15afe2..033feeab8c34 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -29,6 +29,7 @@
#include <linux/pagemap.h>
#include <linux/gfp.h>
#include <linux/swap.h>
+#include <linux/compaction.h>
#include <linux/uaccess.h>
#include <linux/filelock.h>
@@ -166,7 +167,10 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
iocb->ki_filp,
iov_iter_count(to), (unsigned long) iocb->ki_pos);
- nfs_start_io_read(inode);
+ result = nfs_start_io_read(inode);
+ if (result)
+ return result;
+
result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
if (!result) {
result = generic_file_read_iter(iocb, to);
@@ -187,7 +191,10 @@ nfs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe
dprintk("NFS: splice_read(%pD2, %zu@%llu)\n", in, len, *ppos);
- nfs_start_io_read(inode);
+ result = nfs_start_io_read(inode);
+ if (result)
+ return result;
+
result = nfs_revalidate_mapping(inode, in->f_mapping);
if (!result) {
result = filemap_splice_read(in, ppos, pipe, len, flags);
@@ -336,9 +343,10 @@ static bool nfs_want_read_modify_write(struct file *file, struct folio *folio,
* increment the page use counts until he is done with the page.
*/
static int nfs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, struct page **pagep,
+ loff_t pos, unsigned len, struct folio **foliop,
void **fsdata)
{
+ fgf_t fgp = FGP_WRITEBEGIN;
struct folio *folio;
int once_thru = 0;
int ret;
@@ -346,12 +354,13 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n",
file, mapping->host->i_ino, len, (long long) pos);
+ fgp |= fgf_set_order(len);
start:
- folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, FGP_WRITEBEGIN,
+ folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp,
mapping_gfp_mask(mapping));
if (IS_ERR(folio))
return PTR_ERR(folio);
- *pagep = &folio->page;
+ *foliop = folio;
ret = nfs_flush_incompatible(file, folio);
if (ret) {
@@ -370,10 +379,9 @@ start:
static int nfs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
struct nfs_open_context *ctx = nfs_file_open_context(file);
- struct folio *folio = page_folio(page);
unsigned offset = offset_in_folio(folio, pos);
int status;
@@ -425,7 +433,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
static void nfs_invalidate_folio(struct folio *folio, size_t offset,
size_t length)
{
- struct inode *inode = folio_file_mapping(folio)->host;
+ struct inode *inode = folio->mapping->host;
dfprintk(PAGECACHE, "NFS: invalidate_folio(%lu, %zu, %zu)\n",
folio->index, offset, length);
@@ -433,8 +441,8 @@ static void nfs_invalidate_folio(struct folio *folio, size_t offset,
return;
/* Cancel any unstarted writes on this page */
nfs_wb_folio_cancel(inode, folio);
- folio_wait_fscache(folio);
- trace_nfs_invalidate_folio(inode, folio);
+ folio_wait_private_2(folio); /* [DEPRECATED] */
+ trace_nfs_invalidate_folio(inode, folio_pos(folio) + offset, length);
}
/*
@@ -450,9 +458,9 @@ static bool nfs_release_folio(struct folio *folio, gfp_t gfp)
/* If the private flag is set, then the folio is not freeable */
if (folio_test_private(folio)) {
if ((current_gfp_context(gfp) & GFP_KERNEL) != GFP_KERNEL ||
- current_is_kswapd())
+ current_is_kswapd() || current_is_kcompactd())
return false;
- if (nfs_wb_folio(folio_file_mapping(folio)->host, folio) < 0)
+ if (nfs_wb_folio(folio->mapping->host, folio) < 0)
return false;
}
return nfs_fscache_release_folio(folio, gfp);
@@ -500,9 +508,10 @@ static int nfs_launder_folio(struct folio *folio)
dfprintk(PAGECACHE, "NFS: launder_folio(%ld, %llu)\n",
inode->i_ino, folio_pos(folio));
- folio_wait_fscache(folio);
+ folio_wait_private_2(folio); /* [DEPRECATED] */
ret = nfs_wb_folio(inode, folio);
- trace_nfs_launder_folio_done(inode, folio, ret);
+ trace_nfs_launder_folio_done(inode, folio_pos(folio),
+ folio_size(folio), ret);
return ret;
}
@@ -588,13 +597,13 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n",
filp, filp->f_mapping->host->i_ino,
- (long long)folio_file_pos(folio));
+ (long long)folio_pos(folio));
sb_start_pagefault(inode->i_sb);
/* make sure the cache has finished storing the page */
- if (folio_test_fscache(folio) &&
- folio_wait_fscache_killable(folio) < 0) {
+ if (folio_test_private_2(folio) && /* [DEPRECATED] */
+ folio_wait_private_2_killable(folio) < 0) {
ret = VM_FAULT_RETRY;
goto out;
}
@@ -604,7 +613,7 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
folio_lock(folio);
- mapping = folio_file_mapping(folio);
+ mapping = folio->mapping;
if (mapping != inode->i_mapping)
goto out_unlock;
@@ -666,7 +675,9 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
nfs_clear_invalid_mapping(file->f_mapping);
since = filemap_sample_wb_err(file->f_mapping);
- nfs_start_io_write(inode);
+ error = nfs_start_io_write(inode);
+ if (error)
+ return error;
result = generic_write_checks(iocb, from);
if (result > 0)
result = generic_perform_write(iocb, from);
@@ -730,7 +741,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
}
fl->c.flc_type = saved_type;
- if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+ if (nfs_have_read_or_write_delegation(inode))
goto out_noconflict;
if (is_local)
@@ -813,7 +824,7 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
* This makes locking act as a cache coherency point.
*/
nfs_sync_mapping(filp->f_mapping);
- if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) {
+ if (!nfs_have_read_or_write_delegation(inode)) {
nfs_zap_caches(inode);
if (mapping_mapped(filp->f_mapping))
nfs_revalidate_mapping(inode, filp->f_mapping);
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index ce8f8934bca5..d39a1f58e18d 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -488,7 +488,7 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr)
/* Perform an asynchronous read to ds */
nfs_initiate_pgio(ds_clnt, hdr, hdr->cred,
NFS_PROTO(hdr->inode), &filelayout_read_call_ops,
- 0, RPC_TASK_SOFTCONN);
+ 0, RPC_TASK_SOFTCONN, NULL);
return PNFS_ATTEMPTED;
}
@@ -530,7 +530,7 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
/* Perform an asynchronous write */
nfs_initiate_pgio(ds_clnt, hdr, hdr->cred,
NFS_PROTO(hdr->inode), &filelayout_write_call_ops,
- sync, RPC_TASK_SOFTCONN);
+ sync, RPC_TASK_SOFTCONN, NULL);
return PNFS_ATTEMPTED;
}
@@ -605,14 +605,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
dprintk("--> %s\n", __func__);
- /* FIXME: remove this check when layout segment support is added */
- if (lgr->range.offset != 0 ||
- lgr->range.length != NFS4_MAX_UINT64) {
- dprintk("%s Only whole file layouts supported. Use MDS i/o\n",
- __func__);
- goto out;
- }
-
if (fl->pattern_offset > lgr->range.offset) {
dprintk("%s pattern_offset %lld too large\n",
__func__, fl->pattern_offset);
@@ -875,15 +867,15 @@ static void
filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
- pnfs_generic_pg_check_layout(pgio);
+ pnfs_generic_pg_check_layout(pgio, req);
if (!pgio->pg_lseg) {
pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
nfs_req_openctx(req),
- 0,
- NFS4_MAX_UINT64,
+ req_offset(req),
+ req->wb_bytes,
IOMODE_READ,
false,
- GFP_KERNEL);
+ nfs_io_gfp_mask());
if (IS_ERR(pgio->pg_lseg)) {
pgio->pg_error = PTR_ERR(pgio->pg_lseg);
pgio->pg_lseg = NULL;
@@ -899,15 +891,15 @@ static void
filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
- pnfs_generic_pg_check_layout(pgio);
+ pnfs_generic_pg_check_layout(pgio, req);
if (!pgio->pg_lseg) {
pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
nfs_req_openctx(req),
- 0,
- NFS4_MAX_UINT64,
+ req_offset(req),
+ req->wb_bytes,
IOMODE_RW,
false,
- GFP_NOFS);
+ nfs_io_gfp_mask());
if (IS_ERR(pgio->pg_lseg)) {
pgio->pg_error = PTR_ERR(pgio->pg_lseg);
pgio->pg_lseg = NULL;
@@ -1019,7 +1011,7 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
data->args.fh = fh;
return nfs_initiate_commit(ds_clnt, data, NFS_PROTO(data->inode),
&filelayout_commit_call_ops, how,
- RPC_TASK_SOFTCONN);
+ RPC_TASK_SOFTCONN, NULL);
out_err:
pnfs_generic_prepare_to_resend_writes(data);
pnfs_generic_commit_release(data);
@@ -1118,7 +1110,6 @@ static const struct pnfs_commit_ops filelayout_commit_ops = {
.clear_request_commit = pnfs_generic_clear_request_commit,
.scan_commit_lists = pnfs_generic_scan_commit_lists,
.recover_commit_reqs = pnfs_generic_recover_commit_reqs,
- .search_commit_reqs = pnfs_generic_search_commit_reqs,
.commit_pagelist = filelayout_commit_pagelist,
};
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 4fa304fa5bc4..29d9234d5c08 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -76,6 +76,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
struct page *scratch;
struct list_head dsaddrs;
struct nfs4_pnfs_ds_addr *da;
+ struct net *net = server->nfs_client->cl_net;
/* set up xdr stream */
scratch = alloc_page(gfp_flags);
@@ -159,8 +160,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
mp_count = be32_to_cpup(p); /* multipath count */
for (j = 0; j < mp_count; j++) {
- da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net,
- &stream, gfp_flags);
+ da = nfs4_decode_mp_ds_addr(net, &stream, gfp_flags);
if (da)
list_add_tail(&da->da_node, &dsaddrs);
}
@@ -170,7 +170,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
goto out_err_free_deviceid;
}
- dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
+ dsaddr->ds_list[i] = nfs4_pnfs_ds_add(net, &dsaddrs, gfp_flags);
if (!dsaddr->ds_list[i])
goto out_err_drain_dsaddrs;
trace_fl_getdevinfo(server, &pdev->dev_id, dsaddr->ds_list[i]->ds_remotestr);
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 3e724cb7ef01..e6909cafab68 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -11,6 +11,7 @@
#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
+#include <linux/file.h>
#include <linux/sched/mm.h>
#include <linux/sunrpc/metrics.h>
@@ -162,6 +163,20 @@ decode_name(struct xdr_stream *xdr, u32 *id)
return 0;
}
+static struct nfsd_file *
+ff_local_open_fh(struct pnfs_layout_segment *lseg, u32 ds_idx,
+ struct nfs_client *clp, const struct cred *cred,
+ struct nfs_fh *fh, fmode_t mode)
+{
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
+
+ return nfs_local_open_fh(clp, cred, fh, &mirror->nfl, mode);
+#else
+ return NULL;
+#endif
+}
+
static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
const struct nfs4_ff_layout_mirror *m2)
{
@@ -231,16 +246,18 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
spin_lock_init(&mirror->lock);
refcount_set(&mirror->ref, 1);
INIT_LIST_HEAD(&mirror->mirrors);
+ nfs_localio_file_init(&mirror->nfl);
}
return mirror;
}
static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
{
- const struct cred *cred;
+ const struct cred *cred;
ff_layout_remove_mirror(mirror);
kfree(mirror->fh_versions);
+ nfs_close_local_fh(&mirror->nfl);
cred = rcu_access_pointer(mirror->ro_cred);
put_cred(cred);
cred = rcu_access_pointer(mirror->rw_cred);
@@ -823,14 +840,6 @@ ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
}
static void
-ff_layout_pg_check_layout(struct nfs_pageio_descriptor *pgio,
- struct nfs_page *req)
-{
- pnfs_generic_pg_check_layout(pgio);
- pnfs_generic_pg_check_range(pgio, req);
-}
-
-static void
ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
@@ -839,8 +848,11 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs4_pnfs_ds *ds;
u32 ds_idx;
+ if (NFS_SERVER(pgio->pg_inode)->flags &
+ (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
+ pgio->pg_maxretrans = io_maxretrans;
retry:
- ff_layout_pg_check_layout(pgio, req);
+ pnfs_generic_pg_check_layout(pgio, req);
/* Use full layout for now */
if (!pgio->pg_lseg) {
ff_layout_pg_get_read(pgio, req, false);
@@ -852,6 +864,8 @@ retry:
if (!pgio->pg_lseg)
goto out_nolseg;
}
+ /* Reset wb_nio, since getting layout segment was successful */
+ req->wb_nio = 0;
ds = ff_layout_get_ds_for_read(pgio, &ds_idx);
if (!ds) {
@@ -868,14 +882,24 @@ retry:
pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
pgio->pg_mirror_idx = ds_idx;
-
- if (NFS_SERVER(pgio->pg_inode)->flags &
- (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
- pgio->pg_maxretrans = io_maxretrans;
return;
out_nolseg:
- if (pgio->pg_error < 0)
- return;
+ if (pgio->pg_error < 0) {
+ if (pgio->pg_error != -EAGAIN)
+ return;
+ /* Retry getting layout segment if lower layer returned -EAGAIN */
+ if (pgio->pg_maxretrans && req->wb_nio++ > pgio->pg_maxretrans) {
+ if (NFS_SERVER(pgio->pg_inode)->flags & NFS_MOUNT_SOFTERR)
+ pgio->pg_error = -ETIMEDOUT;
+ else
+ pgio->pg_error = -EIO;
+ return;
+ }
+ pgio->pg_error = 0;
+ /* Sleep for 1 second before retrying */
+ ssleep(1);
+ goto retry;
+ }
out_mds:
trace_pnfs_mds_fallback_pg_init_read(pgio->pg_inode,
0, NFS4_MAX_UINT64, IOMODE_READ,
@@ -895,7 +919,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
u32 i;
retry:
- ff_layout_pg_check_layout(pgio, req);
+ pnfs_generic_pg_check_layout(pgio, req);
if (!pgio->pg_lseg) {
pgio->pg_lseg =
pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req),
@@ -1130,10 +1154,14 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
rpc_wake_up(&tbl->slot_tbl_waitq);
goto reset;
/* RPC connection errors */
+ case -ENETDOWN:
+ case -ENETUNREACH:
+ if (test_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags))
+ return -NFS4ERR_FATAL_IOERROR;
+ fallthrough;
case -ECONNREFUSED:
case -EHOSTDOWN:
case -EHOSTUNREACH:
- case -ENETUNREACH:
case -EIO:
case -ETIMEDOUT:
case -EPIPE:
@@ -1159,6 +1187,7 @@ reset:
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
static int ff_layout_async_handle_error_v3(struct rpc_task *task,
+ struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
u32 idx)
{
@@ -1176,6 +1205,11 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
case -EJUKEBOX:
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
goto out_retry;
+ case -ENETDOWN:
+ case -ENETUNREACH:
+ if (test_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags))
+ return -NFS4ERR_FATAL_IOERROR;
+ fallthrough;
default:
dprintk("%s DS connection error %d\n", __func__,
task->tk_status);
@@ -1210,7 +1244,7 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
switch (vers) {
case 3:
- return ff_layout_async_handle_error_v3(task, lseg, idx);
+ return ff_layout_async_handle_error_v3(task, clp, lseg, idx);
case 4:
return ff_layout_async_handle_error_v4(task, state, clp,
lseg, idx);
@@ -1240,6 +1274,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
case -ECONNRESET:
case -EHOSTDOWN:
case -EHOSTUNREACH:
+ case -ENETDOWN:
case -ENETUNREACH:
case -EADDRINUSE:
case -ENOBUFS:
@@ -1294,7 +1329,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
hdr->args.offset, hdr->args.count,
&hdr->res.op_status, OP_READ,
task->tk_status);
- trace_ff_layout_read_error(hdr);
+ trace_ff_layout_read_error(hdr, task->tk_status);
}
err = ff_layout_async_handle_error(task, hdr->args.context->state,
@@ -1313,6 +1348,9 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
return task->tk_status;
case -EAGAIN:
goto out_eagain;
+ case -NFS4ERR_FATAL_IOERROR:
+ task->tk_status = -EIO;
+ return 0;
}
return 0;
@@ -1464,7 +1502,7 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
hdr->args.offset, hdr->args.count,
&hdr->res.op_status, OP_WRITE,
task->tk_status);
- trace_ff_layout_write_error(hdr);
+ trace_ff_layout_write_error(hdr, task->tk_status);
}
err = ff_layout_async_handle_error(task, hdr->args.context->state,
@@ -1483,6 +1521,9 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
return task->tk_status;
case -EAGAIN:
return -EAGAIN;
+ case -NFS4ERR_FATAL_IOERROR:
+ task->tk_status = -EIO;
+ return 0;
}
if (hdr->res.verf->committed == NFS_FILE_SYNC ||
@@ -1510,7 +1551,7 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
data->args.offset, data->args.count,
&data->res.op_status, OP_COMMIT,
task->tk_status);
- trace_ff_layout_commit_error(data);
+ trace_ff_layout_commit_error(data, task->tk_status);
}
err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
@@ -1527,6 +1568,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
case -EAGAIN:
rpc_restart_call_prepare(task);
return -EAGAIN;
+ case -NFS4ERR_FATAL_IOERROR:
+ task->tk_status = -EIO;
+ return 0;
}
ff_layout_set_layoutcommit(data->inode, data->lseg, data->lwb);
@@ -1764,6 +1808,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
+ struct nfsd_file *localio;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
loff_t offset = hdr->args.offset;
@@ -1810,11 +1855,18 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
hdr->args.offset = offset;
hdr->mds_offset = offset;
+ /* Start IO accounting for local read */
+ localio = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh, FMODE_READ);
+ if (localio) {
+ hdr->task.tk_start = ktime_get();
+ ff_layout_read_record_layoutstats_start(&hdr->task, hdr);
+ }
+
/* Perform an asynchronous read to ds */
nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_read_call_ops_v3 :
&ff_layout_read_call_ops_v4,
- 0, RPC_TASK_SOFTCONN);
+ 0, RPC_TASK_SOFTCONN, localio);
put_cred(ds_cred);
return PNFS_ATTEMPTED;
@@ -1834,6 +1886,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
+ struct nfsd_file *localio;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
loff_t offset = hdr->args.offset;
@@ -1878,11 +1931,19 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
*/
hdr->args.offset = offset;
+ /* Start IO accounting for local write */
+ localio = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh,
+ FMODE_READ|FMODE_WRITE);
+ if (localio) {
+ hdr->task.tk_start = ktime_get();
+ ff_layout_write_record_layoutstats_start(&hdr->task, hdr);
+ }
+
/* Perform an asynchronous write */
nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_write_call_ops_v3 :
&ff_layout_write_call_ops_v4,
- sync, RPC_TASK_SOFTCONN);
+ sync, RPC_TASK_SOFTCONN, localio);
put_cred(ds_cred);
return PNFS_ATTEMPTED;
@@ -1916,6 +1977,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
struct pnfs_layout_segment *lseg = data->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
+ struct nfsd_file *localio;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
u32 idx;
@@ -1954,10 +2016,18 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
if (fh)
data->args.fh = fh;
+ /* Start IO accounting for local commit */
+ localio = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh,
+ FMODE_READ|FMODE_WRITE);
+ if (localio) {
+ data->task.tk_start = ktime_get();
+ ff_layout_commit_record_layoutstats_start(&data->task, data);
+ }
+
ret = nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_commit_call_ops_v3 :
&ff_layout_commit_call_ops_v4,
- how, RPC_TASK_SOFTCONN);
+ how, RPC_TASK_SOFTCONN, localio);
put_cred(ds_cred);
return ret;
out_err:
@@ -2095,12 +2165,6 @@ static int ff_layout_encode_ioerr(struct xdr_stream *xdr,
}
static void
-encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len)
-{
- WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0);
-}
-
-static void
ff_layout_encode_ff_iostat_head(struct xdr_stream *xdr,
const nfs4_stateid *stateid,
const struct nfs42_layoutstat_devinfo *devinfo)
@@ -2556,7 +2620,7 @@ ff_layout_set_layoutdriver(struct nfs_server *server,
const struct nfs_fh *dummy)
{
#if IS_ENABLED(CONFIG_NFS_V4_2)
- server->caps |= NFS_CAP_LAYOUTSTATS;
+ server->caps |= NFS_CAP_LAYOUTSTATS | NFS_CAP_REBOOT_LAYOUTRETURN;
#endif
return 0;
}
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index f84b3fb0dddd..095df09017a5 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -83,6 +83,7 @@ struct nfs4_ff_layout_mirror {
nfs4_stateid stateid;
const struct cred __rcu *ro_cred;
const struct cred __rcu *rw_cred;
+ struct nfs_file_localio nfl;
refcount_t ref;
spinlock_t lock;
unsigned long flags;
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index e028f5a0ef5f..4a304cf17c4b 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -49,6 +49,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
struct nfs4_pnfs_ds_addr *da;
struct nfs4_ff_layout_ds *new_ds = NULL;
struct nfs4_ff_ds_version *ds_versions = NULL;
+ struct net *net = server->nfs_client->cl_net;
u32 mp_count;
u32 version_count;
__be32 *p;
@@ -80,8 +81,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
for (i = 0; i < mp_count; i++) {
/* multipath ds */
- da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net,
- &stream, gfp_flags);
+ da = nfs4_decode_mp_ds_addr(net, &stream, gfp_flags);
if (da)
list_add_tail(&da->da_node, &dsaddrs);
}
@@ -149,7 +149,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
new_ds->ds_versions = ds_versions;
new_ds->ds_versions_cnt = version_count;
- new_ds->ds = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
+ new_ds->ds = nfs4_pnfs_ds_add(net, &dsaddrs, gfp_flags);
if (!new_ds->ds)
goto out_err_drain_dsaddrs;
@@ -395,6 +395,12 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
/* connect success, check rsize/wsize limit */
if (!status) {
+ /*
+ * ds_clp is put in destroy_ds().
+ * keep ds_clp even if DS is local, so that if local IO cannot
+ * proceed somehow, we can fall back to NFS whenever we want.
+ */
+ nfs_local_probe(ds->ds_clp);
max_payload =
nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient),
NULL);
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index d0a0956f8a13..13f71ca8c974 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -49,6 +49,8 @@ enum nfs_param {
Opt_bsize,
Opt_clientaddr,
Opt_cto,
+ Opt_alignwrite,
+ Opt_fatal_neterrors,
Opt_fg,
Opt_fscache,
Opt_fscache_flag,
@@ -71,6 +73,8 @@ enum nfs_param {
Opt_posix,
Opt_proto,
Opt_rdirplus,
+ Opt_rdirplus_none,
+ Opt_rdirplus_force,
Opt_rdma,
Opt_resvport,
Opt_retrans,
@@ -95,6 +99,20 @@ enum nfs_param {
};
enum {
+ Opt_fatal_neterrors_default,
+ Opt_fatal_neterrors_enetunreach,
+ Opt_fatal_neterrors_none,
+};
+
+static const struct constant_table nfs_param_enums_fatal_neterrors[] = {
+ { "default", Opt_fatal_neterrors_default },
+ { "ENETDOWN:ENETUNREACH", Opt_fatal_neterrors_enetunreach },
+ { "ENETUNREACH:ENETDOWN", Opt_fatal_neterrors_enetunreach },
+ { "none", Opt_fatal_neterrors_none },
+ {}
+};
+
+enum {
Opt_local_lock_all,
Opt_local_lock_flock,
Opt_local_lock_none,
@@ -149,6 +167,9 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
fsparam_u32 ("bsize", Opt_bsize),
fsparam_string("clientaddr", Opt_clientaddr),
fsparam_flag_no("cto", Opt_cto),
+ fsparam_flag_no("alignwrite", Opt_alignwrite),
+ fsparam_enum("fatal_neterrors", Opt_fatal_neterrors,
+ nfs_param_enums_fatal_neterrors),
fsparam_flag ("fg", Opt_fg),
fsparam_flag_no("fsc", Opt_fscache_flag),
fsparam_string("fsc", Opt_fscache),
@@ -172,7 +193,8 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
fsparam_u32 ("port", Opt_port),
fsparam_flag_no("posix", Opt_posix),
fsparam_string("proto", Opt_proto),
- fsparam_flag_no("rdirplus", Opt_rdirplus),
+ fsparam_flag_no("rdirplus", Opt_rdirplus), // rdirplus|nordirplus
+ fsparam_string("rdirplus", Opt_rdirplus), // rdirplus=...
fsparam_flag ("rdma", Opt_rdma),
fsparam_flag_no("resvport", Opt_resvport),
fsparam_u32 ("retrans", Opt_retrans),
@@ -286,6 +308,12 @@ static const struct constant_table nfs_xprtsec_policies[] = {
{}
};
+static const struct constant_table nfs_rdirplus_tokens[] = {
+ { "none", Opt_rdirplus_none },
+ { "force", Opt_rdirplus_force },
+ {}
+};
+
/*
* Sanity-check a server address provided by the mount command.
*
@@ -592,6 +620,12 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
else
ctx->flags |= NFS_MOUNT_TRUNK_DISCOVERY;
break;
+ case Opt_alignwrite:
+ if (result.negated)
+ ctx->flags |= NFS_MOUNT_NO_ALIGNWRITE;
+ else
+ ctx->flags &= ~NFS_MOUNT_NO_ALIGNWRITE;
+ break;
case Opt_ac:
if (result.negated)
ctx->flags |= NFS_MOUNT_NOAC;
@@ -600,9 +634,11 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
break;
case Opt_lock:
if (result.negated) {
+ ctx->lock_status = NFS_LOCK_NOLOCK;
ctx->flags |= NFS_MOUNT_NONLM;
ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL);
} else {
+ ctx->lock_status = NFS_LOCK_LOCK;
ctx->flags &= ~NFS_MOUNT_NONLM;
ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL);
}
@@ -626,10 +662,25 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
ctx->flags &= ~NFS_MOUNT_NOACL;
break;
case Opt_rdirplus:
- if (result.negated)
+ if (result.negated) {
+ ctx->flags &= ~NFS_MOUNT_FORCE_RDIRPLUS;
ctx->flags |= NFS_MOUNT_NORDIRPLUS;
- else
- ctx->flags &= ~NFS_MOUNT_NORDIRPLUS;
+ } else if (!param->string) {
+ ctx->flags &= ~(NFS_MOUNT_NORDIRPLUS | NFS_MOUNT_FORCE_RDIRPLUS);
+ } else {
+ switch (lookup_constant(nfs_rdirplus_tokens, param->string, -1)) {
+ case Opt_rdirplus_none:
+ ctx->flags &= ~NFS_MOUNT_FORCE_RDIRPLUS;
+ ctx->flags |= NFS_MOUNT_NORDIRPLUS;
+ break;
+ case Opt_rdirplus_force:
+ ctx->flags &= ~NFS_MOUNT_NORDIRPLUS;
+ ctx->flags |= NFS_MOUNT_FORCE_RDIRPLUS;
+ break;
+ default:
+ goto out_invalid_value;
+ }
+ }
break;
case Opt_sharecache:
if (result.negated)
@@ -862,6 +913,25 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
goto out_of_bounds;
ctx->nfs_server.max_connect = result.uint_32;
break;
+ case Opt_fatal_neterrors:
+ trace_nfs_mount_assign(param->key, param->string);
+ switch (result.uint_32) {
+ case Opt_fatal_neterrors_default:
+ if (fc->net_ns != &init_net)
+ ctx->flags |= NFS_MOUNT_NETUNREACH_FATAL;
+ else
+ ctx->flags &= ~NFS_MOUNT_NETUNREACH_FATAL;
+ break;
+ case Opt_fatal_neterrors_enetunreach:
+ ctx->flags |= NFS_MOUNT_NETUNREACH_FATAL;
+ break;
+ case Opt_fatal_neterrors_none:
+ ctx->flags &= ~NFS_MOUNT_NETUNREACH_FATAL;
+ break;
+ default:
+ goto out_invalid_value;
+ }
+ break;
case Opt_lookupcache:
trace_nfs_mount_assign(param->key, param->string);
switch (result.uint_32) {
@@ -1112,9 +1182,12 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
ctx->acdirmax = data->acdirmax;
ctx->need_mount = false;
- memcpy(sap, &data->addr, sizeof(data->addr));
- ctx->nfs_server.addrlen = sizeof(data->addr);
- ctx->nfs_server.port = ntohs(data->addr.sin_port);
+ if (!is_remount_fc(fc)) {
+ memcpy(sap, &data->addr, sizeof(data->addr));
+ ctx->nfs_server.addrlen = sizeof(data->addr);
+ ctx->nfs_server.port = ntohs(data->addr.sin_port);
+ }
+
if (sap->ss_family != AF_INET ||
!nfs_verify_server_address(sap))
goto out_no_address;
@@ -1454,7 +1527,7 @@ static int nfs_fs_context_validate(struct fs_context *fc)
/* Load the NFS protocol module if we haven't done so yet */
if (!ctx->nfs_mod) {
- nfs_mod = get_nfs_version(ctx->version);
+ nfs_mod = find_nfs_version(ctx->version);
if (IS_ERR(nfs_mod)) {
ret = PTR_ERR(nfs_mod);
goto out_version_unavailable;
@@ -1528,7 +1601,7 @@ static int nfs_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
}
nfs_copy_fh(ctx->mntfh, src->mntfh);
- __module_get(ctx->nfs_mod->owner);
+ get_nfs_version(ctx->nfs_mod);
ctx->client_address = NULL;
ctx->mount_server.hostname = NULL;
ctx->nfs_server.export_path = NULL;
@@ -1620,7 +1693,7 @@ static int nfs_init_fs_context(struct fs_context *fc)
}
ctx->nfs_mod = nfss->nfs_client->cl_nfs_mod;
- __module_get(ctx->nfs_mod->owner);
+ get_nfs_version(ctx->nfs_mod);
} else {
/* defaults */
ctx->timeo = NFS_UNSPEC_TIMEO;
@@ -1638,6 +1711,9 @@ static int nfs_init_fs_context(struct fs_context *fc)
ctx->xprtsec.cert_serial = TLS_NO_CERT;
ctx->xprtsec.privkey_serial = TLS_NO_PRIVKEY;
+ if (fc->net_ns != &init_net)
+ ctx->flags |= NFS_MOUNT_NETUNREACH_FATAL;
+
fc->s_iflags |= SB_I_STABLE_WRITES;
}
fc->fs_private = ctx;
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index ddc1ee031955..e278a1ad1ca3 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -263,15 +263,25 @@ int nfs_netfs_readahead(struct readahead_control *ractl)
static atomic_t nfs_netfs_debug_id;
static int nfs_netfs_init_request(struct netfs_io_request *rreq, struct file *file)
{
+ if (!file) {
+ if (WARN_ON_ONCE(rreq->origin != NETFS_PGPRIV2_COPY_TO_CACHE))
+ return -EIO;
+ return 0;
+ }
+
rreq->netfs_priv = get_nfs_open_context(nfs_file_open_context(file));
rreq->debug_id = atomic_inc_return(&nfs_netfs_debug_id);
+ /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
+ __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
+ rreq->io_streams[0].sreq_max_len = NFS_SB(rreq->inode->i_sb)->rsize;
return 0;
}
static void nfs_netfs_free_request(struct netfs_io_request *rreq)
{
- put_nfs_open_context(rreq->netfs_priv);
+ if (rreq->netfs_priv)
+ put_nfs_open_context(rreq->netfs_priv);
}
static struct nfs_netfs_io_data *nfs_netfs_alloc(struct netfs_io_subrequest *sreq)
@@ -286,14 +296,6 @@ static struct nfs_netfs_io_data *nfs_netfs_alloc(struct netfs_io_subrequest *sre
return netfs;
}
-static bool nfs_netfs_clamp_length(struct netfs_io_subrequest *sreq)
-{
- size_t rsize = NFS_SB(sreq->rreq->inode->i_sb)->rsize;
-
- sreq->len = min(sreq->len, rsize);
- return true;
-}
-
static void nfs_netfs_issue_read(struct netfs_io_subrequest *sreq)
{
struct nfs_netfs_io_data *netfs;
@@ -302,17 +304,20 @@ static void nfs_netfs_issue_read(struct netfs_io_subrequest *sreq)
struct nfs_open_context *ctx = sreq->rreq->netfs_priv;
struct page *page;
unsigned long idx;
+ pgoff_t start, last;
int err;
- pgoff_t start = (sreq->start + sreq->transferred) >> PAGE_SHIFT;
- pgoff_t last = ((sreq->start + sreq->len -
- sreq->transferred - 1) >> PAGE_SHIFT);
+
+ start = (sreq->start + sreq->transferred) >> PAGE_SHIFT;
+ last = ((sreq->start + sreq->len - sreq->transferred - 1) >> PAGE_SHIFT);
nfs_pageio_init_read(&pgio, inode, false,
&nfs_async_read_completion_ops);
netfs = nfs_netfs_alloc(sreq);
- if (!netfs)
- return netfs_subreq_terminated(sreq, -ENOMEM, false);
+ if (!netfs) {
+ sreq->error = -ENOMEM;
+ return netfs_read_subreq_terminated(sreq);
+ }
pgio.pg_netfs = netfs; /* used in completion */
@@ -341,7 +346,7 @@ void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr)
int nfs_netfs_folio_unlock(struct folio *folio)
{
- struct inode *inode = folio_file_mapping(folio)->host;
+ struct inode *inode = folio->mapping->host;
/*
* If fscache is enabled, netfs will unlock pages.
@@ -361,7 +366,8 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr)
return;
sreq = netfs->sreq;
- if (test_bit(NFS_IOHDR_EOF, &hdr->flags))
+ if (test_bit(NFS_IOHDR_EOF, &hdr->flags) &&
+ sreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &sreq->flags);
if (hdr->error)
@@ -377,5 +383,4 @@ const struct netfs_request_ops nfs_netfs_ops = {
.init_request = nfs_netfs_init_request,
.free_request = nfs_netfs_free_request,
.issue_read = nfs_netfs_issue_read,
- .clamp_length = nfs_netfs_clamp_length
};
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index e3cb4923316b..9d86868f4998 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -60,8 +60,6 @@ static inline void nfs_netfs_get(struct nfs_netfs_io_data *netfs)
static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs)
{
- ssize_t final_len;
-
/* Only the last RPC completion should call netfs_subreq_terminated() */
if (!refcount_dec_and_test(&netfs->refcount))
return;
@@ -74,8 +72,10 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs)
* Correct the final length here to be no larger than the netfs subrequest
* length, and thus avoid netfs's "Subreq overread" warning message.
*/
- final_len = min_t(s64, netfs->sreq->len, atomic64_read(&netfs->transferred));
- netfs_subreq_terminated(netfs->sreq, netfs->error ?: final_len, false);
+ netfs->sreq->transferred = min_t(s64, netfs->sreq->len,
+ atomic64_read(&netfs->transferred));
+ netfs->sreq->error = netfs->error;
+ netfs_read_subreq_terminated(netfs->sreq);
kfree(netfs);
}
static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi)
@@ -101,10 +101,10 @@ extern int nfs_netfs_read_folio(struct file *file, struct folio *folio);
static inline bool nfs_fscache_release_folio(struct folio *folio, gfp_t gfp)
{
- if (folio_test_fscache(folio)) {
+ if (folio_test_private_2(folio)) { /* [DEPRECATED] */
if (current_is_kswapd() || !(gfp & __GFP_FS))
return false;
- folio_wait_fscache(folio);
+ folio_wait_private_2(folio);
}
fscache_note_page_release(netfs_i_cookie(netfs_inode(folio->mapping->host)));
return true;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 11ff2b2e060f..f13d25d95b85 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -62,7 +62,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
}
/*
- * get an NFS2/NFS3 root dentry from the root filehandle
+ * get a root dentry from the root filehandle
*/
int nfs_get_root(struct super_block *s, struct fs_context *fc)
{
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index acef52ecb1bb..119e447758b9 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -74,6 +74,8 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
{
+ if (unlikely(nfs_current_task_exiting()))
+ return -EINTR;
schedule();
if (signal_pending_state(mode, current))
return -ERESTARTSYS;
@@ -190,9 +192,8 @@ static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi)
void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
{
struct nfs_inode *nfsi = NFS_I(inode);
- bool have_delegation = NFS_PROTO(inode)->have_delegation(inode, FMODE_READ);
- if (have_delegation) {
+ if (nfs_have_delegated_attributes(inode)) {
if (!(flags & NFS_INO_REVAL_FORCED))
flags &= ~(NFS_INO_INVALID_MODE |
NFS_INO_INVALID_OTHER |
@@ -206,12 +207,15 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
nfs_fscache_invalidate(inode, 0);
flags &= ~NFS_INO_REVAL_FORCED;
- nfsi->cache_validity |= flags;
+ flags |= nfsi->cache_validity;
+ if (inode->i_mapping->nrpages == 0)
+ flags &= ~NFS_INO_INVALID_DATA;
- if (inode->i_mapping->nrpages == 0) {
- nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
- nfs_ooo_clear(nfsi);
- } else if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
+ /* pairs with nfs_clear_invalid_mapping()'s smp_load_acquire() */
+ smp_store_release(&nfsi->cache_validity, flags);
+
+ if (inode->i_mapping->nrpages == 0 ||
+ nfsi->cache_validity & NFS_INO_INVALID_DATA) {
nfs_ooo_clear(nfsi);
}
trace_nfs_set_cache_invalid(inode, 0);
@@ -276,6 +280,8 @@ EXPORT_SYMBOL_GPL(nfs_zap_acl_cache);
void nfs_invalidate_atime(struct inode *inode)
{
+ if (nfs_have_delegated_atime(inode))
+ return;
spin_lock(&inode->i_lock);
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
spin_unlock(&inode->i_lock);
@@ -491,6 +497,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops;
inode->i_data.a_ops = &nfs_file_aops;
nfs_inode_init_regular(nfsi);
+ mapping_set_large_folios(inode->i_mapping);
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
inode->i_fop = &nfs_dir_operations;
@@ -604,6 +611,67 @@ out_no_inode:
}
EXPORT_SYMBOL_GPL(nfs_fhget);
+static void
+nfs_fattr_fixup_delegated(struct inode *inode, struct nfs_fattr *fattr)
+{
+ unsigned long cache_validity = NFS_I(inode)->cache_validity;
+
+ if (nfs_have_delegated_mtime(inode)) {
+ if (!(cache_validity & NFS_INO_INVALID_CTIME))
+ fattr->valid &= ~(NFS_ATTR_FATTR_PRECTIME |
+ NFS_ATTR_FATTR_CTIME);
+
+ if (!(cache_validity & NFS_INO_INVALID_MTIME))
+ fattr->valid &= ~(NFS_ATTR_FATTR_PREMTIME |
+ NFS_ATTR_FATTR_MTIME);
+
+ if (!(cache_validity & NFS_INO_INVALID_ATIME))
+ fattr->valid &= ~NFS_ATTR_FATTR_ATIME;
+ } else if (nfs_have_delegated_atime(inode)) {
+ if (!(cache_validity & NFS_INO_INVALID_ATIME))
+ fattr->valid &= ~NFS_ATTR_FATTR_ATIME;
+ }
+}
+
+static void nfs_update_timestamps(struct inode *inode, unsigned int ia_valid)
+{
+ enum file_time_flags time_flags = 0;
+ unsigned int cache_flags = 0;
+
+ if (ia_valid & ATTR_MTIME) {
+ time_flags |= S_MTIME | S_CTIME;
+ cache_flags |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;
+ }
+ if (ia_valid & ATTR_ATIME) {
+ time_flags |= S_ATIME;
+ cache_flags |= NFS_INO_INVALID_ATIME;
+ }
+ inode_update_timestamps(inode, time_flags);
+ NFS_I(inode)->cache_validity &= ~cache_flags;
+}
+
+void nfs_update_delegated_atime(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ if (nfs_have_delegated_atime(inode))
+ nfs_update_timestamps(inode, ATTR_ATIME);
+ spin_unlock(&inode->i_lock);
+}
+
+void nfs_update_delegated_mtime_locked(struct inode *inode)
+{
+ if (nfs_have_delegated_mtime(inode))
+ nfs_update_timestamps(inode, ATTR_MTIME);
+}
+
+void nfs_update_delegated_mtime(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ nfs_update_delegated_mtime_locked(inode);
+ spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL_GPL(nfs_update_delegated_mtime);
+
#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN)
int
@@ -631,6 +699,18 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
attr->ia_valid &= ~ATTR_SIZE;
}
+ if (nfs_have_delegated_mtime(inode) && attr->ia_valid & ATTR_MTIME) {
+ spin_lock(&inode->i_lock);
+ nfs_update_timestamps(inode, attr->ia_valid);
+ spin_unlock(&inode->i_lock);
+ attr->ia_valid &= ~(ATTR_MTIME | ATTR_ATIME);
+ } else if (nfs_have_delegated_atime(inode) &&
+ attr->ia_valid & ATTR_ATIME &&
+ !(attr->ia_valid & ATTR_MTIME)) {
+ nfs_update_delegated_atime(inode);
+ attr->ia_valid &= ~ATTR_ATIME;
+ }
+
/* Optimization: if the end result is no change, don't RPC */
if (((attr->ia_valid & NFS_VALID_ATTRS) & ~(ATTR_FILE|ATTR_OPEN)) == 0)
return 0;
@@ -686,6 +766,7 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset)
spin_unlock(&inode->i_lock);
truncate_pagecache(inode, offset);
+ nfs_update_delegated_mtime_locked(inode);
spin_lock(&inode->i_lock);
out:
return err;
@@ -709,8 +790,9 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
spin_lock(&inode->i_lock);
NFS_I(inode)->attr_gencount = fattr->gencount;
if ((attr->ia_valid & ATTR_SIZE) != 0) {
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME |
- NFS_INO_INVALID_BLOCKS);
+ if (!nfs_have_delegated_mtime(inode))
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
nfs_vmtruncate(inode, attr->ia_size);
}
@@ -856,8 +938,12 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path,
/* Flush out writes to the server in order to update c/mtime/version. */
if ((request_mask & (STATX_CTIME | STATX_MTIME | STATX_CHANGE_COOKIE)) &&
- S_ISREG(inode->i_mode))
- filemap_write_and_wait(inode->i_mapping);
+ S_ISREG(inode->i_mode)) {
+ if (nfs_have_delegated_mtime(inode))
+ filemap_fdatawrite(inode->i_mapping);
+ else
+ filemap_write_and_wait(inode->i_mapping);
+ }
/*
* We may force a getattr if the user cares about atime.
@@ -1012,7 +1098,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
if (!is_sync)
return;
inode = d_inode(ctx->dentry);
- if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+ if (nfs_have_read_or_write_delegation(inode))
return;
nfsi = NFS_I(inode);
if (inode->i_mapping->nrpages == 0)
@@ -1053,6 +1139,8 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry,
ctx->lock_context.open_context = ctx;
INIT_LIST_HEAD(&ctx->list);
ctx->mdsthreshold = NULL;
+ nfs_localio_file_init(&ctx->nfl);
+
return ctx;
}
EXPORT_SYMBOL_GPL(alloc_nfs_open_context);
@@ -1084,6 +1172,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
nfs_sb_deactive(sb);
put_rpccred(rcu_dereference_protected(ctx->ll_cred, 1));
kfree(ctx->mdsthreshold);
+ nfs_close_local_fh(&ctx->nfl);
kfree_rcu(ctx, rcu_head);
}
@@ -1340,6 +1429,13 @@ int nfs_clear_invalid_mapping(struct address_space *mapping)
TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
if (ret)
goto out;
+ smp_rmb(); /* pairs with smp_wmb() below */
+ if (test_bit(NFS_INO_INVALIDATING, bitlock))
+ continue;
+ /* pairs with nfs_set_cache_invalid()'s smp_store_release() */
+ if (!(smp_load_acquire(&nfsi->cache_validity) & NFS_INO_INVALID_DATA))
+ goto out;
+ /* Slow-path that double-checks with spinlock held */
spin_lock(&inode->i_lock);
if (test_bit(NFS_INO_INVALIDATING, bitlock)) {
spin_unlock(&inode->i_lock);
@@ -1482,7 +1578,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
unsigned long invalid = 0;
struct timespec64 ts;
- if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+ if (nfs_have_delegated_attributes(inode))
return 0;
if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) {
@@ -1565,6 +1661,7 @@ void nfs_fattr_init(struct nfs_fattr *fattr)
fattr->gencount = nfs_inc_attr_generation_counter();
fattr->owner_name = NULL;
fattr->group_name = NULL;
+ fattr->mdsthreshold = NULL;
}
EXPORT_SYMBOL_GPL(nfs_fattr_init);
@@ -2118,6 +2215,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
*/
nfsi->read_cache_jiffies = fattr->time_start;
+ /* Fix up any delegated attributes in the struct nfs_fattr */
+ nfs_fattr_fixup_delegated(inode, fattr);
+
save_cache_validity = nfsi->cache_validity;
nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ATIME
@@ -2390,35 +2490,54 @@ static void nfs_destroy_inodecache(void)
kmem_cache_destroy(nfs_inode_cachep);
}
+struct workqueue_struct *nfslocaliod_workqueue;
struct workqueue_struct *nfsiod_workqueue;
EXPORT_SYMBOL_GPL(nfsiod_workqueue);
/*
- * start up the nfsiod workqueue
+ * Destroy the nfsiod workqueues
*/
-static int nfsiod_start(void)
+static void nfsiod_stop(void)
{
struct workqueue_struct *wq;
- dprintk("RPC: creating workqueue nfsiod\n");
- wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
- if (wq == NULL)
- return -ENOMEM;
- nfsiod_workqueue = wq;
- return 0;
+
+ wq = nfsiod_workqueue;
+ if (wq != NULL) {
+ nfsiod_workqueue = NULL;
+ destroy_workqueue(wq);
+ }
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ wq = nfslocaliod_workqueue;
+ if (wq != NULL) {
+ nfslocaliod_workqueue = NULL;
+ destroy_workqueue(wq);
+ }
+#endif /* CONFIG_NFS_LOCALIO */
}
/*
- * Destroy the nfsiod workqueue
+ * Start the nfsiod workqueues
*/
-static void nfsiod_stop(void)
+static int nfsiod_start(void)
{
- struct workqueue_struct *wq;
-
- wq = nfsiod_workqueue;
- if (wq == NULL)
- return;
- nfsiod_workqueue = NULL;
- destroy_workqueue(wq);
+ dprintk("RPC: creating workqueue nfsiod\n");
+ nfsiod_workqueue = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
+ if (nfsiod_workqueue == NULL)
+ return -ENOMEM;
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ /*
+ * localio writes need to use a normal (non-memreclaim) workqueue.
+ * When we start getting low on space, XFS goes and calls flush_work() on
+ * a non-memreclaim work queue, which causes a priority inversion problem.
+ */
+ dprintk("RPC: creating workqueue nfslocaliod\n");
+ nfslocaliod_workqueue = alloc_workqueue("nfslocaliod", WQ_UNBOUND, 0);
+ if (unlikely(nfslocaliod_workqueue == NULL)) {
+ nfsiod_stop();
+ return -ENOMEM;
+ }
+#endif /* CONFIG_NFS_LOCALIO */
+ return 0;
}
unsigned int nfs_net_id;
@@ -2538,6 +2657,7 @@ static void __exit exit_nfs_fs(void)
/* Not quite true; I just maintain it */
MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
+MODULE_DESCRIPTION("NFS client support");
MODULE_LICENSE("GPL");
module_param(enable_ino64, bool, 0644);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 06253695fe53..6655e5f32ec6 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -6,12 +6,14 @@
#include "nfs4_fs.h"
#include <linux/fs_context.h>
#include <linux/security.h>
+#include <linux/compiler_attributes.h>
#include <linux/crc32.h>
#include <linux/sunrpc/addr.h>
#include <linux/nfs_page.h>
+#include <linux/nfslocalio.h>
#include <linux/wait_bit.h>
-#define NFS_SB_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS)
+#define NFS_SB_MASK (SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS)
extern const struct export_operations nfs_export_ops;
@@ -112,6 +114,7 @@ struct nfs_fs_context {
unsigned short protofamily;
unsigned short mountfamily;
bool has_sec_mnt_opts;
+ int lock_status;
struct {
union {
@@ -153,6 +156,12 @@ struct nfs_fs_context {
} clone_data;
};
+enum nfs_lock_status {
+ NFS_LOCK_NOT_SET = 0,
+ NFS_LOCK_LOCK = 1,
+ NFS_LOCK_NOLOCK = 2,
+};
+
#define nfs_errorf(fc, fmt, ...) ((fc)->log.log ? \
errorf(fc, fmt, ## __VA_ARGS__) : \
({ dprintk(fmt "\n", ## __VA_ARGS__); }))
@@ -301,7 +310,8 @@ void nfs_pgio_header_free(struct nfs_pgio_header *);
int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
const struct cred *cred, const struct nfs_rpc_ops *rpc_ops,
- const struct rpc_call_ops *call_ops, int how, int flags);
+ const struct rpc_call_ops *call_ops, int how, int flags,
+ struct nfsd_file *localio);
void nfs_free_request(struct nfs_page *req);
struct nfs_pgio_mirror *
nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc);
@@ -390,8 +400,8 @@ struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int);
void nfs_d_prune_case_insensitive_aliases(struct inode *inode);
int nfs_create(struct mnt_idmap *, struct inode *, struct dentry *,
umode_t, bool);
-int nfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *,
- umode_t);
+struct dentry *nfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *,
+ umode_t);
int nfs_rmdir(struct inode *, struct dentry *);
int nfs_unlink(struct inode *, struct dentry *);
int nfs_symlink(struct mnt_idmap *, struct inode *, struct dentry *,
@@ -431,6 +441,7 @@ int nfs_check_flags(int);
/* inode.c */
extern struct workqueue_struct *nfsiod_workqueue;
+extern struct workqueue_struct *nfslocaliod_workqueue;
extern struct inode *nfs_alloc_inode(struct super_block *sb);
extern void nfs_free_inode(struct inode *);
extern int nfs_write_inode(struct inode *, struct writeback_control *);
@@ -442,6 +453,54 @@ extern void nfs_set_cache_invalid(struct inode *inode, unsigned long flags);
extern bool nfs_check_cache_invalid(struct inode *, unsigned long);
extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode);
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+/* localio.c */
+extern void nfs_local_probe(struct nfs_client *);
+extern void nfs_local_probe_async(struct nfs_client *);
+extern void nfs_local_probe_async_work(struct work_struct *);
+extern struct nfsd_file *nfs_local_open_fh(struct nfs_client *,
+ const struct cred *,
+ struct nfs_fh *,
+ struct nfs_file_localio *,
+ const fmode_t);
+extern int nfs_local_doio(struct nfs_client *,
+ struct nfsd_file *,
+ struct nfs_pgio_header *,
+ const struct rpc_call_ops *);
+extern int nfs_local_commit(struct nfsd_file *,
+ struct nfs_commit_data *,
+ const struct rpc_call_ops *, int);
+extern bool nfs_server_is_local(const struct nfs_client *clp);
+
+#else /* CONFIG_NFS_LOCALIO */
+static inline void nfs_local_probe(struct nfs_client *clp) {}
+static inline void nfs_local_probe_async(struct nfs_client *clp) {}
+static inline struct nfsd_file *
+nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
+ struct nfs_fh *fh, struct nfs_file_localio *nfl,
+ const fmode_t mode)
+{
+ return NULL;
+}
+static inline int nfs_local_doio(struct nfs_client *clp,
+ struct nfsd_file *localio,
+ struct nfs_pgio_header *hdr,
+ const struct rpc_call_ops *call_ops)
+{
+ return -EINVAL;
+}
+static inline int nfs_local_commit(struct nfsd_file *localio,
+ struct nfs_commit_data *data,
+ const struct rpc_call_ops *call_ops, int how)
+{
+ return -EINVAL;
+}
+static inline bool nfs_server_is_local(const struct nfs_client *clp)
+{
+ return false;
+}
+#endif /* CONFIG_NFS_LOCALIO */
+
/* super.c */
extern const struct super_operations nfs_sops;
bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t);
@@ -461,11 +520,11 @@ extern const struct netfs_request_ops nfs_netfs_ops;
#endif
/* io.c */
-extern void nfs_start_io_read(struct inode *inode);
+extern __must_check int nfs_start_io_read(struct inode *inode);
extern void nfs_end_io_read(struct inode *inode);
-extern void nfs_start_io_write(struct inode *inode);
+extern __must_check int nfs_start_io_write(struct inode *inode);
extern void nfs_end_io_write(struct inode *inode);
-extern void nfs_start_io_direct(struct inode *inode);
+extern __must_check int nfs_start_io_direct(struct inode *inode);
extern void nfs_end_io_direct(struct inode *inode);
static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi)
@@ -498,7 +557,6 @@ extern int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio,
struct nfs_open_context *ctx,
struct folio *folio);
extern void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio);
-extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
/* super.c */
@@ -521,7 +579,8 @@ extern int nfs_initiate_commit(struct rpc_clnt *clnt,
struct nfs_commit_data *data,
const struct nfs_rpc_ops *nfs_ops,
const struct rpc_call_ops *call_ops,
- int how, int flags);
+ int how, int flags,
+ struct nfsd_file *localio);
extern void nfs_init_commit(struct nfs_commit_data *data,
struct list_head *head,
struct pnfs_layout_segment *lseg,
@@ -710,9 +769,9 @@ unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp)
if ((bsize & (bsize - 1)) || nrbitsp) {
unsigned char nrbits;
- for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--)
+ for (nrbits = 31; nrbits && !(bsize & (1UL << nrbits)); nrbits--)
;
- bsize = 1 << nrbits;
+ bsize = 1UL << nrbits;
if (nrbitsp)
*nrbitsp = nrbits;
}
@@ -778,7 +837,7 @@ static inline void nfs_folio_mark_unstable(struct folio *folio,
struct nfs_commit_info *cinfo)
{
if (folio && !cinfo->dreq) {
- struct inode *inode = folio_file_mapping(folio)->host;
+ struct inode *inode = folio->mapping->host;
long nr = folio_nr_pages(folio);
/* This page is really still in write-back - just that the
@@ -793,31 +852,12 @@ static inline void nfs_folio_mark_unstable(struct folio *folio,
/*
* Determine the number of bytes of data the page contains
*/
-static inline
-unsigned int nfs_page_length(struct page *page)
-{
- loff_t i_size = i_size_read(page_file_mapping(page)->host);
-
- if (i_size > 0) {
- pgoff_t index = page_index(page);
- pgoff_t end_index = (i_size - 1) >> PAGE_SHIFT;
- if (index < end_index)
- return PAGE_SIZE;
- if (index == end_index)
- return ((i_size - 1) & ~PAGE_MASK) + 1;
- }
- return 0;
-}
-
-/*
- * Determine the number of bytes of data the page contains
- */
static inline size_t nfs_folio_length(struct folio *folio)
{
- loff_t i_size = i_size_read(folio_file_mapping(folio)->host);
+ loff_t i_size = i_size_read(folio->mapping->host);
if (i_size > 0) {
- pgoff_t index = folio_index(folio) >> folio_order(folio);
+ pgoff_t index = folio->index >> folio_order(folio);
pgoff_t end_index = (i_size - 1) >> folio_shift(folio);
if (index < end_index)
return folio_size(folio);
@@ -859,18 +899,16 @@ u64 nfs_timespec_to_change_attr(const struct timespec64 *ts)
return ((u64)ts->tv_sec << 30) + ts->tv_nsec;
}
-#ifdef CONFIG_CRC32
static inline u32 nfs_stateid_hash(const nfs4_stateid *stateid)
{
return ~crc32_le(0xFFFFFFFF, &stateid->other[0],
NFS4_STATEID_OTHER_SIZE);
}
-#else
-static inline u32 nfs_stateid_hash(nfs4_stateid *stateid)
+
+static inline bool nfs_current_task_exiting(void)
{
- return 0;
+ return (current->flags & PF_EXITING) != 0;
}
-#endif
static inline bool nfs_error_is_fatal(int err)
{
diff --git a/fs/nfs/io.c b/fs/nfs/io.c
index b5551ed8f648..3388faf2acb9 100644
--- a/fs/nfs/io.c
+++ b/fs/nfs/io.c
@@ -39,19 +39,28 @@ static void nfs_block_o_direct(struct nfs_inode *nfsi, struct inode *inode)
* Note that buffered writes and truncates both take a write lock on
* inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
*/
-void
+int
nfs_start_io_read(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ int err;
+
/* Be an optimist! */
- down_read(&inode->i_rwsem);
+ err = down_read_killable(&inode->i_rwsem);
+ if (err)
+ return err;
if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) == 0)
- return;
+ return 0;
up_read(&inode->i_rwsem);
+
/* Slow path.... */
- down_write(&inode->i_rwsem);
+ err = down_write_killable(&inode->i_rwsem);
+ if (err)
+ return err;
nfs_block_o_direct(nfsi, inode);
downgrade_write(&inode->i_rwsem);
+
+ return 0;
}
/**
@@ -74,11 +83,15 @@ nfs_end_io_read(struct inode *inode)
* Declare that a buffered read operation is about to start, and ensure
* that we block all direct I/O.
*/
-void
+int
nfs_start_io_write(struct inode *inode)
{
- down_write(&inode->i_rwsem);
- nfs_block_o_direct(NFS_I(inode), inode);
+ int err;
+
+ err = down_write_killable(&inode->i_rwsem);
+ if (!err)
+ nfs_block_o_direct(NFS_I(inode), inode);
+ return err;
}
/**
@@ -119,19 +132,28 @@ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode)
* Note that buffered writes and truncates both take a write lock on
* inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
*/
-void
+int
nfs_start_io_direct(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ int err;
+
/* Be an optimist! */
- down_read(&inode->i_rwsem);
+ err = down_read_killable(&inode->i_rwsem);
+ if (err)
+ return err;
if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) != 0)
- return;
+ return 0;
up_read(&inode->i_rwsem);
+
/* Slow path.... */
- down_write(&inode->i_rwsem);
+ err = down_write_killable(&inode->i_rwsem);
+ if (err)
+ return err;
nfs_block_buffered(nfsi, inode);
downgrade_write(&inode->i_rwsem);
+
+ return 0;
}
/**
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
index 5aa776b5a3e7..49862c95b224 100644
--- a/fs/nfs/iostat.h
+++ b/fs/nfs/iostat.h
@@ -46,10 +46,11 @@ static inline void nfs_add_stats(const struct inode *inode,
nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
}
-static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void)
-{
- return alloc_percpu(struct nfs_iostats);
-}
+/*
+ * This specialized allocator has to be a macro for its allocations to be
+ * accounted separately (to have a separate alloc_tag).
+ */
+#define nfs_alloc_iostats() alloc_percpu(struct nfs_iostats)
static inline void nfs_free_iostats(struct nfs_iostats __percpu *stats)
{
diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
new file mode 100644
index 000000000000..4ec952f9f47d
--- /dev/null
+++ b/fs/nfs/localio.c
@@ -0,0 +1,867 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * NFS client support for local clients to bypass network stack
+ *
+ * Copyright (C) 2014 Weston Andros Adamson <dros@primarydata.com>
+ * Copyright (C) 2019 Trond Myklebust <trond.myklebust@hammerspace.com>
+ * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com>
+ * Copyright (C) 2024 NeilBrown <neilb@suse.de>
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/vfs.h>
+#include <linux/file.h>
+#include <linux/inet.h>
+#include <linux/sunrpc/addr.h>
+#include <linux/inetdevice.h>
+#include <net/addrconf.h>
+#include <linux/nfs_common.h>
+#include <linux/nfslocalio.h>
+#include <linux/bvec.h>
+
+#include <linux/nfs.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_xdr.h>
+
+#include "internal.h"
+#include "pnfs.h"
+#include "nfstrace.h"
+
+#define NFSDBG_FACILITY NFSDBG_VFS
+
+struct nfs_local_kiocb {
+ struct kiocb kiocb;
+ struct bio_vec *bvec;
+ struct nfs_pgio_header *hdr;
+ struct work_struct work;
+ void (*aio_complete_work)(struct work_struct *);
+ struct nfsd_file *localio;
+};
+
+struct nfs_local_fsync_ctx {
+ struct nfsd_file *localio;
+ struct nfs_commit_data *data;
+ struct work_struct work;
+ struct completion *done;
+};
+
+static bool localio_enabled __read_mostly = true;
+module_param(localio_enabled, bool, 0644);
+
+static bool localio_O_DIRECT_semantics __read_mostly = false;
+module_param(localio_O_DIRECT_semantics, bool, 0644);
+MODULE_PARM_DESC(localio_O_DIRECT_semantics,
+ "LOCALIO will use O_DIRECT semantics to filesystem.");
+
+static inline bool nfs_client_is_local(const struct nfs_client *clp)
+{
+ return !!rcu_access_pointer(clp->cl_uuid.net);
+}
+
+bool nfs_server_is_local(const struct nfs_client *clp)
+{
+ return nfs_client_is_local(clp) && localio_enabled;
+}
+EXPORT_SYMBOL_GPL(nfs_server_is_local);
+
+/*
+ * UUID_IS_LOCAL XDR functions
+ */
+
+static void localio_xdr_enc_uuidargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const void *data)
+{
+ const u8 *uuid = data;
+
+ encode_opaque_fixed(xdr, uuid, UUID_SIZE);
+}
+
+static int localio_xdr_dec_uuidres(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ void *result)
+{
+ /* void return */
+ return 0;
+}
+
+static const struct rpc_procinfo nfs_localio_procedures[] = {
+ [LOCALIOPROC_UUID_IS_LOCAL] = {
+ .p_proc = LOCALIOPROC_UUID_IS_LOCAL,
+ .p_encode = localio_xdr_enc_uuidargs,
+ .p_decode = localio_xdr_dec_uuidres,
+ .p_arglen = XDR_QUADLEN(UUID_SIZE),
+ .p_replen = 0,
+ .p_statidx = LOCALIOPROC_UUID_IS_LOCAL,
+ .p_name = "UUID_IS_LOCAL",
+ },
+};
+
+static unsigned int nfs_localio_counts[ARRAY_SIZE(nfs_localio_procedures)];
+static const struct rpc_version nfslocalio_version1 = {
+ .number = 1,
+ .nrprocs = ARRAY_SIZE(nfs_localio_procedures),
+ .procs = nfs_localio_procedures,
+ .counts = nfs_localio_counts,
+};
+
+static const struct rpc_version *nfslocalio_version[] = {
+ [1] = &nfslocalio_version1,
+};
+
+extern const struct rpc_program nfslocalio_program;
+static struct rpc_stat nfslocalio_rpcstat = { &nfslocalio_program };
+
+const struct rpc_program nfslocalio_program = {
+ .name = "nfslocalio",
+ .number = NFS_LOCALIO_PROGRAM,
+ .nrvers = ARRAY_SIZE(nfslocalio_version),
+ .version = nfslocalio_version,
+ .stats = &nfslocalio_rpcstat,
+};
+
+/*
+ * nfs_init_localioclient - Initialise an NFS localio client connection
+ */
+static struct rpc_clnt *nfs_init_localioclient(struct nfs_client *clp)
+{
+ struct rpc_clnt *rpcclient_localio;
+
+ rpcclient_localio = rpc_bind_new_program(clp->cl_rpcclient,
+ &nfslocalio_program, 1);
+
+ dprintk_rcu("%s: server (%s) %s NFS LOCALIO.\n",
+ __func__, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
+ (IS_ERR(rpcclient_localio) ? "does not support" : "supports"));
+
+ return rpcclient_localio;
+}
+
+static bool nfs_server_uuid_is_local(struct nfs_client *clp)
+{
+ u8 uuid[UUID_SIZE];
+ struct rpc_message msg = {
+ .rpc_argp = &uuid,
+ };
+ struct rpc_clnt *rpcclient_localio;
+ int status;
+
+ rpcclient_localio = nfs_init_localioclient(clp);
+ if (IS_ERR(rpcclient_localio))
+ return false;
+
+ export_uuid(uuid, &clp->cl_uuid.uuid);
+
+ msg.rpc_proc = &nfs_localio_procedures[LOCALIOPROC_UUID_IS_LOCAL];
+ status = rpc_call_sync(rpcclient_localio, &msg, 0);
+ dprintk("%s: NFS reply UUID_IS_LOCAL: status=%d\n",
+ __func__, status);
+ rpc_shutdown_client(rpcclient_localio);
+
+ /* Server is only local if it initialized required struct members */
+ if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom)
+ return false;
+
+ return true;
+}
+
+/*
+ * nfs_local_probe - probe local i/o support for an nfs_server and nfs_client
+ * - called after alloc_client and init_client (so cl_rpcclient exists)
+ * - this function is idempotent, it can be called for old or new clients
+ */
+void nfs_local_probe(struct nfs_client *clp)
+{
+ /* Disallow localio if disabled via sysfs or AUTH_SYS isn't used */
+ if (!localio_enabled ||
+ clp->cl_rpcclient->cl_auth->au_flavor != RPC_AUTH_UNIX) {
+ nfs_localio_disable_client(clp);
+ return;
+ }
+
+ if (nfs_client_is_local(clp)) {
+ /* If already enabled, disable and re-enable */
+ nfs_localio_disable_client(clp);
+ }
+
+ if (!nfs_uuid_begin(&clp->cl_uuid))
+ return;
+ if (nfs_server_uuid_is_local(clp))
+ nfs_localio_enable_client(clp);
+ nfs_uuid_end(&clp->cl_uuid);
+}
+EXPORT_SYMBOL_GPL(nfs_local_probe);
+
+void nfs_local_probe_async_work(struct work_struct *work)
+{
+ struct nfs_client *clp =
+ container_of(work, struct nfs_client, cl_local_probe_work);
+
+ nfs_local_probe(clp);
+}
+
+void nfs_local_probe_async(struct nfs_client *clp)
+{
+ queue_work(nfsiod_workqueue, &clp->cl_local_probe_work);
+}
+EXPORT_SYMBOL_GPL(nfs_local_probe_async);
+
+static inline struct nfsd_file *nfs_local_file_get(struct nfsd_file *nf)
+{
+ return nfs_to->nfsd_file_get(nf);
+}
+
+static inline void nfs_local_file_put(struct nfsd_file *nf)
+{
+ nfs_to->nfsd_file_put(nf);
+}
+
+/*
+ * __nfs_local_open_fh - open a local filehandle in terms of nfsd_file.
+ *
+ * Returns a pointer to a struct nfsd_file or ERR_PTR.
+ * Caller must release returned nfsd_file with nfs_to_nfsd_file_put_local().
+ */
+static struct nfsd_file *
+__nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
+ struct nfs_fh *fh, struct nfs_file_localio *nfl,
+ const fmode_t mode)
+{
+ struct nfsd_file *localio;
+
+ localio = nfs_open_local_fh(&clp->cl_uuid, clp->cl_rpcclient,
+ cred, fh, nfl, mode);
+ if (IS_ERR(localio)) {
+ int status = PTR_ERR(localio);
+ trace_nfs_local_open_fh(fh, mode, status);
+ switch (status) {
+ case -ENOMEM:
+ case -ENXIO:
+ case -ENOENT:
+ /* Revalidate localio, will disable if unsupported */
+ nfs_local_probe(clp);
+ }
+ }
+ return localio;
+}
+
+/*
+ * nfs_local_open_fh - open a local filehandle in terms of nfsd_file.
+ * First checking if the open nfsd_file is already cached, otherwise
+ * must __nfs_local_open_fh and insert the nfsd_file in nfs_file_localio.
+ *
+ * Returns a pointer to a struct nfsd_file or NULL.
+ */
+struct nfsd_file *
+nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
+ struct nfs_fh *fh, struct nfs_file_localio *nfl,
+ const fmode_t mode)
+{
+ struct nfsd_file *nf, *new, __rcu **pnf;
+
+ if (!nfs_server_is_local(clp))
+ return NULL;
+ if (mode & ~(FMODE_READ | FMODE_WRITE))
+ return NULL;
+
+ if (mode & FMODE_WRITE)
+ pnf = &nfl->rw_file;
+ else
+ pnf = &nfl->ro_file;
+
+ new = NULL;
+ rcu_read_lock();
+ nf = rcu_dereference(*pnf);
+ if (!nf) {
+ rcu_read_unlock();
+ new = __nfs_local_open_fh(clp, cred, fh, nfl, mode);
+ if (IS_ERR(new))
+ return NULL;
+ rcu_read_lock();
+ /* try to swap in the pointer */
+ spin_lock(&clp->cl_uuid.lock);
+ nf = rcu_dereference_protected(*pnf, 1);
+ if (!nf) {
+ nf = new;
+ new = NULL;
+ rcu_assign_pointer(*pnf, nf);
+ }
+ spin_unlock(&clp->cl_uuid.lock);
+ }
+ nf = nfs_local_file_get(nf);
+ rcu_read_unlock();
+ if (new)
+ nfs_to_nfsd_file_put_local(new);
+ return nf;
+}
+EXPORT_SYMBOL_GPL(nfs_local_open_fh);
+
+static struct bio_vec *
+nfs_bvec_alloc_and_import_pagevec(struct page **pagevec,
+ unsigned int npages, gfp_t flags)
+{
+ struct bio_vec *bvec, *p;
+
+ bvec = kmalloc_array(npages, sizeof(*bvec), flags);
+ if (bvec != NULL) {
+ for (p = bvec; npages > 0; p++, pagevec++, npages--) {
+ p->bv_page = *pagevec;
+ p->bv_len = PAGE_SIZE;
+ p->bv_offset = 0;
+ }
+ }
+ return bvec;
+}
+
+static void
+nfs_local_iocb_free(struct nfs_local_kiocb *iocb)
+{
+ kfree(iocb->bvec);
+ kfree(iocb);
+}
+
+static struct nfs_local_kiocb *
+nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
+ struct file *file, gfp_t flags)
+{
+ struct nfs_local_kiocb *iocb;
+
+ iocb = kmalloc(sizeof(*iocb), flags);
+ if (iocb == NULL)
+ return NULL;
+ iocb->bvec = nfs_bvec_alloc_and_import_pagevec(hdr->page_array.pagevec,
+ hdr->page_array.npages, flags);
+ if (iocb->bvec == NULL) {
+ kfree(iocb);
+ return NULL;
+ }
+
+ if (localio_O_DIRECT_semantics &&
+ test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) {
+ iocb->kiocb.ki_filp = file;
+ iocb->kiocb.ki_flags = IOCB_DIRECT;
+ } else
+ init_sync_kiocb(&iocb->kiocb, file);
+
+ iocb->kiocb.ki_pos = hdr->args.offset;
+ iocb->hdr = hdr;
+ iocb->kiocb.ki_flags &= ~IOCB_APPEND;
+ iocb->aio_complete_work = NULL;
+
+ return iocb;
+}
+
+static void
+nfs_local_iter_init(struct iov_iter *i, struct nfs_local_kiocb *iocb, int dir)
+{
+ struct nfs_pgio_header *hdr = iocb->hdr;
+
+ iov_iter_bvec(i, dir, iocb->bvec, hdr->page_array.npages,
+ hdr->args.count + hdr->args.pgbase);
+ if (hdr->args.pgbase != 0)
+ iov_iter_advance(i, hdr->args.pgbase);
+}
+
+static void
+nfs_local_hdr_release(struct nfs_pgio_header *hdr,
+ const struct rpc_call_ops *call_ops)
+{
+ call_ops->rpc_call_done(&hdr->task, hdr);
+ call_ops->rpc_release(hdr);
+}
+
+static void
+nfs_local_pgio_init(struct nfs_pgio_header *hdr,
+ const struct rpc_call_ops *call_ops)
+{
+ hdr->task.tk_ops = call_ops;
+ if (!hdr->task.tk_start)
+ hdr->task.tk_start = ktime_get();
+}
+
+static void
+nfs_local_pgio_done(struct nfs_pgio_header *hdr, long status)
+{
+ if (status >= 0) {
+ hdr->res.count = status;
+ hdr->res.op_status = NFS4_OK;
+ hdr->task.tk_status = 0;
+ } else {
+ hdr->res.op_status = nfs_localio_errno_to_nfs4_stat(status);
+ hdr->task.tk_status = status;
+ }
+}
+
+static void
+nfs_local_pgio_release(struct nfs_local_kiocb *iocb)
+{
+ struct nfs_pgio_header *hdr = iocb->hdr;
+
+ nfs_local_file_put(iocb->localio);
+ nfs_local_iocb_free(iocb);
+ nfs_local_hdr_release(hdr, hdr->task.tk_ops);
+}
+
+/*
+ * Complete the I/O from iocb->kiocb.ki_complete()
+ *
+ * Note that this function can be called from a bottom half context,
+ * hence we need to queue the rpc_call_done() etc to a workqueue
+ */
+static inline void nfs_local_pgio_aio_complete(struct nfs_local_kiocb *iocb)
+{
+ INIT_WORK(&iocb->work, iocb->aio_complete_work);
+ queue_work(nfsiod_workqueue, &iocb->work);
+}
+
+static void
+nfs_local_read_done(struct nfs_local_kiocb *iocb, long status)
+{
+ struct nfs_pgio_header *hdr = iocb->hdr;
+ struct file *filp = iocb->kiocb.ki_filp;
+
+ nfs_local_pgio_done(hdr, status);
+
+ /*
+ * Must clear replen otherwise NFSv3 data corruption will occur
+ * if/when switching from LOCALIO back to using normal RPC.
+ */
+ hdr->res.replen = 0;
+
+ if (hdr->res.count != hdr->args.count ||
+ hdr->args.offset + hdr->res.count >= i_size_read(file_inode(filp)))
+ hdr->res.eof = true;
+
+ dprintk("%s: read %ld bytes eof %d.\n", __func__,
+ status > 0 ? status : 0, hdr->res.eof);
+}
+
+static void nfs_local_read_aio_complete_work(struct work_struct *work)
+{
+ struct nfs_local_kiocb *iocb =
+ container_of(work, struct nfs_local_kiocb, work);
+
+ nfs_local_pgio_release(iocb);
+}
+
+static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret)
+{
+ struct nfs_local_kiocb *iocb =
+ container_of(kiocb, struct nfs_local_kiocb, kiocb);
+
+ nfs_local_read_done(iocb, ret);
+ nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */
+}
+
+static void nfs_local_call_read(struct work_struct *work)
+{
+ struct nfs_local_kiocb *iocb =
+ container_of(work, struct nfs_local_kiocb, work);
+ struct file *filp = iocb->kiocb.ki_filp;
+ const struct cred *save_cred;
+ struct iov_iter iter;
+ ssize_t status;
+
+ save_cred = override_creds(filp->f_cred);
+
+ nfs_local_iter_init(&iter, iocb, READ);
+
+ status = filp->f_op->read_iter(&iocb->kiocb, &iter);
+ if (status != -EIOCBQUEUED) {
+ nfs_local_read_done(iocb, status);
+ nfs_local_pgio_release(iocb);
+ }
+
+ revert_creds(save_cred);
+}
+
+static int
+nfs_do_local_read(struct nfs_pgio_header *hdr,
+ struct nfsd_file *localio,
+ const struct rpc_call_ops *call_ops)
+{
+ struct nfs_local_kiocb *iocb;
+ struct file *file = nfs_to->nfsd_file_file(localio);
+
+ /* Don't support filesystems without read_iter */
+ if (!file->f_op->read_iter)
+ return -EAGAIN;
+
+ dprintk("%s: vfs_read count=%u pos=%llu\n",
+ __func__, hdr->args.count, hdr->args.offset);
+
+ iocb = nfs_local_iocb_alloc(hdr, file, GFP_KERNEL);
+ if (iocb == NULL)
+ return -ENOMEM;
+ iocb->localio = localio;
+
+ nfs_local_pgio_init(hdr, call_ops);
+ hdr->res.eof = false;
+
+ if (iocb->kiocb.ki_flags & IOCB_DIRECT) {
+ iocb->kiocb.ki_complete = nfs_local_read_aio_complete;
+ iocb->aio_complete_work = nfs_local_read_aio_complete_work;
+ }
+
+ INIT_WORK(&iocb->work, nfs_local_call_read);
+ queue_work(nfslocaliod_workqueue, &iocb->work);
+
+ return 0;
+}
+
+static void
+nfs_copy_boot_verifier(struct nfs_write_verifier *verifier, struct inode *inode)
+{
+ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ u32 *verf = (u32 *)verifier->data;
+ int seq = 0;
+
+ do {
+ read_seqbegin_or_lock(&clp->cl_boot_lock, &seq);
+ verf[0] = (u32)clp->cl_nfssvc_boot.tv_sec;
+ verf[1] = (u32)clp->cl_nfssvc_boot.tv_nsec;
+ } while (need_seqretry(&clp->cl_boot_lock, seq));
+ done_seqretry(&clp->cl_boot_lock, seq);
+}
+
+static void
+nfs_reset_boot_verifier(struct inode *inode)
+{
+ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+
+ write_seqlock(&clp->cl_boot_lock);
+ ktime_get_real_ts64(&clp->cl_nfssvc_boot);
+ write_sequnlock(&clp->cl_boot_lock);
+}
+
+static void
+nfs_set_local_verifier(struct inode *inode,
+ struct nfs_writeverf *verf,
+ enum nfs3_stable_how how)
+{
+ nfs_copy_boot_verifier(&verf->verifier, inode);
+ verf->committed = how;
+}
+
+/* Factored out from fs/nfsd/vfs.h:fh_getattr() */
+static int __vfs_getattr(struct path *p, struct kstat *stat, int version)
+{
+ u32 request_mask = STATX_BASIC_STATS;
+
+ if (version == 4)
+ request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE);
+ return vfs_getattr(p, stat, request_mask, AT_STATX_SYNC_AS_STAT);
+}
+
+/* Copied from fs/nfsd/nfsfh.c:nfsd4_change_attribute() */
+static u64 __nfsd4_change_attribute(const struct kstat *stat,
+ const struct inode *inode)
+{
+ u64 chattr;
+
+ if (stat->result_mask & STATX_CHANGE_COOKIE) {
+ chattr = stat->change_cookie;
+ if (S_ISREG(inode->i_mode) &&
+ !(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) {
+ chattr += (u64)stat->ctime.tv_sec << 30;
+ chattr += stat->ctime.tv_nsec;
+ }
+ } else {
+ chattr = time_to_chattr(&stat->ctime);
+ }
+ return chattr;
+}
+
+static void nfs_local_vfs_getattr(struct nfs_local_kiocb *iocb)
+{
+ struct kstat stat;
+ struct file *filp = iocb->kiocb.ki_filp;
+ struct nfs_pgio_header *hdr = iocb->hdr;
+ struct nfs_fattr *fattr = hdr->res.fattr;
+ int version = NFS_PROTO(hdr->inode)->version;
+
+ if (unlikely(!fattr) || __vfs_getattr(&filp->f_path, &stat, version))
+ return;
+
+ fattr->valid = (NFS_ATTR_FATTR_FILEID |
+ NFS_ATTR_FATTR_CHANGE |
+ NFS_ATTR_FATTR_SIZE |
+ NFS_ATTR_FATTR_ATIME |
+ NFS_ATTR_FATTR_MTIME |
+ NFS_ATTR_FATTR_CTIME |
+ NFS_ATTR_FATTR_SPACE_USED);
+
+ fattr->fileid = stat.ino;
+ fattr->size = stat.size;
+ fattr->atime = stat.atime;
+ fattr->mtime = stat.mtime;
+ fattr->ctime = stat.ctime;
+ if (version == 4) {
+ fattr->change_attr =
+ __nfsd4_change_attribute(&stat, file_inode(filp));
+ } else
+ fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
+ fattr->du.nfs3.used = stat.blocks << 9;
+}
+
+static void
+nfs_local_write_done(struct nfs_local_kiocb *iocb, long status)
+{
+ struct nfs_pgio_header *hdr = iocb->hdr;
+ struct inode *inode = hdr->inode;
+
+ dprintk("%s: wrote %ld bytes.\n", __func__, status > 0 ? status : 0);
+
+ /* Handle short writes as if they are ENOSPC */
+ if (status > 0 && status < hdr->args.count) {
+ hdr->mds_offset += status;
+ hdr->args.offset += status;
+ hdr->args.pgbase += status;
+ hdr->args.count -= status;
+ nfs_set_pgio_error(hdr, -ENOSPC, hdr->args.offset);
+ status = -ENOSPC;
+ }
+ if (status < 0)
+ nfs_reset_boot_verifier(inode);
+
+ nfs_local_pgio_done(hdr, status);
+}
+
+static void nfs_local_write_aio_complete_work(struct work_struct *work)
+{
+ struct nfs_local_kiocb *iocb =
+ container_of(work, struct nfs_local_kiocb, work);
+
+ nfs_local_vfs_getattr(iocb);
+ nfs_local_pgio_release(iocb);
+}
+
+static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret)
+{
+ struct nfs_local_kiocb *iocb =
+ container_of(kiocb, struct nfs_local_kiocb, kiocb);
+
+ nfs_local_write_done(iocb, ret);
+ nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */
+}
+
+static void nfs_local_call_write(struct work_struct *work)
+{
+ struct nfs_local_kiocb *iocb =
+ container_of(work, struct nfs_local_kiocb, work);
+ struct file *filp = iocb->kiocb.ki_filp;
+ unsigned long old_flags = current->flags;
+ const struct cred *save_cred;
+ struct iov_iter iter;
+ ssize_t status;
+
+ current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
+ save_cred = override_creds(filp->f_cred);
+
+ nfs_local_iter_init(&iter, iocb, WRITE);
+
+ file_start_write(filp);
+ status = filp->f_op->write_iter(&iocb->kiocb, &iter);
+ file_end_write(filp);
+ if (status != -EIOCBQUEUED) {
+ nfs_local_write_done(iocb, status);
+ nfs_local_vfs_getattr(iocb);
+ nfs_local_pgio_release(iocb);
+ }
+
+ revert_creds(save_cred);
+ current->flags = old_flags;
+}
+
+static int
+nfs_do_local_write(struct nfs_pgio_header *hdr,
+ struct nfsd_file *localio,
+ const struct rpc_call_ops *call_ops)
+{
+ struct nfs_local_kiocb *iocb;
+ struct file *file = nfs_to->nfsd_file_file(localio);
+
+ /* Don't support filesystems without write_iter */
+ if (!file->f_op->write_iter)
+ return -EAGAIN;
+
+ dprintk("%s: vfs_write count=%u pos=%llu %s\n",
+ __func__, hdr->args.count, hdr->args.offset,
+ (hdr->args.stable == NFS_UNSTABLE) ? "unstable" : "stable");
+
+ iocb = nfs_local_iocb_alloc(hdr, file, GFP_NOIO);
+ if (iocb == NULL)
+ return -ENOMEM;
+ iocb->localio = localio;
+
+ switch (hdr->args.stable) {
+ default:
+ break;
+ case NFS_DATA_SYNC:
+ iocb->kiocb.ki_flags |= IOCB_DSYNC;
+ break;
+ case NFS_FILE_SYNC:
+ iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC;
+ }
+
+ nfs_local_pgio_init(hdr, call_ops);
+
+ nfs_set_local_verifier(hdr->inode, hdr->res.verf, hdr->args.stable);
+
+ if (iocb->kiocb.ki_flags & IOCB_DIRECT) {
+ iocb->kiocb.ki_complete = nfs_local_write_aio_complete;
+ iocb->aio_complete_work = nfs_local_write_aio_complete_work;
+ }
+
+ INIT_WORK(&iocb->work, nfs_local_call_write);
+ queue_work(nfslocaliod_workqueue, &iocb->work);
+
+ return 0;
+}
+
+int nfs_local_doio(struct nfs_client *clp, struct nfsd_file *localio,
+ struct nfs_pgio_header *hdr,
+ const struct rpc_call_ops *call_ops)
+{
+ int status = 0;
+
+ if (!hdr->args.count)
+ return 0;
+
+ switch (hdr->rw_mode) {
+ case FMODE_READ:
+ status = nfs_do_local_read(hdr, localio, call_ops);
+ break;
+ case FMODE_WRITE:
+ status = nfs_do_local_write(hdr, localio, call_ops);
+ break;
+ default:
+ dprintk("%s: invalid mode: %d\n", __func__,
+ hdr->rw_mode);
+ status = -EINVAL;
+ }
+
+ if (status != 0) {
+ if (status == -EAGAIN)
+ nfs_localio_disable_client(clp);
+ nfs_local_file_put(localio);
+ hdr->task.tk_status = status;
+ nfs_local_hdr_release(hdr, call_ops);
+ }
+ return status;
+}
+
+static void
+nfs_local_init_commit(struct nfs_commit_data *data,
+ const struct rpc_call_ops *call_ops)
+{
+ data->task.tk_ops = call_ops;
+}
+
+static int
+nfs_local_run_commit(struct file *filp, struct nfs_commit_data *data)
+{
+ loff_t start = data->args.offset;
+ loff_t end = LLONG_MAX;
+
+ if (data->args.count > 0) {
+ end = start + data->args.count - 1;
+ if (end < start)
+ end = LLONG_MAX;
+ }
+
+ dprintk("%s: commit %llu - %llu\n", __func__, start, end);
+ return vfs_fsync_range(filp, start, end, 0);
+}
+
+static void
+nfs_local_commit_done(struct nfs_commit_data *data, int status)
+{
+ if (status >= 0) {
+ nfs_set_local_verifier(data->inode,
+ data->res.verf,
+ NFS_FILE_SYNC);
+ data->res.op_status = NFS4_OK;
+ data->task.tk_status = 0;
+ } else {
+ nfs_reset_boot_verifier(data->inode);
+ data->res.op_status = nfs_localio_errno_to_nfs4_stat(status);
+ data->task.tk_status = status;
+ }
+}
+
+static void
+nfs_local_release_commit_data(struct nfsd_file *localio,
+ struct nfs_commit_data *data,
+ const struct rpc_call_ops *call_ops)
+{
+ nfs_local_file_put(localio);
+ call_ops->rpc_call_done(&data->task, data);
+ call_ops->rpc_release(data);
+}
+
+static void
+nfs_local_fsync_ctx_free(struct nfs_local_fsync_ctx *ctx)
+{
+ nfs_local_release_commit_data(ctx->localio, ctx->data,
+ ctx->data->task.tk_ops);
+ kfree(ctx);
+}
+
+static void
+nfs_local_fsync_work(struct work_struct *work)
+{
+ struct nfs_local_fsync_ctx *ctx;
+ int status;
+
+ ctx = container_of(work, struct nfs_local_fsync_ctx, work);
+
+ status = nfs_local_run_commit(nfs_to->nfsd_file_file(ctx->localio),
+ ctx->data);
+ nfs_local_commit_done(ctx->data, status);
+ if (ctx->done != NULL)
+ complete(ctx->done);
+ nfs_local_fsync_ctx_free(ctx);
+}
+
+static struct nfs_local_fsync_ctx *
+nfs_local_fsync_ctx_alloc(struct nfs_commit_data *data,
+ struct nfsd_file *localio, gfp_t flags)
+{
+ struct nfs_local_fsync_ctx *ctx = kmalloc(sizeof(*ctx), flags);
+
+ if (ctx != NULL) {
+ ctx->localio = localio;
+ ctx->data = data;
+ INIT_WORK(&ctx->work, nfs_local_fsync_work);
+ ctx->done = NULL;
+ }
+ return ctx;
+}
+
+int nfs_local_commit(struct nfsd_file *localio,
+ struct nfs_commit_data *data,
+ const struct rpc_call_ops *call_ops, int how)
+{
+ struct nfs_local_fsync_ctx *ctx;
+
+ ctx = nfs_local_fsync_ctx_alloc(data, localio, GFP_KERNEL);
+ if (!ctx) {
+ nfs_local_commit_done(data, -ENOMEM);
+ nfs_local_release_commit_data(localio, data, call_ops);
+ return -ENOMEM;
+ }
+
+ nfs_local_init_commit(data, call_ops);
+
+ if (how & FLUSH_SYNC) {
+ DECLARE_COMPLETION_ONSTACK(done);
+ ctx->done = &done;
+ queue_work(nfsiod_workqueue, &ctx->work);
+ wait_for_completion(&done);
+ } else
+ queue_work(nfsiod_workqueue, &ctx->work);
+
+ return 0;
+}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 68e76b626371..57c9dd700b58 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -128,11 +128,6 @@ struct mountres {
rpc_authflavor_t *auth_flavors;
};
-struct mnt_fhstatus {
- u32 status;
- struct nfs_fh *fh;
-};
-
/**
* nfs_mount - Obtain an NFS file handle for the given host and path
* @info: pointer to mount request arguments
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index e7494cdd957e..7f1ec9c67ff2 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -182,7 +182,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
ctx->version = client->rpc_ops->version;
ctx->minorversion = client->cl_minorversion;
ctx->nfs_mod = client->cl_nfs_mod;
- __module_get(ctx->nfs_mod->owner);
+ get_nfs_version(ctx->nfs_mod);
ret = client->rpc_ops->submount(fc, server);
if (ret < 0) {
@@ -195,7 +195,6 @@ struct vfsmount *nfs_d_automount(struct path *path)
if (IS_ERR(mnt))
goto out_fc;
- mntget(mnt); /* prevent immediate expiration */
if (timeout <= 0)
goto out_fc;
@@ -308,7 +307,7 @@ int nfs_submount(struct fs_context *fc, struct nfs_server *server)
int err;
/* Look it up again to get its attributes */
- err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry,
+ err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry, &dentry->d_name,
ctx->mntfh, ctx->clone_data.fattr);
dput(parent);
if (err != 0)
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
index a68b21603ea9..6ba3ea39e928 100644
--- a/fs/nfs/netns.h
+++ b/fs/nfs/netns.h
@@ -31,7 +31,11 @@ struct nfs_net {
unsigned short nfs_callback_tcpport;
unsigned short nfs_callback_tcpport6;
int cb_users[NFS4_MAX_MINOR_VERSION + 1];
-#endif
+#endif /* CONFIG_NFS_V4 */
+#if IS_ENABLED(CONFIG_NFS_V4_1)
+ struct list_head nfs4_data_server_cache;
+ spinlock_t nfs4_data_server_lock;
+#endif /* CONFIG_NFS_V4_1 */
struct nfs_netns_client *nfs_client;
spinlock_t nfs_client_lock;
ktime_t boot_time;
diff --git a/fs/nfs/nfs.h b/fs/nfs/nfs.h
index 0d3ce0460e35..8a5f51be013a 100644
--- a/fs/nfs/nfs.h
+++ b/fs/nfs/nfs.h
@@ -19,10 +19,10 @@ struct nfs_subversion {
const struct nfs_rpc_ops *rpc_ops; /* NFS operations */
const struct super_operations *sops; /* NFS Super operations */
const struct xattr_handler * const *xattr; /* NFS xattr handlers */
- struct list_head list; /* List of NFS versions */
};
-struct nfs_subversion *get_nfs_version(unsigned int);
+struct nfs_subversion *find_nfs_version(unsigned int);
+int get_nfs_version(struct nfs_subversion *);
void put_nfs_version(struct nfs_subversion *);
void register_nfs_version(struct nfs_subversion *);
void unregister_nfs_version(struct nfs_subversion *);
diff --git a/fs/nfs/nfs2super.c b/fs/nfs/nfs2super.c
index 467f21ee6a35..b1badc70bd71 100644
--- a/fs/nfs/nfs2super.c
+++ b/fs/nfs/nfs2super.c
@@ -26,6 +26,7 @@ static void __exit exit_nfs_v2(void)
unregister_nfs_version(&nfs_v2);
}
+MODULE_DESCRIPTION("NFSv2 client support");
MODULE_LICENSE("GPL");
module_init(init_nfs_v2);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index c19093814296..6e75c6c2d234 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -22,14 +22,12 @@
#include <linux/nfs.h>
#include <linux/nfs2.h>
#include <linux/nfs_fs.h>
+#include <linux/nfs_common.h>
#include "nfstrace.h"
#include "internal.h"
#define NFSDBG_FACILITY NFSDBG_XDR
-/* Mapping from NFS error code to "errno" error code. */
-#define errno_NFSERR_IO EIO
-
/*
* Declare the space requirements for NFS arguments and replies as
* number of 32bit-words
@@ -64,8 +62,6 @@
#define NFS_readdirres_sz (1+NFS_pagepad_sz)
#define NFS_statfsres_sz (1+NFS_info_sz)
-static int nfs_stat_to_errno(enum nfs_stat);
-
/*
* Encode/decode NFSv2 basic data types
*
@@ -1054,70 +1050,6 @@ out_default:
return nfs_stat_to_errno(status);
}
-
-/*
- * We need to translate between nfs status return values and
- * the local errno values which may not be the same.
- */
-static const struct {
- int stat;
- int errno;
-} nfs_errtbl[] = {
- { NFS_OK, 0 },
- { NFSERR_PERM, -EPERM },
- { NFSERR_NOENT, -ENOENT },
- { NFSERR_IO, -errno_NFSERR_IO},
- { NFSERR_NXIO, -ENXIO },
-/* { NFSERR_EAGAIN, -EAGAIN }, */
- { NFSERR_ACCES, -EACCES },
- { NFSERR_EXIST, -EEXIST },
- { NFSERR_XDEV, -EXDEV },
- { NFSERR_NODEV, -ENODEV },
- { NFSERR_NOTDIR, -ENOTDIR },
- { NFSERR_ISDIR, -EISDIR },
- { NFSERR_INVAL, -EINVAL },
- { NFSERR_FBIG, -EFBIG },
- { NFSERR_NOSPC, -ENOSPC },
- { NFSERR_ROFS, -EROFS },
- { NFSERR_MLINK, -EMLINK },
- { NFSERR_NAMETOOLONG, -ENAMETOOLONG },
- { NFSERR_NOTEMPTY, -ENOTEMPTY },
- { NFSERR_DQUOT, -EDQUOT },
- { NFSERR_STALE, -ESTALE },
- { NFSERR_REMOTE, -EREMOTE },
-#ifdef EWFLUSH
- { NFSERR_WFLUSH, -EWFLUSH },
-#endif
- { NFSERR_BADHANDLE, -EBADHANDLE },
- { NFSERR_NOT_SYNC, -ENOTSYNC },
- { NFSERR_BAD_COOKIE, -EBADCOOKIE },
- { NFSERR_NOTSUPP, -ENOTSUPP },
- { NFSERR_TOOSMALL, -ETOOSMALL },
- { NFSERR_SERVERFAULT, -EREMOTEIO },
- { NFSERR_BADTYPE, -EBADTYPE },
- { NFSERR_JUKEBOX, -EJUKEBOX },
- { -1, -EIO }
-};
-
-/**
- * nfs_stat_to_errno - convert an NFS status code to a local errno
- * @status: NFS status code to convert
- *
- * Returns a local errno value, or -EIO if the NFS status code is
- * not recognized. This function is used jointly by NFSv2 and NFSv3.
- */
-static int nfs_stat_to_errno(enum nfs_stat status)
-{
- int i;
-
- for (i = 0; nfs_errtbl[i].stat != -1; i++) {
- if (nfs_errtbl[i].stat == (int)status)
- return nfs_errtbl[i].errno;
- }
- dprintk("NFS: Unrecognized nfs status value: %u\n", status);
- return nfs_errtbl[i].errno;
-}
-
#define PROC(proc, argtype, restype, timer) \
[NFSPROC_##proc] = { \
.p_proc = NFSPROC_##proc, \
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 18d8f6529f61..a126eb31f62f 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -104,7 +104,7 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type, bool rcu)
switch (status) {
case 0:
- status = nfs_refresh_inode(inode, res.fattr);
+ nfs_refresh_inode(inode, res.fattr);
break;
case -EPFNOSUPPORT:
case -EPROTONOSUPPORT:
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
index b0c8a39c2bbd..0d7310c1ee0c 100644
--- a/fs/nfs/nfs3client.c
+++ b/fs/nfs/nfs3client.c
@@ -120,6 +120,8 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+ if (test_bit(NFS_CS_NETUNREACH_FATAL, &mds_clp->cl_flags))
+ __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags);
__set_bit(NFS_CS_DS, &cl_init.init_flags);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index cbbe3f0193b8..a4cb67573aa7 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -39,7 +39,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
__set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
schedule_timeout(NFS_JUKEBOX_RETRY_TIME);
res = -ERESTARTSYS;
- } while (!fatal_signal_pending(current));
+ } while (!fatal_signal_pending(current) && !nfs_current_task_exiting());
return res;
}
@@ -192,7 +192,7 @@ __nfs3_proc_lookup(struct inode *dir, const char *name, size_t len,
}
static int
-nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
+nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
unsigned short task_flags = 0;
@@ -202,8 +202,7 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
task_flags |= RPC_TASK_TIMEOUT;
dprintk("NFS call lookup %pd2\n", dentry);
- return __nfs3_proc_lookup(dir, dentry->d_name.name,
- dentry->d_name.len, fhandle, fattr,
+ return __nfs3_proc_lookup(dir, name->name, name->len, fhandle, fattr,
task_flags);
}
@@ -579,13 +578,13 @@ out:
return status;
}
-static int
+static struct dentry *
nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
{
struct posix_acl *default_acl, *acl;
struct nfs3_createdata *data;
- struct dentry *d_alias;
- int status = -ENOMEM;
+ struct dentry *ret = ERR_PTR(-ENOMEM);
+ int status;
dprintk("NFS call mkdir %pd\n", dentry);
@@ -593,8 +592,9 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
if (data == NULL)
goto out;
- status = posix_acl_create(dir, &sattr->ia_mode, &default_acl, &acl);
- if (status)
+ ret = ERR_PTR(posix_acl_create(dir, &sattr->ia_mode,
+ &default_acl, &acl));
+ if (IS_ERR(ret))
goto out;
data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR];
@@ -603,25 +603,27 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
data->arg.mkdir.len = dentry->d_name.len;
data->arg.mkdir.sattr = sattr;
- d_alias = nfs3_do_create(dir, dentry, data);
- status = PTR_ERR_OR_ZERO(d_alias);
+ ret = nfs3_do_create(dir, dentry, data);
- if (status != 0)
+ if (IS_ERR(ret))
goto out_release_acls;
- if (d_alias)
- dentry = d_alias;
+ if (ret)
+ dentry = ret;
status = nfs3_proc_setacls(d_inode(dentry), acl, default_acl);
+ if (status) {
+ dput(ret);
+ ret = ERR_PTR(status);
+ }
- dput(d_alias);
out_release_acls:
posix_acl_release(acl);
posix_acl_release(default_acl);
out:
nfs3_free_createdata(data);
- dprintk("NFS reply mkdir: %d\n", status);
- return status;
+ dprintk("NFS reply mkdir: %d\n", PTR_ERR_OR_ZERO(ret));
+ return ret;
}
static int
@@ -844,6 +846,41 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return status;
}
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+
+static unsigned nfs3_localio_probe_throttle __read_mostly = 0;
+module_param(nfs3_localio_probe_throttle, uint, 0644);
+MODULE_PARM_DESC(nfs3_localio_probe_throttle,
+ "Probe for NFSv3 LOCALIO every N IO requests. Must be power-of-2, defaults to 0 (probing disabled).");
+
+static void nfs3_localio_probe(struct nfs_server *server)
+{
+ struct nfs_client *clp = server->nfs_client;
+
+ /* Throttled to reduce nfs_local_probe_async() frequency */
+ if (!nfs3_localio_probe_throttle || nfs_server_is_local(clp))
+ return;
+
+ /*
+ * Try (re)enabling LOCALIO if isn't enabled -- admin deems
+ * it worthwhile to periodically check if LOCALIO possible by
+ * setting the 'nfs3_localio_probe_throttle' module parameter.
+ *
+ * This is useful if LOCALIO was previously enabled, but was
+ * disabled due to server restart, and IO has successfully
+ * completed in terms of normal RPC.
+ */
+ if ((clp->cl_uuid.nfs3_localio_probe_count++ &
+ (nfs3_localio_probe_throttle - 1)) == 0) {
+ if (!nfs_server_is_local(clp))
+ nfs_local_probe_async(clp);
+ }
+}
+
+#else
+static void nfs3_localio_probe(struct nfs_server *server) {}
+#endif
+
static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
struct inode *inode = hdr->inode;
@@ -855,8 +892,11 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN;
- if (task->tk_status >= 0 && !server->read_hdrsize)
- cmpxchg(&server->read_hdrsize, 0, hdr->res.replen);
+ if (task->tk_status >= 0) {
+ if (!server->read_hdrsize)
+ cmpxchg(&server->read_hdrsize, 0, hdr->res.replen);
+ nfs3_localio_probe(server);
+ }
nfs_invalidate_atime(inode);
nfs_refresh_inode(inode, &hdr->fattr);
@@ -886,8 +926,10 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN;
- if (task->tk_status >= 0)
+ if (task->tk_status >= 0) {
nfs_writeback_update_inode(hdr);
+ nfs3_localio_probe(NFS_SERVER(inode));
+ }
return 0;
}
@@ -979,13 +1021,21 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
return status;
}
-static int nfs3_have_delegation(struct inode *inode, fmode_t flags)
+static int nfs3_have_delegation(struct inode *inode, fmode_t type, int flags)
+{
+ return 0;
+}
+
+static int nfs3_return_delegation(struct inode *inode)
{
+ if (S_ISREG(inode->i_mode))
+ nfs_wb_all(inode);
return 0;
}
static const struct inode_operations nfs3_dir_inode_operations = {
.create = nfs_create,
+ .atomic_open = nfs_atomic_open_v23,
.lookup = nfs_lookup,
.link = nfs_link,
.unlink = nfs_unlink,
@@ -1061,6 +1111,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.clear_acl_cache = forget_all_cached_acls,
.close_context = nfs_close_context,
.have_delegation = nfs3_have_delegation,
+ .return_delegation = nfs3_return_delegation,
.alloc_client = nfs_alloc_client,
.init_client = nfs_init_client,
.free_client = nfs_free_client,
diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c
index 8a9be9e47f76..20a80478449e 100644
--- a/fs/nfs/nfs3super.c
+++ b/fs/nfs/nfs3super.c
@@ -27,6 +27,7 @@ static void __exit exit_nfs_v3(void)
unregister_nfs_version(&nfs_v3);
}
+MODULE_DESCRIPTION("NFSv3 client support");
MODULE_LICENSE("GPL");
module_init(init_nfs_v3);
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 60f032be805a..4ae01c10b7e2 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -21,14 +21,13 @@
#include <linux/nfs3.h>
#include <linux/nfs_fs.h>
#include <linux/nfsacl.h>
+#include <linux/nfs_common.h>
+
#include "nfstrace.h"
#include "internal.h"
#define NFSDBG_FACILITY NFSDBG_XDR
-/* Mapping from NFS error code to "errno" error code. */
-#define errno_NFSERR_IO EIO
-
/*
* Declare the space requirements for NFS arguments and replies as
* number of 32bit-words
@@ -91,8 +90,6 @@
NFS3_pagepad_sz)
#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
-static int nfs3_stat_to_errno(enum nfs_stat);
-
/*
* Map file type to S_IFMT bits
*/
@@ -1406,7 +1403,7 @@ static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1445,7 +1442,7 @@ static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1495,7 +1492,7 @@ out_default:
error = decode_post_op_attr(xdr, result->dir_attr, userns);
if (unlikely(error))
goto out;
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1537,7 +1534,7 @@ static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1578,7 +1575,7 @@ static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1658,7 +1655,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1728,7 +1725,7 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1795,7 +1792,7 @@ out_default:
error = decode_wcc_data(xdr, result->dir_attr, userns);
if (unlikely(error))
goto out;
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1835,7 +1832,7 @@ static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1881,7 +1878,7 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1926,7 +1923,7 @@ static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/**
@@ -2101,7 +2098,7 @@ out_default:
error = decode_post_op_attr(xdr, result->dir_attr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -2167,7 +2164,7 @@ static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -2243,7 +2240,7 @@ static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -2304,7 +2301,7 @@ static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -2350,7 +2347,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
#ifdef CONFIG_NFS_V3_ACL
@@ -2416,7 +2413,7 @@ static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
@@ -2435,76 +2432,11 @@ static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
#endif /* CONFIG_NFS_V3_ACL */
-
-/*
- * We need to translate between nfs status return values and
- * the local errno values which may not be the same.
- */
-static const struct {
- int stat;
- int errno;
-} nfs_errtbl[] = {
- { NFS_OK, 0 },
- { NFSERR_PERM, -EPERM },
- { NFSERR_NOENT, -ENOENT },
- { NFSERR_IO, -errno_NFSERR_IO},
- { NFSERR_NXIO, -ENXIO },
-/* { NFSERR_EAGAIN, -EAGAIN }, */
- { NFSERR_ACCES, -EACCES },
- { NFSERR_EXIST, -EEXIST },
- { NFSERR_XDEV, -EXDEV },
- { NFSERR_NODEV, -ENODEV },
- { NFSERR_NOTDIR, -ENOTDIR },
- { NFSERR_ISDIR, -EISDIR },
- { NFSERR_INVAL, -EINVAL },
- { NFSERR_FBIG, -EFBIG },
- { NFSERR_NOSPC, -ENOSPC },
- { NFSERR_ROFS, -EROFS },
- { NFSERR_MLINK, -EMLINK },
- { NFSERR_NAMETOOLONG, -ENAMETOOLONG },
- { NFSERR_NOTEMPTY, -ENOTEMPTY },
- { NFSERR_DQUOT, -EDQUOT },
- { NFSERR_STALE, -ESTALE },
- { NFSERR_REMOTE, -EREMOTE },
-#ifdef EWFLUSH
- { NFSERR_WFLUSH, -EWFLUSH },
-#endif
- { NFSERR_BADHANDLE, -EBADHANDLE },
- { NFSERR_NOT_SYNC, -ENOTSYNC },
- { NFSERR_BAD_COOKIE, -EBADCOOKIE },
- { NFSERR_NOTSUPP, -ENOTSUPP },
- { NFSERR_TOOSMALL, -ETOOSMALL },
- { NFSERR_SERVERFAULT, -EREMOTEIO },
- { NFSERR_BADTYPE, -EBADTYPE },
- { NFSERR_JUKEBOX, -EJUKEBOX },
- { -1, -EIO }
-};
-
-/**
- * nfs3_stat_to_errno - convert an NFS status code to a local errno
- * @status: NFS status code to convert
- *
- * Returns a local errno value, or -EIO if the NFS status code is
- * not recognized. This function is used jointly by NFSv2 and NFSv3.
- */
-static int nfs3_stat_to_errno(enum nfs_stat status)
-{
- int i;
-
- for (i = 0; nfs_errtbl[i].stat != -1; i++) {
- if (nfs_errtbl[i].stat == (int)status)
- return nfs_errtbl[i].errno;
- }
- dprintk("NFS: Unrecognized nfs status value: %u\n", status);
- return nfs_errtbl[i].errno;
-}
-
-
#define PROC(proc, argtype, restype, timer) \
[NFS3PROC_##proc] = { \
.p_proc = NFS3PROC_##proc, \
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 28704f924612..5cf52ece96ac 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -21,6 +21,8 @@
#define NFSDBG_FACILITY NFSDBG_PROC
static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *std);
+static int nfs42_proc_offload_status(struct file *file, nfs4_stateid *stateid,
+ u64 *copied);
static void nfs42_set_netaddr(struct file *filep, struct nfs42_netaddr *naddr)
{
@@ -173,6 +175,20 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
return err;
}
+static void nfs4_copy_dequeue_callback(struct nfs_server *dst_server,
+ struct nfs_server *src_server,
+ struct nfs4_copy_state *copy)
+{
+ spin_lock(&dst_server->nfs_client->cl_lock);
+ list_del_init(&copy->copies);
+ spin_unlock(&dst_server->nfs_client->cl_lock);
+ if (dst_server != src_server) {
+ spin_lock(&src_server->nfs_client->cl_lock);
+ list_del_init(&copy->src_copies);
+ spin_unlock(&src_server->nfs_client->cl_lock);
+ }
+}
+
static int handle_async_copy(struct nfs42_copy_res *res,
struct nfs_server *dst_server,
struct nfs_server *src_server,
@@ -182,9 +198,12 @@ static int handle_async_copy(struct nfs42_copy_res *res,
bool *restart)
{
struct nfs4_copy_state *copy, *tmp_copy = NULL, *iter;
- int status = NFS4_OK;
struct nfs_open_context *dst_ctx = nfs_file_open_context(dst);
struct nfs_open_context *src_ctx = nfs_file_open_context(src);
+ struct nfs_client *clp = dst_server->nfs_client;
+ unsigned long timeout = 3 * HZ;
+ int status = NFS4_OK;
+ u64 copied;
copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_KERNEL);
if (!copy)
@@ -218,19 +237,16 @@ static int handle_async_copy(struct nfs42_copy_res *res,
if (dst_server != src_server) {
spin_lock(&src_server->nfs_client->cl_lock);
- list_add_tail(&copy->src_copies, &src_server->ss_copies);
+ list_add_tail(&copy->src_copies, &src_server->ss_src_copies);
spin_unlock(&src_server->nfs_client->cl_lock);
}
- status = wait_for_completion_interruptible(&copy->completion);
- spin_lock(&dst_server->nfs_client->cl_lock);
- list_del_init(&copy->copies);
- spin_unlock(&dst_server->nfs_client->cl_lock);
- if (dst_server != src_server) {
- spin_lock(&src_server->nfs_client->cl_lock);
- list_del_init(&copy->src_copies);
- spin_unlock(&src_server->nfs_client->cl_lock);
- }
+wait:
+ status = wait_for_completion_interruptible_timeout(&copy->completion,
+ timeout);
+ if (!status)
+ goto timeout;
+ nfs4_copy_dequeue_callback(dst_server, src_server, copy);
if (status == -ERESTARTSYS) {
goto out_cancel;
} else if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) {
@@ -240,6 +256,7 @@ static int handle_async_copy(struct nfs42_copy_res *res,
}
out:
res->write_res.count = copy->count;
+ /* Copy out the updated write verifier provided by CB_OFFLOAD. */
memcpy(&res->write_res.verifier, &copy->verf, sizeof(copy->verf));
status = -copy->error;
@@ -251,6 +268,39 @@ out_cancel:
if (!nfs42_files_from_same_server(src, dst))
nfs42_do_offload_cancel_async(src, src_stateid);
goto out_free;
+timeout:
+ timeout <<= 1;
+ if (timeout > (clp->cl_lease_time >> 1))
+ timeout = clp->cl_lease_time >> 1;
+ status = nfs42_proc_offload_status(dst, &copy->stateid, &copied);
+ if (status == -EINPROGRESS)
+ goto wait;
+ nfs4_copy_dequeue_callback(dst_server, src_server, copy);
+ switch (status) {
+ case 0:
+ /* The server recognized the copy stateid, so it hasn't
+ * rebooted. Don't overwrite the verifier returned in the
+ * COPY result. */
+ res->write_res.count = copied;
+ goto out_free;
+ case -EREMOTEIO:
+ /* COPY operation failed on the server. */
+ status = -EOPNOTSUPP;
+ res->write_res.count = copied;
+ goto out_free;
+ case -EBADF:
+ /* Server did not recognize the copy stateid. It has
+ * probably restarted and lost the plot. */
+ res->write_res.count = 0;
+ status = -EOPNOTSUPP;
+ break;
+ case -EOPNOTSUPP:
+ /* RFC 7862 REQUIREs server to support OFFLOAD_STATUS when
+ * it has signed up for an async COPY, so server is not
+ * spec-compliant. */
+ res->write_res.count = 0;
+ }
+ goto out_free;
}
static int process_copy_commit(struct file *dst, loff_t pos_dst,
@@ -498,15 +548,15 @@ out_put_src_lock:
return err;
}
-struct nfs42_offloadcancel_data {
+struct nfs42_offload_data {
struct nfs_server *seq_server;
struct nfs42_offload_status_args args;
struct nfs42_offload_status_res res;
};
-static void nfs42_offload_cancel_prepare(struct rpc_task *task, void *calldata)
+static void nfs42_offload_prepare(struct rpc_task *task, void *calldata)
{
- struct nfs42_offloadcancel_data *data = calldata;
+ struct nfs42_offload_data *data = calldata;
nfs4_setup_sequence(data->seq_server->nfs_client,
&data->args.osa_seq_args,
@@ -515,7 +565,7 @@ static void nfs42_offload_cancel_prepare(struct rpc_task *task, void *calldata)
static void nfs42_offload_cancel_done(struct rpc_task *task, void *calldata)
{
- struct nfs42_offloadcancel_data *data = calldata;
+ struct nfs42_offload_data *data = calldata;
trace_nfs4_offload_cancel(&data->args, task->tk_status);
nfs41_sequence_done(task, &data->res.osr_seq_res);
@@ -525,22 +575,22 @@ static void nfs42_offload_cancel_done(struct rpc_task *task, void *calldata)
rpc_restart_call_prepare(task);
}
-static void nfs42_free_offloadcancel_data(void *data)
+static void nfs42_offload_release(void *data)
{
kfree(data);
}
static const struct rpc_call_ops nfs42_offload_cancel_ops = {
- .rpc_call_prepare = nfs42_offload_cancel_prepare,
+ .rpc_call_prepare = nfs42_offload_prepare,
.rpc_call_done = nfs42_offload_cancel_done,
- .rpc_release = nfs42_free_offloadcancel_data,
+ .rpc_release = nfs42_offload_release,
};
static int nfs42_do_offload_cancel_async(struct file *dst,
nfs4_stateid *stateid)
{
struct nfs_server *dst_server = NFS_SERVER(file_inode(dst));
- struct nfs42_offloadcancel_data *data = NULL;
+ struct nfs42_offload_data *data = NULL;
struct nfs_open_context *ctx = nfs_file_open_context(dst);
struct rpc_task *task;
struct rpc_message msg = {
@@ -552,14 +602,14 @@ static int nfs42_do_offload_cancel_async(struct file *dst,
.rpc_message = &msg,
.callback_ops = &nfs42_offload_cancel_ops,
.workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE,
};
int status;
if (!(dst_server->caps & NFS_CAP_OFFLOAD_CANCEL))
return -EOPNOTSUPP;
- data = kzalloc(sizeof(struct nfs42_offloadcancel_data), GFP_KERNEL);
+ data = kzalloc(sizeof(struct nfs42_offload_data), GFP_KERNEL);
if (data == NULL)
return -ENOMEM;
@@ -582,6 +632,108 @@ static int nfs42_do_offload_cancel_async(struct file *dst,
return status;
}
+static int
+_nfs42_proc_offload_status(struct nfs_server *server, struct file *file,
+ struct nfs42_offload_data *data)
+{
+ struct nfs_open_context *ctx = nfs_file_open_context(file);
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OFFLOAD_STATUS],
+ .rpc_argp = &data->args,
+ .rpc_resp = &data->res,
+ .rpc_cred = ctx->cred,
+ };
+ int status;
+
+ status = nfs4_call_sync(server->client, server, &msg,
+ &data->args.osa_seq_args,
+ &data->res.osr_seq_res, 1);
+ trace_nfs4_offload_status(&data->args, status);
+ switch (status) {
+ case 0:
+ break;
+
+ case -NFS4ERR_ADMIN_REVOKED:
+ case -NFS4ERR_BAD_STATEID:
+ case -NFS4ERR_OLD_STATEID:
+ /*
+ * Server does not recognize the COPY stateid. CB_OFFLOAD
+ * could have purged it, or server might have rebooted.
+ * Since COPY stateids don't have an associated inode,
+ * avoid triggering state recovery.
+ */
+ status = -EBADF;
+ break;
+ case -NFS4ERR_NOTSUPP:
+ case -ENOTSUPP:
+ case -EOPNOTSUPP:
+ server->caps &= ~NFS_CAP_OFFLOAD_STATUS;
+ status = -EOPNOTSUPP;
+ break;
+ }
+
+ return status;
+}
+
+/**
+ * nfs42_proc_offload_status - Poll completion status of an async copy operation
+ * @dst: handle of file being copied into
+ * @stateid: copy stateid (from async COPY result)
+ * @copied: OUT: number of bytes copied so far
+ *
+ * Return values:
+ * %0: Server returned an NFS4_OK completion status
+ * %-EINPROGRESS: Server returned no completion status
+ * %-EREMOTEIO: Server returned an error completion status
+ * %-EBADF: Server did not recognize the copy stateid
+ * %-EOPNOTSUPP: Server does not support OFFLOAD_STATUS
+ * %-ERESTARTSYS: Wait interrupted by signal
+ *
+ * Other negative errnos indicate the client could not complete the
+ * request.
+ */
+static int
+nfs42_proc_offload_status(struct file *dst, nfs4_stateid *stateid, u64 *copied)
+{
+ struct inode *inode = file_inode(dst);
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs4_exception exception = {
+ .inode = inode,
+ };
+ struct nfs42_offload_data *data;
+ int status;
+
+ if (!(server->caps & NFS_CAP_OFFLOAD_STATUS))
+ return -EOPNOTSUPP;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ data->seq_server = server;
+ data->args.osa_src_fh = NFS_FH(inode);
+ memcpy(&data->args.osa_stateid, stateid,
+ sizeof(data->args.osa_stateid));
+ exception.stateid = &data->args.osa_stateid;
+ do {
+ status = _nfs42_proc_offload_status(server, dst, data);
+ if (status == -EOPNOTSUPP)
+ goto out;
+ status = nfs4_handle_exception(server, status, &exception);
+ } while (exception.retry);
+ if (status)
+ goto out;
+
+ *copied = data->res.osr_count;
+ if (!data->res.complete_count)
+ status = -EINPROGRESS;
+ else if (data->res.osr_complete != NFS_OK)
+ status = -EREMOTEIO;
+
+out:
+ kfree(data);
+ return status;
+}
+
static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
struct nfs42_copy_notify_args *args,
struct nfs42_copy_notify_res *res)
@@ -861,7 +1013,7 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server,
.rpc_message = &msg,
.callback_ops = &nfs42_layoutstat_ops,
.callback_data = data,
- .flags = RPC_TASK_ASYNC,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE,
};
struct rpc_task *task;
@@ -1016,7 +1168,7 @@ int nfs42_proc_layouterror(struct pnfs_layout_segment *lseg,
struct rpc_task_setup task_setup = {
.rpc_message = &msg,
.callback_ops = &nfs42_layouterror_ops,
- .flags = RPC_TASK_ASYNC,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE,
};
unsigned int i;
diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c
index b6e3d8f77b91..37d79400e5f4 100644
--- a/fs/nfs/nfs42xattr.c
+++ b/fs/nfs/nfs42xattr.c
@@ -802,7 +802,7 @@ static struct shrinker *nfs4_xattr_large_entry_shrinker;
static enum lru_status
cache_lru_isolate(struct list_head *item,
- struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
+ struct list_lru_one *lru, void *arg)
{
struct list_head *dispose = arg;
struct inode *inode;
@@ -867,7 +867,7 @@ nfs4_xattr_cache_count(struct shrinker *shrink, struct shrink_control *sc)
static enum lru_status
entry_lru_isolate(struct list_head *item,
- struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
+ struct list_lru_one *lru, void *arg)
{
struct list_head *dispose = arg;
struct nfs4_xattr_bucket *bucket;
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 9e3ae53e2205..b1b663468249 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -35,6 +35,11 @@
#define encode_offload_cancel_maxsz (op_encode_hdr_maxsz + \
XDR_QUADLEN(NFS4_STATEID_SIZE))
#define decode_offload_cancel_maxsz (op_decode_hdr_maxsz)
+#define encode_offload_status_maxsz (op_encode_hdr_maxsz + \
+ XDR_QUADLEN(NFS4_STATEID_SIZE))
+#define decode_offload_status_maxsz (op_decode_hdr_maxsz + \
+ 2 /* osr_count */ + \
+ 2 /* osr_complete */)
#define encode_copy_notify_maxsz (op_encode_hdr_maxsz + \
XDR_QUADLEN(NFS4_STATEID_SIZE) + \
1 + /* nl4_type */ \
@@ -143,10 +148,20 @@
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_offload_cancel_maxsz)
+#define NFS4_enc_offload_status_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_offload_status_maxsz)
+#define NFS4_dec_offload_status_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_offload_status_maxsz)
#define NFS4_enc_copy_notify_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_copy_notify_maxsz)
#define NFS4_dec_copy_notify_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_copy_notify_maxsz)
#define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \
@@ -343,6 +358,14 @@ static void encode_offload_cancel(struct xdr_stream *xdr,
encode_nfs4_stateid(xdr, &args->osa_stateid);
}
+static void encode_offload_status(struct xdr_stream *xdr,
+ const struct nfs42_offload_status_args *args,
+ struct compound_hdr *hdr)
+{
+ encode_op_hdr(xdr, OP_OFFLOAD_STATUS, decode_offload_status_maxsz, hdr);
+ encode_nfs4_stateid(xdr, &args->osa_stateid);
+}
+
static void encode_copy_notify(struct xdr_stream *xdr,
const struct nfs42_copy_notify_args *args,
struct compound_hdr *hdr)
@@ -549,7 +572,7 @@ static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
}
/*
- * Encode OFFLOAD_CANEL request
+ * Encode OFFLOAD_CANCEL request
*/
static void nfs4_xdr_enc_offload_cancel(struct rpc_rqst *req,
struct xdr_stream *xdr,
@@ -568,6 +591,25 @@ static void nfs4_xdr_enc_offload_cancel(struct rpc_rqst *req,
}
/*
+ * Encode OFFLOAD_STATUS request
+ */
+static void nfs4_xdr_enc_offload_status(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const void *data)
+{
+ const struct nfs42_offload_status_args *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->osa_seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->osa_seq_args, &hdr);
+ encode_putfh(xdr, args->osa_src_fh, &hdr);
+ encode_offload_status(xdr, args, &hdr);
+ encode_nops(&hdr);
+}
+
+/*
* Encode COPY_NOTIFY request
*/
static void nfs4_xdr_enc_copy_notify(struct rpc_rqst *req,
@@ -919,6 +961,26 @@ static int decode_offload_cancel(struct xdr_stream *xdr,
return decode_op_hdr(xdr, OP_OFFLOAD_CANCEL);
}
+static int decode_offload_status(struct xdr_stream *xdr,
+ struct nfs42_offload_status_res *res)
+{
+ ssize_t result;
+ int status;
+
+ status = decode_op_hdr(xdr, OP_OFFLOAD_STATUS);
+ if (status)
+ return status;
+ /* osr_count */
+ if (xdr_stream_decode_u64(xdr, &res->osr_count) < 0)
+ return -EIO;
+ /* osr_complete<1> */
+ result = xdr_stream_decode_uint32_array(xdr, &res->osr_complete, 1);
+ if (result < 0)
+ return -EIO;
+ res->complete_count = result;
+ return 0;
+}
+
static int decode_copy_notify(struct xdr_stream *xdr,
struct nfs42_copy_notify_res *res)
{
@@ -1369,6 +1431,32 @@ out:
}
/*
+ * Decode OFFLOAD_STATUS response
+ */
+static int nfs4_xdr_dec_offload_status(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfs42_offload_status_res *res = data;
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->osr_seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_offload_status(xdr, res);
+
+out:
+ return status;
+}
+
+/*
* Decode COPY_NOTIFY response
*/
static int nfs4_xdr_dec_copy_notify(struct rpc_rqst *rqstp,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7024230f0d1d..7d383d29a995 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -67,7 +67,8 @@ struct nfs4_minor_version_ops {
void (*free_lock_state)(struct nfs_server *,
struct nfs4_lock_state *);
int (*test_and_free_expired)(struct nfs_server *,
- nfs4_stateid *, const struct cred *);
+ const nfs4_stateid *,
+ const struct cred *);
struct nfs_seqid *
(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t);
void (*session_trunk)(struct rpc_clnt *clnt,
@@ -82,7 +83,7 @@ struct nfs4_minor_version_ops {
#define NFS_SEQID_CONFIRMED 1
struct nfs_seqid_counter {
ktime_t create_time;
- int owner_id;
+ u64 owner_id;
int flags;
u32 counter;
spinlock_t lock; /* Protects the list */
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 84573df5cf5a..162c85a83a14 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -231,9 +231,10 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
__set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags);
__set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
__set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags);
-
- if (test_bit(NFS_CS_DS, &cl_init->init_flags))
- __set_bit(NFS_CS_DS, &clp->cl_flags);
+ if (test_bit(NFS_CS_PNFS, &cl_init->init_flags))
+ __set_bit(NFS_CS_PNFS, &clp->cl_flags);
+ if (test_bit(NFS_CS_NETUNREACH_FATAL, &cl_init->init_flags))
+ __set_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags);
/*
* Set up the connection to the server before we add add to the
* global list.
@@ -938,6 +939,9 @@ static int nfs4_set_client(struct nfs_server *server,
__set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
server->port = rpc_get_port((struct sockaddr *)addr);
+ if (server->flags & NFS_MOUNT_NETUNREACH_FATAL)
+ __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags);
+
/* Allocate or find a client reference we can use */
clp = nfs_get_client(&cl_init);
if (IS_ERR(clp))
@@ -1012,8 +1016,9 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+ if (test_bit(NFS_CS_NETUNREACH_FATAL, &mds_clp->cl_flags))
+ __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags);
- __set_bit(NFS_CS_DS, &cl_init.init_flags);
__set_bit(NFS_CS_PNFS, &cl_init.init_flags);
cl_init.max_connect = NFS_MAX_TRANSPORTS;
/*
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ea390db94b62..b1d2122bd5a7 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -103,10 +103,10 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
const struct cred *cred,
struct nfs4_slot *slot,
bool is_privileged);
-static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
- const struct cred *);
+static int nfs41_test_stateid(struct nfs_server *, const nfs4_stateid *,
+ const struct cred *);
static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
- const struct cred *, bool);
+ const struct cred *, bool);
#endif
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
@@ -114,6 +114,7 @@ static inline struct nfs4_label *
nfs4_label_init_security(struct inode *dir, struct dentry *dentry,
struct iattr *sattr, struct nfs4_label *label)
{
+ struct lsm_context shim;
int err;
if (label == NULL)
@@ -128,18 +129,26 @@ nfs4_label_init_security(struct inode *dir, struct dentry *dentry,
label->label = NULL;
err = security_dentry_init_security(dentry, sattr->ia_mode,
- &dentry->d_name, NULL,
- (void **)&label->label, &label->len);
- if (err == 0)
- return label;
+ &dentry->d_name, NULL, &shim);
+ if (err)
+ return NULL;
- return NULL;
+ label->lsmid = shim.id;
+ label->label = shim.context;
+ label->len = shim.len;
+ return label;
}
static inline void
nfs4_label_release_security(struct nfs4_label *label)
{
- if (label)
- security_release_secctx(label->label, label->len);
+ struct lsm_context shim;
+
+ if (label) {
+ shim.context = label->label;
+ shim.len = label->len;
+ shim.id = label->lsmid;
+ security_release_secctx(&shim);
+ }
}
static inline u32 *nfs4_bitmask(struct nfs_server *server, struct nfs4_label *label)
{
@@ -186,6 +195,9 @@ static int nfs4_map_errors(int err)
return -EBUSY;
case -NFS4ERR_NOT_SAME:
return -ENOTSYNC;
+ case -ENETDOWN:
+ case -ENETUNREACH:
+ break;
default:
dprintk("%s could not handle NFSv4 error %d\n",
__func__, -err);
@@ -293,7 +305,7 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src,
unsigned long cache_validity;
memcpy(dst, src, NFS4_BITMASK_SZ*sizeof(*dst));
- if (!inode || !nfs4_have_delegation(inode, FMODE_READ))
+ if (!inode || !nfs_have_read_or_write_delegation(inode))
return;
cache_validity = READ_ONCE(NFS_I(inode)->cache_validity) | flags;
@@ -310,6 +322,18 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src,
dst[1] &= ~FATTR4_WORD1_MODE;
if (!(cache_validity & NFS_INO_INVALID_OTHER))
dst[1] &= ~(FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP);
+
+ if (nfs_have_delegated_mtime(inode)) {
+ if (!(cache_validity & NFS_INO_INVALID_ATIME))
+ dst[1] &= ~FATTR4_WORD1_TIME_ACCESS;
+ if (!(cache_validity & NFS_INO_INVALID_MTIME))
+ dst[1] &= ~FATTR4_WORD1_TIME_MODIFY;
+ if (!(cache_validity & NFS_INO_INVALID_CTIME))
+ dst[1] &= ~FATTR4_WORD1_TIME_METADATA;
+ } else if (nfs_have_delegated_atime(inode)) {
+ if (!(cache_validity & NFS_INO_INVALID_ATIME))
+ dst[1] &= ~FATTR4_WORD1_TIME_ACCESS;
+ }
}
static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry,
@@ -422,6 +446,8 @@ static int nfs4_delay_killable(long *timeout)
{
might_sleep();
+ if (unlikely(nfs_current_task_exiting()))
+ return -EINTR;
__set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
schedule_timeout(nfs4_update_delay(timeout));
if (!__fatal_signal_pending(current))
@@ -433,6 +459,8 @@ static int nfs4_delay_interruptible(long *timeout)
{
might_sleep();
+ if (unlikely(nfs_current_task_exiting()))
+ return -EINTR;
__set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE_UNSAFE);
schedule_timeout(nfs4_update_delay(timeout));
if (!signal_pending(current))
@@ -643,6 +671,15 @@ nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server,
struct nfs_client *clp = server->nfs_client;
int ret;
+ if ((task->tk_rpc_status == -ENETDOWN ||
+ task->tk_rpc_status == -ENETUNREACH) &&
+ task->tk_flags & RPC_TASK_NETUNREACH_FATAL) {
+ exception->delay = 0;
+ exception->recovering = 0;
+ exception->retry = 0;
+ return -EIO;
+ }
+
ret = nfs4_do_handle_exception(server, errorcode, exception);
if (exception->delay) {
int ret2 = nfs4_exception_should_retrans(server, exception);
@@ -1245,7 +1282,8 @@ nfs4_update_changeattr_locked(struct inode *inode,
struct nfs_inode *nfsi = NFS_I(inode);
u64 change_attr = inode_peek_iversion_raw(inode);
- cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;
+ if (!nfs_have_delegated_mtime(inode))
+ cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;
if (S_ISDIR(inode->i_mode))
cache_validity |= NFS_INO_INVALID_DATA;
@@ -1264,7 +1302,7 @@ nfs4_update_changeattr_locked(struct inode *inode,
if (S_ISDIR(inode->i_mode))
nfs_force_lookup_revalidate(inode);
- if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+ if (!nfs_have_delegated_attributes(inode))
cache_validity |=
NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL |
NFS_INO_INVALID_SIZE | NFS_INO_INVALID_OTHER |
@@ -1320,8 +1358,7 @@ static fmode_t _nfs4_ctx_to_openmode(const struct nfs_open_context *ctx)
}
static u32
-nfs4_map_atomic_open_share(struct nfs_server *server,
- fmode_t fmode, int openflags)
+nfs4_fmode_to_share_access(fmode_t fmode)
{
u32 res = 0;
@@ -1335,11 +1372,27 @@ nfs4_map_atomic_open_share(struct nfs_server *server,
case FMODE_READ|FMODE_WRITE:
res = NFS4_SHARE_ACCESS_BOTH;
}
+ return res;
+}
+
+static u32
+nfs4_map_atomic_open_share(struct nfs_server *server,
+ fmode_t fmode, int openflags)
+{
+ u32 res = nfs4_fmode_to_share_access(fmode);
+
if (!(server->caps & NFS_CAP_ATOMIC_OPEN_V1))
goto out;
/* Want no delegation if we're using O_DIRECT */
- if (openflags & O_DIRECT)
+ if (openflags & O_DIRECT) {
res |= NFS4_SHARE_WANT_NO_DELEG;
+ goto out;
+ }
+ /* res |= NFS4_SHARE_WANT_NO_PREFERENCE; */
+ if (server->caps & NFS_CAP_DELEGTIME)
+ res |= NFS4_SHARE_WANT_DELEG_TIMESTAMPS;
+ if (server->caps & NFS_CAP_OPEN_XOR)
+ res |= NFS4_SHARE_WANT_OPEN_XOR_DELEGATION;
out:
return res;
}
@@ -1737,7 +1790,8 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state,
rcu_read_unlock();
trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0);
- if (!fatal_signal_pending(current)) {
+ if (!fatal_signal_pending(current) &&
+ !nfs_current_task_exiting()) {
if (schedule_timeout(5*HZ) == 0)
status = -EAGAIN;
else
@@ -1954,44 +2008,41 @@ out_return_state:
}
static void
-nfs4_opendata_check_deleg(struct nfs4_opendata *data, struct nfs4_state *state)
-{
- struct nfs_client *clp = NFS_SERVER(state->inode)->nfs_client;
- struct nfs_delegation *delegation;
- int delegation_flags = 0;
-
- rcu_read_lock();
- delegation = rcu_dereference(NFS_I(state->inode)->delegation);
- if (delegation)
- delegation_flags = delegation->flags;
- rcu_read_unlock();
- switch (data->o_arg.claim) {
- default:
+nfs4_process_delegation(struct inode *inode, const struct cred *cred,
+ enum open_claim_type4 claim,
+ const struct nfs4_open_delegation *delegation)
+{
+ switch (delegation->open_delegation_type) {
+ case NFS4_OPEN_DELEGATE_READ:
+ case NFS4_OPEN_DELEGATE_WRITE:
+ case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
+ case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
break;
+ default:
+ return;
+ }
+ switch (claim) {
case NFS4_OPEN_CLAIM_DELEGATE_CUR:
case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
pr_err_ratelimited("NFS: Broken NFSv4 server %s is "
"returning a delegation for "
"OPEN(CLAIM_DELEGATE_CUR)\n",
- clp->cl_hostname);
- return;
+ NFS_SERVER(inode)->nfs_client->cl_hostname);
+ break;
+ case NFS4_OPEN_CLAIM_PREVIOUS:
+ nfs_inode_reclaim_delegation(inode, cred, delegation->type,
+ &delegation->stateid,
+ delegation->pagemod_limit,
+ delegation->open_delegation_type);
+ break;
+ default:
+ nfs_inode_set_delegation(inode, cred, delegation->type,
+ &delegation->stateid,
+ delegation->pagemod_limit,
+ delegation->open_delegation_type);
}
- if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
- nfs_inode_set_delegation(state->inode,
- data->owner->so_cred,
- data->o_res.delegation_type,
- &data->o_res.delegation,
- data->o_res.pagemod_limit);
- else
- nfs_inode_reclaim_delegation(state->inode,
- data->owner->so_cred,
- data->o_res.delegation_type,
- &data->o_res.delegation,
- data->o_res.pagemod_limit);
-
- if (data->o_res.do_recall)
- nfs_async_inode_return_delegation(state->inode,
- &data->o_res.delegation);
+ if (delegation->do_recall)
+ nfs_async_inode_return_delegation(inode, &delegation->stateid);
}
/*
@@ -2015,11 +2066,16 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
if (ret)
return ERR_PTR(ret);
- if (data->o_res.delegation_type != 0)
- nfs4_opendata_check_deleg(data, state);
+ nfs4_process_delegation(state->inode,
+ data->owner->so_cred,
+ data->o_arg.claim,
+ &data->o_res.delegation);
- if (!update_open_stateid(state, &data->o_res.stateid,
- NULL, data->o_arg.fmode))
+ if (!(data->o_res.rflags & NFS4_OPEN_RESULT_NO_OPEN_STATEID)) {
+ if (!update_open_stateid(state, &data->o_res.stateid,
+ NULL, data->o_arg.fmode))
+ return ERR_PTR(-EAGAIN);
+ } else if (!update_open_stateid(state, NULL, NULL, data->o_arg.fmode))
return ERR_PTR(-EAGAIN);
refcount_inc(&state->count);
@@ -2083,10 +2139,18 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
if (IS_ERR(state))
goto out;
- if (data->o_res.delegation_type != 0)
- nfs4_opendata_check_deleg(data, state);
- if (!update_open_stateid(state, &data->o_res.stateid,
- NULL, data->o_arg.fmode)) {
+ nfs4_process_delegation(state->inode,
+ data->owner->so_cred,
+ data->o_arg.claim,
+ &data->o_res.delegation);
+
+ if (!(data->o_res.rflags & NFS4_OPEN_RESULT_NO_OPEN_STATEID)) {
+ if (!update_open_stateid(state, &data->o_res.stateid,
+ NULL, data->o_arg.fmode)) {
+ nfs4_put_open_state(state);
+ state = ERR_PTR(-EAGAIN);
+ }
+ } else if (!update_open_stateid(state, NULL, NULL, data->o_arg.fmode)) {
nfs4_put_open_state(state);
state = ERR_PTR(-EAGAIN);
}
@@ -2222,7 +2286,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
{
struct nfs_delegation *delegation;
struct nfs4_opendata *opendata;
- fmode_t delegation_type = 0;
+ u32 delegation_type = NFS4_OPEN_DELEGATE_NONE;
int status;
opendata = nfs4_open_recoverdata_alloc(ctx, state,
@@ -2231,8 +2295,20 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
return PTR_ERR(opendata);
rcu_read_lock();
delegation = rcu_dereference(NFS_I(state->inode)->delegation);
- if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0)
- delegation_type = delegation->type;
+ if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0) {
+ switch(delegation->type) {
+ case FMODE_READ:
+ delegation_type = NFS4_OPEN_DELEGATE_READ;
+ if (test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags))
+ delegation_type = NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG;
+ break;
+ case FMODE_WRITE:
+ case FMODE_READ|FMODE_WRITE:
+ delegation_type = NFS4_OPEN_DELEGATE_WRITE;
+ if (test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags))
+ delegation_type = NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG;
+ }
+ }
rcu_read_unlock();
opendata->o_arg.u.delegation_type = delegation_type;
status = nfs4_open_recover(opendata, state);
@@ -2553,12 +2629,14 @@ static void nfs4_open_release(void *calldata)
struct nfs4_opendata *data = calldata;
struct nfs4_state *state = NULL;
+ /* In case of error, no cleanup! */
+ if (data->rpc_status != 0 || !data->rpc_done) {
+ nfs_release_seqid(data->o_arg.seqid);
+ goto out_free;
+ }
/* If this request hasn't been cancelled, do nothing */
if (!data->cancelled)
goto out_free;
- /* In case of error, no cleanup! */
- if (data->rpc_status != 0 || !data->rpc_done)
- goto out_free;
/* In case we need an open_confirm, no cleanup! */
if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)
goto out_free;
@@ -2825,16 +2903,16 @@ static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
}
static int nfs40_test_and_free_expired_stateid(struct nfs_server *server,
- nfs4_stateid *stateid,
- const struct cred *cred)
+ const nfs4_stateid *stateid,
+ const struct cred *cred)
{
return -NFS4ERR_BAD_STATEID;
}
#if defined(CONFIG_NFS_V4_1)
static int nfs41_test_and_free_expired_stateid(struct nfs_server *server,
- nfs4_stateid *stateid,
- const struct cred *cred)
+ const nfs4_stateid *stateid,
+ const struct cred *cred)
{
int status;
@@ -3093,9 +3171,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
if (d_really_is_negative(dentry)) {
struct dentry *alias;
d_drop(dentry);
- alias = d_exact_alias(dentry, state->inode);
- if (!alias)
- alias = d_splice_alias(igrab(state->inode), dentry);
+ alias = d_splice_alias(igrab(state->inode), dentry);
/* d_splice_alias() can't fail here - it's a non-directory */
if (alias) {
dput(ctx->dentry);
@@ -3111,7 +3187,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
case NFS4_OPEN_CLAIM_DELEGATE_PREV:
if (!opendata->rpc_done)
break;
- if (opendata->o_res.delegation_type != 0)
+ if (opendata->o_res.delegation.type != 0)
dir_verifier = nfs_save_change_attribute(dir);
nfs_set_verifier(dentry, dir_verifier);
}
@@ -3394,13 +3470,18 @@ static int nfs4_do_setattr(struct inode *inode, const struct cred *cred,
.inode = inode,
.stateid = &arg.stateid,
};
- unsigned long adjust_flags = NFS_INO_INVALID_CHANGE;
+ unsigned long adjust_flags = NFS_INO_INVALID_CHANGE |
+ NFS_INO_INVALID_CTIME;
int err;
if (sattr->ia_valid & (ATTR_MODE | ATTR_KILL_SUID | ATTR_KILL_SGID))
adjust_flags |= NFS_INO_INVALID_MODE;
if (sattr->ia_valid & (ATTR_UID | ATTR_GID))
adjust_flags |= NFS_INO_INVALID_OTHER;
+ if (sattr->ia_valid & ATTR_ATIME)
+ adjust_flags |= NFS_INO_INVALID_ATIME;
+ if (sattr->ia_valid & ATTR_MTIME)
+ adjust_flags |= NFS_INO_INVALID_MTIME;
do {
nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, fattr->label),
@@ -3512,7 +3593,7 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst,
write_sequnlock(&state->seqlock);
trace_nfs4_close_stateid_update_wait(state->inode, dst, 0);
- if (fatal_signal_pending(current))
+ if (fatal_signal_pending(current) || nfs_current_task_exiting())
status = -EINTR;
else
if (schedule_timeout(5*HZ) != 0)
@@ -3700,7 +3781,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) {
/* Close-to-open cache consistency revalidation */
- if (!nfs4_have_delegation(inode, FMODE_READ)) {
+ if (!nfs4_have_delegation(inode, FMODE_READ, 0)) {
nfs4_bitmask_set(calldata->arg.bitmask_store,
server->cache_consistency_bitmask,
inode, 0);
@@ -3710,8 +3791,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
}
calldata->arg.share_access =
- nfs4_map_atomic_open_share(NFS_SERVER(inode),
- calldata->arg.fmode, 0);
+ nfs4_fmode_to_share_access(calldata->arg.fmode);
if (calldata->res.fattr == NULL)
calldata->arg.bitmask = NULL;
@@ -3842,8 +3922,11 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx,
static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
{
+ struct dentry *dentry = ctx->dentry;
if (ctx->state == NULL)
return;
+ if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
+ nfs4_inode_set_return_delegation_on_close(d_inode(dentry));
if (is_sync)
nfs4_close_sync(ctx->state, _nfs4_ctx_to_openmode(ctx));
else
@@ -3852,11 +3935,26 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
#define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL)
#define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL)
-#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_XATTR_SUPPORT - 1UL)
+#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_OPEN_ARGUMENTS - 1UL)
+
+#define FATTR4_WORD2_NFS42_TIME_DELEG_MASK \
+ (FATTR4_WORD2_TIME_DELEG_MODIFY|FATTR4_WORD2_TIME_DELEG_ACCESS)
+static bool nfs4_server_delegtime_capable(struct nfs4_server_caps_res *res)
+{
+ u32 share_access_want = res->open_caps.oa_share_access_want[0];
+ u32 attr_bitmask = res->attr_bitmask[2];
+
+ return (share_access_want & NFS4_SHARE_WANT_DELEG_TIMESTAMPS) &&
+ ((attr_bitmask & FATTR4_WORD2_NFS42_TIME_DELEG_MASK) ==
+ FATTR4_WORD2_NFS42_TIME_DELEG_MASK);
+}
static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
{
- u32 bitmask[3] = {}, minorversion = server->nfs_client->cl_minorversion;
+ u32 minorversion = server->nfs_client->cl_minorversion;
+ u32 bitmask[3] = {
+ [0] = FATTR4_WORD0_SUPPORTED_ATTRS,
+ };
struct nfs4_server_caps_arg args = {
.fhandle = fhandle,
.bitmask = bitmask,
@@ -3878,10 +3976,19 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
FATTR4_WORD0_CASE_INSENSITIVE |
FATTR4_WORD0_CASE_PRESERVING;
if (minorversion)
- bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT;
+ bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT |
+ FATTR4_WORD2_OPEN_ARGUMENTS;
status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
if (status == 0) {
+ bitmask[0] = (FATTR4_WORD0_SUPPORTED_ATTRS |
+ FATTR4_WORD0_FH_EXPIRE_TYPE |
+ FATTR4_WORD0_LINK_SUPPORT |
+ FATTR4_WORD0_SYMLINK_SUPPORT |
+ FATTR4_WORD0_ACLSUPPORT |
+ FATTR4_WORD0_CASE_INSENSITIVE |
+ FATTR4_WORD0_CASE_PRESERVING) &
+ res.attr_bitmask[0];
/* Sanity check the server answers */
switch (minorversion) {
case 0:
@@ -3890,9 +3997,14 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
break;
case 1:
res.attr_bitmask[2] &= FATTR4_WORD2_NFS41_MASK;
+ bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT &
+ res.attr_bitmask[2];
break;
case 2:
res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK;
+ bitmask[2] = (FATTR4_WORD2_SUPPATTR_EXCLCREAT |
+ FATTR4_WORD2_OPEN_ARGUMENTS) &
+ res.attr_bitmask[2];
}
memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS |
@@ -3939,6 +4051,12 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
sizeof(server->attr_bitmask));
server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+ if (res.open_caps.oa_share_access_want[0] &
+ NFS4_SHARE_WANT_OPEN_XOR_DELEGATION)
+ server->caps |= NFS_CAP_OPEN_XOR;
+ if (nfs4_server_delegtime_capable(&res))
+ server->caps |= NFS_CAP_DELEGTIME;
+
memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask));
server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
@@ -4023,6 +4141,23 @@ static void test_fs_location_for_trunking(struct nfs4_fs_location *location,
}
}
+static bool _is_same_nfs4_pathname(struct nfs4_pathname *path1,
+ struct nfs4_pathname *path2)
+{
+ int i;
+
+ if (path1->ncomponents != path2->ncomponents)
+ return false;
+ for (i = 0; i < path1->ncomponents; i++) {
+ if (path1->components[i].len != path2->components[i].len)
+ return false;
+ if (memcmp(path1->components[i].data, path2->components[i].data,
+ path1->components[i].len))
+ return false;
+ }
+ return true;
+}
+
static int _nfs4_discover_trunking(struct nfs_server *server,
struct nfs_fh *fhandle)
{
@@ -4056,9 +4191,13 @@ static int _nfs4_discover_trunking(struct nfs_server *server,
if (status)
goto out_free_3;
- for (i = 0; i < locations->nlocations; i++)
+ for (i = 0; i < locations->nlocations; i++) {
+ if (!_is_same_nfs4_pathname(&locations->fs_path,
+ &locations->locations[i].rootpath))
+ continue;
test_fs_location_for_trunking(&locations->locations[i], clp,
server);
+ }
out_free_3:
kfree(locations->fattr);
out_free_2:
@@ -4424,15 +4563,15 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
}
static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
- struct dentry *dentry, struct nfs_fh *fhandle,
- struct nfs_fattr *fattr)
+ struct dentry *dentry, const struct qstr *name,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
struct nfs_server *server = NFS_SERVER(dir);
int status;
struct nfs4_lookup_arg args = {
.bitmask = server->attr_bitmask,
.dir_fh = NFS_FH(dir),
- .name = &dentry->d_name,
+ .name = name,
};
struct nfs4_lookup_res res = {
.server = server,
@@ -4474,17 +4613,16 @@ static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr)
}
static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
- struct dentry *dentry, struct nfs_fh *fhandle,
- struct nfs_fattr *fattr)
+ struct dentry *dentry, const struct qstr *name,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
struct nfs4_exception exception = {
.interruptible = true,
};
struct rpc_clnt *client = *clnt;
- const struct qstr *name = &dentry->d_name;
int err;
do {
- err = _nfs4_proc_lookup(client, dir, dentry, fhandle, fattr);
+ err = _nfs4_proc_lookup(client, dir, dentry, name, fhandle, fattr);
trace_nfs4_lookup(dir, name, err);
switch (err) {
case -NFS4ERR_BADNAME:
@@ -4519,13 +4657,13 @@ out:
return err;
}
-static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry,
+static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
int status;
struct rpc_clnt *client = NFS_CLIENT(dir);
- status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr);
+ status = nfs4_proc_lookup_common(&client, dir, dentry, name, fhandle, fattr);
if (client != NFS_CLIENT(dir)) {
rpc_shutdown_client(client);
nfs_fixup_secinfo_attributes(fattr);
@@ -4540,7 +4678,8 @@ nfs4_proc_lookup_mountpoint(struct inode *dir, struct dentry *dentry,
struct rpc_clnt *client = NFS_CLIENT(dir);
int status;
- status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr);
+ status = nfs4_proc_lookup_common(&client, dir, dentry, &dentry->d_name,
+ fhandle, fattr);
if (status < 0)
return ERR_PTR(status);
return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client;
@@ -4617,7 +4756,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
};
int status = 0;
- if (!nfs4_have_delegation(inode, FMODE_READ)) {
+ if (!nfs4_have_delegation(inode, FMODE_READ, 0)) {
res.fattr = nfs_alloc_fattr();
if (res.fattr == NULL)
return -ENOMEM;
@@ -4935,8 +5074,9 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct
goto out;
nfs4_inode_make_writeable(inode);
- nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.fattr->label), inode,
- NFS_INO_INVALID_CHANGE);
+ nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.fattr->label),
+ inode,
+ NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME);
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
if (!status) {
nfs4_update_changeattr(dir, &res.cinfo, res.fattr->time_start,
@@ -5014,9 +5154,6 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
&data->arg.seq_args, &data->res.seq_res, 1);
if (status == 0) {
spin_lock(&dir->i_lock);
- /* Creating a directory bumps nlink in the parent */
- if (data->arg.ftype == NF4DIR)
- nfs4_inc_nlink_locked(dir);
nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo,
data->res.fattr->time_start,
NFS_INO_INVALID_DATA);
@@ -5026,6 +5163,25 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
return status;
}
+static struct dentry *nfs4_do_mkdir(struct inode *dir, struct dentry *dentry,
+ struct nfs4_createdata *data)
+{
+ int status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg,
+ &data->arg.seq_args, &data->res.seq_res, 1);
+
+ if (status)
+ return ERR_PTR(status);
+
+ spin_lock(&dir->i_lock);
+ /* Creating a directory bumps nlink in the parent */
+ nfs4_inc_nlink_locked(dir);
+ nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo,
+ data->res.fattr->time_start,
+ NFS_INO_INVALID_DATA);
+ spin_unlock(&dir->i_lock);
+ return nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr);
+}
+
static void nfs4_free_createdata(struct nfs4_createdata *data)
{
nfs4_label_free(data->fattr.label);
@@ -5082,32 +5238,34 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
return err;
}
-static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
- struct iattr *sattr, struct nfs4_label *label)
+static struct dentry *_nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
+ struct iattr *sattr,
+ struct nfs4_label *label)
{
struct nfs4_createdata *data;
- int status = -ENOMEM;
+ struct dentry *ret = ERR_PTR(-ENOMEM);
data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4DIR);
if (data == NULL)
goto out;
data->arg.label = label;
- status = nfs4_do_create(dir, dentry, data);
+ ret = nfs4_do_mkdir(dir, dentry, data);
nfs4_free_createdata(data);
out:
- return status;
+ return ret;
}
-static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
- struct iattr *sattr)
+static struct dentry *nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
+ struct iattr *sattr)
{
struct nfs_server *server = NFS_SERVER(dir);
struct nfs4_exception exception = {
.interruptible = true,
};
struct nfs4_label l, *label;
+ struct dentry *alias;
int err;
label = nfs4_label_init_security(dir, dentry, sattr, &l);
@@ -5115,14 +5273,15 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK))
sattr->ia_mode &= ~current_umask();
do {
- err = _nfs4_proc_mkdir(dir, dentry, sattr, label);
+ alias = _nfs4_proc_mkdir(dir, dentry, sattr, label);
+ err = PTR_ERR_OR_ZERO(alias);
trace_nfs4_mkdir(dir, &dentry->d_name, err);
err = nfs4_handle_exception(NFS_SERVER(dir), err,
&exception);
} while (exception.retry);
nfs4_label_release_security(label);
- return err;
+ return alias;
}
static int _nfs4_proc_readdir(struct nfs_readdir_arg *nr_arg,
@@ -5456,7 +5615,7 @@ static bool nfs4_read_plus_not_supported(struct rpc_task *task,
struct rpc_message *msg = &task->tk_msg;
if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS] &&
- server->caps & NFS_CAP_READ_PLUS && task->tk_status == -ENOTSUPP) {
+ task->tk_status == -ENOTSUPP) {
server->caps &= ~NFS_CAP_READ_PLUS;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
rpc_restart_call_prepare(task);
@@ -5586,7 +5745,7 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr)
/* Otherwise, request attributes if and only if we don't hold
* a delegation
*/
- return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
+ return nfs4_have_delegation(hdr->inode, FMODE_READ, 0) == 0;
}
void nfs4_bitmask_set(__u32 bitmask[], const __u32 src[],
@@ -6148,7 +6307,7 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf,
size_t buflen)
{
struct nfs_server *server = NFS_SERVER(inode);
- struct nfs4_label label = {0, 0, buflen, buf};
+ struct nfs4_label label = {0, 0, 0, buflen, buf};
u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL };
struct nfs_fattr fattr = {
@@ -6253,7 +6412,7 @@ static int nfs4_do_set_security_label(struct inode *inode,
static int
nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen)
{
- struct nfs4_label ilabel = {0, 0, buflen, (char *)buf };
+ struct nfs4_label ilabel = {0, 0, 0, buflen, (char *)buf };
struct nfs_fattr *fattr;
int status;
@@ -6268,6 +6427,7 @@ nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen)
if (status == 0)
nfs_setsecurity(inode, fattr);
+ nfs_free_fattr(fattr);
return status;
}
#endif /* CONFIG_NFS_V4_SECURITY_LABEL */
@@ -6553,6 +6713,7 @@ struct nfs4_delegreturndata {
u32 roc_barrier;
bool roc;
} lr;
+ struct nfs4_delegattr sattr;
struct nfs_fattr fattr;
int rpc_status;
struct inode *inode;
@@ -6577,6 +6738,30 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
&data->res.lr_ret) == -EAGAIN)
goto out_restart;
+ if (data->args.sattr_args && task->tk_status != 0) {
+ switch(data->res.sattr_ret) {
+ case 0:
+ data->args.sattr_args = NULL;
+ data->res.sattr_res = false;
+ break;
+ case -NFS4ERR_ADMIN_REVOKED:
+ case -NFS4ERR_DELEG_REVOKED:
+ case -NFS4ERR_EXPIRED:
+ case -NFS4ERR_BAD_STATEID:
+ /* Let the main handler below do stateid recovery */
+ break;
+ case -NFS4ERR_OLD_STATEID:
+ if (nfs4_refresh_delegation_stateid(&data->stateid,
+ data->inode))
+ goto out_restart;
+ fallthrough;
+ default:
+ data->args.sattr_args = NULL;
+ data->res.sattr_res = false;
+ goto out_restart;
+ }
+ }
+
switch (task->tk_status) {
case 0:
renew_lease(data->res.server, data->timestamp);
@@ -6670,7 +6855,10 @@ static const struct rpc_call_ops nfs4_delegreturn_ops = {
.rpc_release = nfs4_delegreturn_release,
};
-static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync)
+static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
+ const nfs4_stateid *stateid,
+ struct nfs_delegation *delegation,
+ int issync)
{
struct nfs4_delegreturndata *data;
struct nfs_server *server = NFS_SERVER(inode);
@@ -6722,12 +6910,27 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
}
}
+ if (delegation &&
+ test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags)) {
+ if (delegation->type & FMODE_READ) {
+ data->sattr.atime = inode_get_atime(inode);
+ data->sattr.atime_set = true;
+ }
+ if (delegation->type & FMODE_WRITE) {
+ data->sattr.mtime = inode_get_mtime(inode);
+ data->sattr.mtime_set = true;
+ }
+ data->args.sattr_args = &data->sattr;
+ data->res.sattr_res = true;
+ }
+
if (!data->inode)
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1,
1);
else
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1,
0);
+
task_setup_data.callback_data = data;
msg.rpc_argp = &data->args;
msg.rpc_resp = &data->res;
@@ -6745,13 +6948,16 @@ out:
return status;
}
-int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync)
+int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
+ const nfs4_stateid *stateid,
+ struct nfs_delegation *delegation, int issync)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_exception exception = { };
int err;
do {
- err = _nfs4_proc_delegreturn(inode, cred, stateid, issync);
+ err = _nfs4_proc_delegreturn(inode, cred, stateid,
+ delegation, issync);
trace_nfs4_delegreturn(inode, stateid, err);
switch (err) {
case -NFS4ERR_STALE_STATEID:
@@ -6877,10 +7083,18 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
struct nfs4_unlockdata *p;
struct nfs4_state *state = lsp->ls_state;
struct inode *inode = state->inode;
+ struct nfs_lock_context *l_ctx;
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (p == NULL)
return NULL;
+ l_ctx = nfs_get_lock_context(ctx);
+ if (!IS_ERR(l_ctx)) {
+ p->l_ctx = l_ctx;
+ } else {
+ kfree(p);
+ return NULL;
+ }
p->arg.fh = NFS_FH(inode);
p->arg.fl = &p->fl;
p->arg.seqid = seqid;
@@ -6888,7 +7102,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
p->lsp = lsp;
/* Ensure we don't close file until we're done freeing locks! */
p->ctx = get_nfs_open_context(ctx);
- p->l_ctx = nfs_get_lock_context(ctx);
locks_init_lock(&p->fl);
locks_copy_lock(&p->fl, fl);
p->server = NFS_SERVER(inode);
@@ -7607,10 +7820,10 @@ static int nfs4_add_lease(struct file *file, int arg, struct file_lease **lease,
int ret;
/* No delegation, no lease */
- if (!nfs4_have_delegation(inode, type))
+ if (!nfs4_have_delegation(inode, type, 0))
return -EAGAIN;
ret = generic_setlease(file, arg, lease, priv);
- if (ret || nfs4_have_delegation(inode, type))
+ if (ret || nfs4_have_delegation(inode, type, 0))
return ret;
/* We raced with a delegation return */
nfs4_delete_lease(file, priv);
@@ -8818,7 +9031,7 @@ nfs4_run_exchange_id(struct nfs_client *clp, const struct cred *cred,
#ifdef CONFIG_NFS_V4_1_MIGRATION
calldata->args.flags |= EXCHGID4_FLAG_SUPP_MOVED_MIGR;
#endif
- if (test_bit(NFS_CS_DS, &clp->cl_flags))
+ if (test_bit(NFS_CS_PNFS, &clp->cl_flags))
calldata->args.flags |= EXCHGID4_FLAG_USE_PNFS_DS;
msg.rpc_argp = &calldata->args;
msg.rpc_resp = &calldata->res;
@@ -9405,7 +9618,7 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data)
return;
trace_nfs4_sequence(clp, task->tk_status);
- if (task->tk_status < 0 && !task->tk_client->cl_shutdown) {
+ if (task->tk_status < 0 && clp->cl_cons_state >= 0) {
dprintk("%s ERROR %d\n", __func__, task->tk_status);
if (refcount_read(&clp->cl_count) == 1)
return;
@@ -9832,6 +10045,11 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
if (!nfs41_sequence_process(task, &lrp->res.seq_res))
return;
+ if (task->tk_rpc_status == -ETIMEDOUT) {
+ lrp->rpc_status = -EAGAIN;
+ lrp->res.lrs_present = 0;
+ return;
+ }
/*
* Was there an RPC level error? Assume the call succeeded,
* and that we need to release the layout
@@ -9851,13 +10069,25 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
fallthrough;
default:
task->tk_status = 0;
+ lrp->res.lrs_present = 0;
fallthrough;
case 0:
break;
+ case -NFS4ERR_BADSESSION:
+ case -NFS4ERR_DEADSESSION:
+ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ nfs4_schedule_session_recovery(server->nfs_client->cl_session,
+ task->tk_status);
+ lrp->res.lrs_present = 0;
+ lrp->rpc_status = -EAGAIN;
+ task->tk_status = 0;
+ break;
case -NFS4ERR_DELAY:
- if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN)
- break;
- goto out_restart;
+ if (nfs4_async_handle_error(task, server, NULL, NULL) ==
+ -EAGAIN)
+ goto out_restart;
+ lrp->res.lrs_present = 0;
+ break;
}
return;
out_restart:
@@ -9871,8 +10101,13 @@ static void nfs4_layoutreturn_release(void *calldata)
struct nfs4_layoutreturn *lrp = calldata;
struct pnfs_layout_hdr *lo = lrp->args.layout;
- pnfs_layoutreturn_free_lsegs(lo, &lrp->args.stateid, &lrp->args.range,
+ if (lrp->rpc_status == 0 || !lrp->inode)
+ pnfs_layoutreturn_free_lsegs(
+ lo, &lrp->args.stateid, &lrp->args.range,
lrp->res.lrs_present ? &lrp->res.stateid : NULL);
+ else
+ pnfs_layoutreturn_retry_later(lo, &lrp->args.stateid,
+ &lrp->args.range);
nfs4_sequence_free_slot(&lrp->res.seq_res);
if (lrp->ld_private.ops && lrp->ld_private.ops->free)
lrp->ld_private.ops->free(&lrp->ld_private);
@@ -9888,7 +10123,7 @@ static const struct rpc_call_ops nfs4_layoutreturn_call_ops = {
.rpc_release = nfs4_layoutreturn_release,
};
-int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
+int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, unsigned int flags)
{
struct rpc_task *task;
struct rpc_message msg = {
@@ -9911,7 +10146,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
&task_setup_data.rpc_client, &msg);
lrp->inode = nfs_igrab_and_active(lrp->args.inode);
- if (!sync) {
+ if (flags & PNFS_FL_LAYOUTRETURN_ASYNC) {
if (!lrp->inode) {
nfs4_layoutreturn_release(lrp);
return -EAGAIN;
@@ -9919,6 +10154,8 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
task_setup_data.flags |= RPC_TASK_ASYNC;
}
if (!lrp->inode)
+ flags |= PNFS_FL_LAYOUTRETURN_PRIVILEGED;
+ if (flags & PNFS_FL_LAYOUTRETURN_PRIVILEGED)
nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1,
1);
else
@@ -9927,7 +10164,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
- if (sync)
+ if (!(flags & PNFS_FL_LAYOUTRETURN_ASYNC))
status = task->tk_status;
trace_nfs4_layoutreturn(lrp->args.inode, &lrp->args.stateid, status);
dprintk("<-- %s status=%d\n", __func__, status);
@@ -10245,12 +10482,12 @@ out:
}
static int _nfs41_test_stateid(struct nfs_server *server,
- nfs4_stateid *stateid,
- const struct cred *cred)
+ const nfs4_stateid *stateid,
+ const struct cred *cred)
{
int status;
struct nfs41_test_stateid_args args = {
- .stateid = stateid,
+ .stateid = *stateid,
};
struct nfs41_test_stateid_res res;
struct rpc_message msg = {
@@ -10306,8 +10543,8 @@ static void nfs4_handle_delay_or_session_error(struct nfs_server *server,
* failed or the state ID is not currently valid.
*/
static int nfs41_test_stateid(struct nfs_server *server,
- nfs4_stateid *stateid,
- const struct cred *cred)
+ const nfs4_stateid *stateid,
+ const struct cred *cred)
{
struct nfs4_exception exception = {
.interruptible = true,
@@ -10585,7 +10822,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
| NFS_CAP_CLONE
| NFS_CAP_LAYOUTERROR
| NFS_CAP_READ_PLUS
- | NFS_CAP_MOVEABLE,
+ | NFS_CAP_MOVEABLE
+ | NFS_CAP_OFFLOAD_STATUS,
.init_client = nfs41_init_client,
.shutdown_client = nfs41_shutdown_client,
.match_stateid = nfs41_match_stateid,
@@ -10737,6 +10975,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.close_context = nfs4_close_context,
.open_context = nfs4_atomic_open,
.have_delegation = nfs4_have_delegation,
+ .return_delegation = nfs4_inode_return_delegation,
.alloc_client = nfs4_alloc_client,
.init_client = nfs4_init_client,
.free_client = nfs4_free_client,
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index 351616c61df5..f9c291e2165c 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -148,16 +148,12 @@ static inline void nfs4_copy_sessionid(struct nfs4_sessionid *dst,
memcpy(dst->data, src->data, NFS4_MAX_SESSIONID_LEN);
}
-#ifdef CONFIG_CRC32
/*
* nfs_session_id_hash - calculate the crc32 hash for the session id
* @session - pointer to session
*/
#define nfs_session_id_hash(sess_id) \
(~crc32_le(0xFFFFFFFF, &(sess_id)->data[0], sizeof((sess_id)->data)))
-#else
-#define nfs_session_id_hash(session) (0)
-#endif
#else /* defined(CONFIG_NFS_V4_1) */
static inline int nfs4_init_session(struct nfs_client *clp)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 662e86ea3a2d..7612e977e80b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -501,11 +501,7 @@ nfs4_alloc_state_owner(struct nfs_server *server,
sp = kzalloc(sizeof(*sp), gfp_flags);
if (!sp)
return NULL;
- sp->so_seqid.owner_id = ida_alloc(&server->openowner_id, gfp_flags);
- if (sp->so_seqid.owner_id < 0) {
- kfree(sp);
- return NULL;
- }
+ sp->so_seqid.owner_id = atomic64_inc_return(&server->owner_ctr);
sp->so_server = server;
sp->so_cred = get_cred(cred);
spin_lock_init(&sp->so_lock);
@@ -536,7 +532,6 @@ static void nfs4_free_state_owner(struct nfs4_state_owner *sp)
{
nfs4_destroy_seqid_counter(&sp->so_seqid);
put_cred(sp->so_cred);
- ida_free(&sp->so_server->openowner_id, sp->so_seqid.owner_id);
kfree(sp);
}
@@ -879,19 +874,13 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
refcount_set(&lsp->ls_count, 1);
lsp->ls_state = state;
lsp->ls_owner = owner;
- lsp->ls_seqid.owner_id = ida_alloc(&server->lockowner_id, GFP_KERNEL_ACCOUNT);
- if (lsp->ls_seqid.owner_id < 0)
- goto out_free;
+ lsp->ls_seqid.owner_id = atomic64_inc_return(&server->owner_ctr);
INIT_LIST_HEAD(&lsp->ls_locks);
return lsp;
-out_free:
- kfree(lsp);
- return NULL;
}
void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
{
- ida_free(&server->lockowner_id, lsp->ls_seqid.owner_id);
nfs4_destroy_seqid_counter(&lsp->ls_seqid);
kfree(lsp);
}
@@ -1094,14 +1083,12 @@ void nfs_release_seqid(struct nfs_seqid *seqid)
return;
sequence = seqid->sequence;
spin_lock(&sequence->lock);
- list_del_init(&seqid->list);
- if (!list_empty(&sequence->list)) {
- struct nfs_seqid *next;
-
- next = list_first_entry(&sequence->list,
- struct nfs_seqid, list);
+ if (list_is_first(&seqid->list, &sequence->list) &&
+ !list_is_singular(&sequence->list)) {
+ struct nfs_seqid *next = list_next_entry(seqid, list);
rpc_wake_up_queued_task(&sequence->wait, next->task);
}
+ list_del_init(&seqid->list);
spin_unlock(&sequence->lock);
}
@@ -1211,7 +1198,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
struct rpc_clnt *clnt = clp->cl_rpcclient;
bool swapon = false;
- if (clnt->cl_shutdown)
+ if (clp->cl_cons_state < 0)
return;
set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
@@ -1416,7 +1403,7 @@ int nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_
dprintk("%s: scheduling stateid recovery for server %s\n", __func__,
clp->cl_hostname);
nfs4_schedule_state_manager(clp);
- return 0;
+ return clp->cl_cons_state < 0 ? clp->cl_cons_state : 0;
}
EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery);
@@ -1596,7 +1583,7 @@ static void nfs42_complete_copies(struct nfs4_state_owner *sp, struct nfs4_state
complete(&copy->completion);
}
}
- list_for_each_entry(copy, &sp->so_server->ss_copies, src_copies) {
+ list_for_each_entry(copy, &sp->so_server->ss_src_copies, src_copies) {
if ((test_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags) &&
!nfs4_stateid_match_other(&state->stateid,
&copy->parent_src_state->stateid)))
@@ -1863,6 +1850,7 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
if (!nfs4_state_clear_reclaim_reboot(clp))
return;
+ pnfs_destroy_all_layouts(clp);
ops = clp->cl_mvops->reboot_recovery_ops;
cred = nfs4_get_clid_cred(clp);
err = nfs4_reclaim_complete(clp, ops, cred);
@@ -1956,6 +1944,7 @@ restart:
set_bit(ops->owner_flag_bit, &sp->so_flags);
nfs4_put_state_owner(sp);
status = nfs4_recovery_handle_error(clp, status);
+ nfs4_free_state_owners(&freeme);
return (status != 0) ? status : -EAGAIN;
}
@@ -1966,6 +1955,7 @@ restart:
}
rcu_read_unlock();
nfs4_free_state_owners(&freeme);
+ nfs_local_probe_async(clp);
if (lost_locks)
pr_warn("NFS: %s: lost %d locks\n",
clp->cl_hostname, lost_locks);
@@ -2022,6 +2012,12 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
nfs_mark_client_ready(clp, -EPERM);
clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
return -EPERM;
+ case -ETIMEDOUT:
+ if (clp->cl_cons_state == NFS_CS_SESSION_INITING) {
+ nfs_mark_client_ready(clp, -EIO);
+ return -EIO;
+ }
+ fallthrough;
case -EACCES:
case -NFS4ERR_DELAY:
case -EAGAIN:
@@ -2068,7 +2064,6 @@ static int nfs4_establish_lease(struct nfs_client *clp)
put_cred(cred);
if (status != 0)
return status;
- pnfs_destroy_all_layouts(clp);
return 0;
}
@@ -2116,6 +2111,7 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
{
struct nfs_client *clp = server->nfs_client;
struct nfs4_fs_locations *locations = NULL;
+ struct nfs_fattr *fattr;
struct inode *inode;
struct page *page;
int status, result;
@@ -2125,19 +2121,16 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
(unsigned long long)server->fsid.minor,
clp->cl_hostname);
- result = 0;
page = alloc_page(GFP_KERNEL);
locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
- if (page == NULL || locations == NULL) {
- dprintk("<-- %s: no memory\n", __func__);
- goto out;
- }
- locations->fattr = nfs_alloc_fattr();
- if (locations->fattr == NULL) {
+ fattr = nfs_alloc_fattr();
+ if (page == NULL || locations == NULL || fattr == NULL) {
dprintk("<-- %s: no memory\n", __func__);
+ result = 0;
goto out;
}
+ locations->fattr = fattr;
inode = d_inode(server->super->s_root);
result = nfs4_proc_get_locations(server, NFS_FH(inode), locations,
page, cred);
@@ -2682,6 +2675,8 @@ static void nfs4_state_manager(struct nfs_client *clp)
section = "reclaim reboot";
status = nfs4_do_reclaim(clp,
clp->cl_mvops->reboot_recovery_ops);
+ if (status == 0)
+ status = pnfs_layout_handle_reboot(clp);
if (status == -EAGAIN)
continue;
if (status < 0)
@@ -2744,7 +2739,15 @@ out_error:
pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s"
" with error %d\n", section_sep, section,
clp->cl_hostname, -status);
- ssleep(1);
+ switch (status) {
+ case -ENETDOWN:
+ case -ENETUNREACH:
+ nfs_mark_client_ready(clp, -EIO);
+ break;
+ default:
+ ssleep(1);
+ break;
+ }
out_drain:
memalloc_nofs_restore(memflags);
nfs4_end_drain_session(clp);
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 8da5a9c000f4..b29a26923ce0 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -332,6 +332,7 @@ static void __exit exit_nfs_v4(void)
nfs_dns_resolver_destroy();
}
+MODULE_DESCRIPTION("NFSv4 client support");
MODULE_LICENSE("GPL");
module_init(init_nfs_v4);
diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c
index 886a7c4c60b3..d1a92d8f8ba4 100644
--- a/fs/nfs/nfs4sysctl.c
+++ b/fs/nfs/nfs4sysctl.c
@@ -17,7 +17,7 @@ static const int nfs_set_port_min;
static const int nfs_set_port_max = 65535;
static struct ctl_table_header *nfs4_callback_sysctl_table;
-static struct ctl_table nfs4_cb_sysctls[] = {
+static const struct ctl_table nfs4_cb_sysctls[] = {
{
.procname = "nfs_callback_tcpport",
.data = &nfs_callback_set_tcpport,
diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c
index d22c6670f770..389941ccc9c9 100644
--- a/fs/nfs/nfs4trace.c
+++ b/fs/nfs/nfs4trace.c
@@ -2,6 +2,8 @@
/*
* Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com>
*/
+#include <uapi/linux/pr.h>
+#include <linux/blkdev.h>
#include <linux/nfs_fs.h>
#include "nfs4_fs.h"
#include "internal.h"
@@ -29,5 +31,10 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_read_error);
EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_write_error);
EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_commit_error);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_reg);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_reg_err);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_unreg);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_unreg_err);
+
EXPORT_TRACEPOINT_SYMBOL_GPL(fl_getdevinfo);
#endif
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 10985a4b8259..deab4c0e21a0 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -47,7 +47,7 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event,
TP_fast_assign(
__entry->error = error < 0 ? -error : 0;
- __assign_str(dstaddr, clp->cl_hostname);
+ __assign_str(dstaddr);
),
TP_printk(
@@ -94,8 +94,8 @@ TRACE_EVENT(nfs4_trunked_exchange_id,
TP_fast_assign(
__entry->error = error < 0 ? -error : 0;
- __assign_str(main_addr, clp->cl_hostname);
- __assign_str(trunk_addr, addr);
+ __assign_str(main_addr);
+ __assign_str(trunk_addr);
),
TP_printk(
@@ -365,7 +365,7 @@ TRACE_EVENT(nfs4_state_mgr,
TP_fast_assign(
__entry->state = clp->cl_state;
- __assign_str(hostname, clp->cl_hostname);
+ __assign_str(hostname);
),
TP_printk(
@@ -393,8 +393,8 @@ TRACE_EVENT(nfs4_state_mgr_failed,
TP_fast_assign(
__entry->error = status < 0 ? -status : 0;
__entry->state = clp->cl_state;
- __assign_str(hostname, clp->cl_hostname);
- __assign_str(section, section);
+ __assign_str(hostname);
+ __assign_str(section);
),
TP_printk(
@@ -578,7 +578,7 @@ DECLARE_EVENT_CLASS(nfs4_open_event,
__entry->fhandle = 0;
}
__entry->dir = NFS_FILEID(d_inode(ctx->dentry->d_parent));
- __assign_str(name, ctx->dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -1072,7 +1072,7 @@ DECLARE_EVENT_CLASS(nfs4_lookup_event,
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__entry->error = -error;
- __assign_str(name, name->name);
+ __assign_str(name);
),
TP_printk(
@@ -1156,8 +1156,8 @@ TRACE_EVENT(nfs4_rename,
__entry->olddir = NFS_FILEID(olddir);
__entry->newdir = NFS_FILEID(newdir);
__entry->error = error < 0 ? -error : 0;
- __assign_str(oldname, oldname->name);
- __assign_str(newname, newname->name);
+ __assign_str(oldname);
+ __assign_str(newname);
),
TP_printk(
@@ -1359,7 +1359,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event,
__entry->fileid = 0;
__entry->dev = 0;
}
- __assign_str(dstaddr, clp ? clp->cl_hostname : "unknown");
+ __assign_str(dstaddr);
),
TP_printk(
@@ -1416,7 +1416,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event,
__entry->fileid = 0;
__entry->dev = 0;
}
- __assign_str(dstaddr, clp ? clp->cl_hostname : "unknown");
+ __assign_str(dstaddr);
__entry->stateid_seq =
be32_to_cpu(stateid->seqid);
__entry->stateid_hash =
@@ -1960,7 +1960,7 @@ DECLARE_EVENT_CLASS(nfs4_deviceid_event,
),
TP_fast_assign(
- __assign_str(dstaddr, clp->cl_hostname);
+ __assign_str(dstaddr);
memcpy(__entry->deviceid, deviceid->data,
NFS4_DEVICEID4_SIZE);
),
@@ -1998,7 +1998,7 @@ DECLARE_EVENT_CLASS(nfs4_deviceid_status,
TP_fast_assign(
__entry->dev = server->s_dev;
__entry->status = status;
- __assign_str(dstaddr, server->nfs_client->cl_hostname);
+ __assign_str(dstaddr);
memcpy(__entry->deviceid, deviceid->data,
NFS4_DEVICEID4_SIZE);
),
@@ -2036,8 +2036,8 @@ TRACE_EVENT(fl_getdevinfo,
),
TP_fast_assign(
- __assign_str(mds_addr, server->nfs_client->cl_hostname);
- __assign_str(ds_ips, ds_remotestr);
+ __assign_str(mds_addr);
+ __assign_str(ds_ips);
memcpy(__entry->deviceid, deviceid->data,
NFS4_DEVICEID4_SIZE);
),
@@ -2051,13 +2051,15 @@ TRACE_EVENT(fl_getdevinfo,
DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
TP_PROTO(
- const struct nfs_pgio_header *hdr
+ const struct nfs_pgio_header *hdr,
+ int error
),
- TP_ARGS(hdr),
+ TP_ARGS(hdr, error),
TP_STRUCT__entry(
__field(unsigned long, error)
+ __field(unsigned long, nfs_error)
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
@@ -2073,7 +2075,8 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
TP_fast_assign(
const struct inode *inode = hdr->inode;
- __entry->error = hdr->res.op_status;
+ __entry->error = -error;
+ __entry->nfs_error = hdr->res.op_status;
__entry->fhandle = nfs_fhandle_hash(hdr->args.fh);
__entry->fileid = NFS_FILEID(inode);
__entry->dev = inode->i_sb->s_dev;
@@ -2083,14 +2086,13 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
be32_to_cpu(hdr->args.stateid.seqid);
__entry->stateid_hash =
nfs_stateid_hash(&hdr->args.stateid);
- __assign_str(dstaddr, hdr->ds_clp ?
- rpc_peeraddr2str(hdr->ds_clp->cl_rpcclient,
- RPC_DISPLAY_ADDR) : "unknown");
+ __assign_str(dstaddr);
),
TP_printk(
"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s",
+ "offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s "
+ "nfs_error=%lu (%s)",
-__entry->error,
show_nfs4_status(__entry->error),
MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -2098,28 +2100,32 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
__entry->fhandle,
__entry->offset, __entry->count,
__entry->stateid_seq, __entry->stateid_hash,
- __get_str(dstaddr)
+ __get_str(dstaddr), __entry->nfs_error,
+ show_nfs4_status(__entry->nfs_error)
)
);
#define DEFINE_NFS4_FLEXFILES_IO_EVENT(name) \
DEFINE_EVENT(nfs4_flexfiles_io_event, name, \
TP_PROTO( \
- const struct nfs_pgio_header *hdr \
+ const struct nfs_pgio_header *hdr, \
+ int error \
), \
- TP_ARGS(hdr))
+ TP_ARGS(hdr, error))
DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_read_error);
DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_write_error);
TRACE_EVENT(ff_layout_commit_error,
TP_PROTO(
- const struct nfs_commit_data *data
+ const struct nfs_commit_data *data,
+ int error
),
- TP_ARGS(data),
+ TP_ARGS(data, error),
TP_STRUCT__entry(
__field(unsigned long, error)
+ __field(unsigned long, nfs_error)
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
@@ -2133,30 +2139,118 @@ TRACE_EVENT(ff_layout_commit_error,
TP_fast_assign(
const struct inode *inode = data->inode;
- __entry->error = data->res.op_status;
+ __entry->error = -error;
+ __entry->nfs_error = data->res.op_status;
__entry->fhandle = nfs_fhandle_hash(data->args.fh);
__entry->fileid = NFS_FILEID(inode);
__entry->dev = inode->i_sb->s_dev;
__entry->offset = data->args.offset;
__entry->count = data->args.count;
- __assign_str(dstaddr, data->ds_clp ?
- rpc_peeraddr2str(data->ds_clp->cl_rpcclient,
- RPC_DISPLAY_ADDR) : "unknown");
+ __assign_str(dstaddr);
),
TP_printk(
"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%llu count=%u dstaddr=%s",
+ "offset=%llu count=%u dstaddr=%s nfs_error=%lu (%s)",
-__entry->error,
show_nfs4_status(__entry->error),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
__entry->offset, __entry->count,
- __get_str(dstaddr)
+ __get_str(dstaddr), __entry->nfs_error,
+ show_nfs4_status(__entry->nfs_error)
)
);
+DECLARE_EVENT_CLASS(pnfs_bl_pr_key_class,
+ TP_PROTO(
+ const struct block_device *bdev,
+ u64 key
+ ),
+ TP_ARGS(bdev, key),
+ TP_STRUCT__entry(
+ __field(u64, key)
+ __field(dev_t, dev)
+ __string(device, bdev->bd_disk->disk_name)
+ ),
+ TP_fast_assign(
+ __entry->key = key;
+ __entry->dev = bdev->bd_dev;
+ __assign_str(device);
+ ),
+ TP_printk("dev=%d,%d (%s) key=0x%016llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __get_str(device), __entry->key
+ )
+);
+
+#define DEFINE_NFS4_BLOCK_PRKEY_EVENT(name) \
+ DEFINE_EVENT(pnfs_bl_pr_key_class, name, \
+ TP_PROTO( \
+ const struct block_device *bdev, \
+ u64 key \
+ ), \
+ TP_ARGS(bdev, key))
+DEFINE_NFS4_BLOCK_PRKEY_EVENT(bl_pr_key_reg);
+DEFINE_NFS4_BLOCK_PRKEY_EVENT(bl_pr_key_unreg);
+
+/*
+ * From uapi/linux/pr.h
+ */
+TRACE_DEFINE_ENUM(PR_STS_SUCCESS);
+TRACE_DEFINE_ENUM(PR_STS_IOERR);
+TRACE_DEFINE_ENUM(PR_STS_RESERVATION_CONFLICT);
+TRACE_DEFINE_ENUM(PR_STS_RETRY_PATH_FAILURE);
+TRACE_DEFINE_ENUM(PR_STS_PATH_FAST_FAILED);
+TRACE_DEFINE_ENUM(PR_STS_PATH_FAILED);
+
+#define show_pr_status(x) \
+ __print_symbolic(x, \
+ { PR_STS_SUCCESS, "SUCCESS" }, \
+ { PR_STS_IOERR, "IOERR" }, \
+ { PR_STS_RESERVATION_CONFLICT, "RESERVATION_CONFLICT" }, \
+ { PR_STS_RETRY_PATH_FAILURE, "RETRY_PATH_FAILURE" }, \
+ { PR_STS_PATH_FAST_FAILED, "PATH_FAST_FAILED" }, \
+ { PR_STS_PATH_FAILED, "PATH_FAILED" })
+
+DECLARE_EVENT_CLASS(pnfs_bl_pr_key_err_class,
+ TP_PROTO(
+ const struct block_device *bdev,
+ u64 key,
+ int status
+ ),
+ TP_ARGS(bdev, key, status),
+ TP_STRUCT__entry(
+ __field(u64, key)
+ __field(dev_t, dev)
+ __field(unsigned long, status)
+ __string(device, bdev->bd_disk->disk_name)
+ ),
+ TP_fast_assign(
+ __entry->key = key;
+ __entry->dev = bdev->bd_dev;
+ __entry->status = status;
+ __assign_str(device);
+ ),
+ TP_printk("dev=%d,%d (%s) key=0x%016llx status=%s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __get_str(device), __entry->key,
+ show_pr_status(__entry->status)
+ )
+);
+
+#define DEFINE_NFS4_BLOCK_PRKEY_ERR_EVENT(name) \
+ DEFINE_EVENT(pnfs_bl_pr_key_err_class, name, \
+ TP_PROTO( \
+ const struct block_device *bdev, \
+ u64 key, \
+ int status \
+ ), \
+ TP_ARGS(bdev, key, status))
+DEFINE_NFS4_BLOCK_PRKEY_ERR_EVENT(bl_pr_key_reg_err);
+DEFINE_NFS4_BLOCK_PRKEY_ERR_EVENT(bl_pr_key_unreg_err);
+
#ifdef CONFIG_NFS_V4_2
TRACE_DEFINE_ENUM(NFS4_CONTENT_DATA);
TRACE_DEFINE_ENUM(NFS4_CONTENT_HOLE);
@@ -2524,7 +2618,7 @@ TRACE_EVENT(nfs4_copy_notify,
)
);
-TRACE_EVENT(nfs4_offload_cancel,
+DECLARE_EVENT_CLASS(nfs4_offload_class,
TP_PROTO(
const struct nfs42_offload_status_args *args,
int error
@@ -2556,6 +2650,15 @@ TRACE_EVENT(nfs4_offload_cancel,
__entry->stateid_seq, __entry->stateid_hash
)
);
+#define DEFINE_NFS4_OFFLOAD_EVENT(name) \
+ DEFINE_EVENT(nfs4_offload_class, name, \
+ TP_PROTO( \
+ const struct nfs42_offload_status_args *args, \
+ int error \
+ ), \
+ TP_ARGS(args, error))
+DEFINE_NFS4_OFFLOAD_EVENT(nfs4_offload_cancel);
+DEFINE_NFS4_OFFLOAD_EVENT(nfs4_offload_status);
DECLARE_EVENT_CLASS(nfs4_xattr_event,
TP_PROTO(
@@ -2579,7 +2682,7 @@ DECLARE_EVENT_CLASS(nfs4_xattr_event,
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = NFS_FILEID(inode);
__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
- __assign_str(name, name);
+ __assign_str(name);
),
TP_printk(
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1416099dfcd1..55bef5fbfa47 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -52,6 +52,7 @@
#include <linux/nfs.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
+#include <linux/nfs_common.h>
#include "nfs4_fs.h"
#include "nfs4trace.h"
@@ -63,11 +64,7 @@
#define NFSDBG_FACILITY NFSDBG_XDR
-/* Mapping from NFS error code to "errno" error code. */
-#define errno_NFSERR_IO EIO
-
struct compound_hdr;
-static int nfs4_stat_to_errno(int);
static void encode_layoutget(struct xdr_stream *xdr,
const struct nfs4_layoutget_args *args,
struct compound_hdr *hdr);
@@ -85,9 +82,8 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
* we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2)
*/
#define pagepad_maxsz (1)
-#define open_owner_id_maxsz (1 + 2 + 1 + 1 + 2)
-#define lock_owner_id_maxsz (1 + 1 + 4)
-#define decode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
+#define open_owner_id_maxsz (2 + 1 + 2 + 2)
+#define lock_owner_id_maxsz (2 + 1 + 2)
#define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
#define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
#define op_encode_hdr_maxsz (1)
@@ -188,7 +184,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
#define encode_claim_null_maxsz (1 + nfs4_name_maxsz)
#define encode_open_maxsz (op_encode_hdr_maxsz + \
2 + encode_share_access_maxsz + 2 + \
- open_owner_id_maxsz + \
+ 1 + open_owner_id_maxsz + \
encode_opentype_maxsz + \
encode_claim_null_maxsz)
#define decode_space_limit_maxsz (3)
@@ -224,6 +220,11 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
encode_attrs_maxsz)
#define decode_setattr_maxsz (op_decode_hdr_maxsz + \
nfs4_fattr_bitmap_maxsz)
+#define encode_delegattr_maxsz (op_encode_hdr_maxsz + \
+ encode_stateid_maxsz + \
+ nfs4_fattr_bitmap_maxsz + \
+ 2*nfstime4_maxsz)
+#define decode_delegattr_maxsz (decode_setattr_maxsz)
#define encode_read_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + 3)
#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + pagepad_maxsz)
@@ -253,13 +254,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
#define encode_link_maxsz (op_encode_hdr_maxsz + \
nfs4_name_maxsz)
#define decode_link_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz)
-#define encode_lockowner_maxsz (7)
+#define encode_lockowner_maxsz (2 + 1 + lock_owner_id_maxsz)
+
#define encode_lock_maxsz (op_encode_hdr_maxsz + \
7 + \
1 + encode_stateid_maxsz + 1 + \
encode_lockowner_maxsz)
#define decode_lock_denied_maxsz \
- (8 + decode_lockowner_maxsz)
+ (2 + 2 + 1 + 2 + 1 + lock_owner_id_maxsz)
#define decode_lock_maxsz (op_decode_hdr_maxsz + \
decode_lock_denied_maxsz)
#define encode_lockt_maxsz (op_encode_hdr_maxsz + 5 + \
@@ -615,7 +617,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
encode_lockowner_maxsz)
#define NFS4_dec_release_lockowner_sz \
(compound_decode_hdr_maxsz + \
- decode_lockowner_maxsz)
+ decode_release_lockowner_maxsz)
#define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
@@ -758,12 +760,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_layoutreturn_maxsz + \
+ encode_delegattr_maxsz + \
encode_delegreturn_maxsz + \
encode_getattr_maxsz)
#define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_layoutreturn_maxsz + \
+ decode_delegattr_maxsz + \
decode_delegreturn_maxsz + \
decode_getattr_maxsz)
#define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \
@@ -968,11 +972,6 @@ static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes)
return p;
}
-static void encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len)
-{
- WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0);
-}
-
static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
{
WARN_ON_ONCE(xdr_stream_encode_opaque(xdr, str, len) < 0);
@@ -1060,9 +1059,10 @@ static void encode_nops(struct compound_hdr *hdr)
*hdr->nops_p = htonl(hdr->nops);
}
-static void encode_nfs4_stateid(struct xdr_stream *xdr, const nfs4_stateid *stateid)
+static void encode_nfs4_stateid(struct xdr_stream *xdr,
+ const nfs4_stateid *stateid)
{
- encode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE);
+ encode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
}
static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf)
@@ -1412,16 +1412,16 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
__be32 *p;
/*
* opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4,
- * owner 4 = 32
+ * owner 28
*/
encode_nfs4_seqid(xdr, arg->seqid);
encode_share_access(xdr, arg->share_access);
- p = reserve_space(xdr, 36);
+ p = reserve_space(xdr, 40);
p = xdr_encode_hyper(p, arg->clientid);
- *p++ = cpu_to_be32(24);
+ *p++ = cpu_to_be32(28);
p = xdr_encode_opaque_fixed(p, "open id:", 8);
*p++ = cpu_to_be32(arg->server->s_dev);
- *p++ = cpu_to_be32(arg->id.uniquifier);
+ p = xdr_encode_hyper(p, arg->id.uniquifier);
xdr_encode_hyper(p, arg->id.create_time);
}
@@ -1468,20 +1468,18 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a
}
}
-static inline void encode_delegation_type(struct xdr_stream *xdr, fmode_t delegation_type)
+static inline void encode_delegation_type(struct xdr_stream *xdr, u32 delegation_type)
{
__be32 *p;
p = reserve_space(xdr, 4);
switch (delegation_type) {
- case 0:
- *p = cpu_to_be32(NFS4_OPEN_DELEGATE_NONE);
- break;
- case FMODE_READ:
- *p = cpu_to_be32(NFS4_OPEN_DELEGATE_READ);
- break;
- case FMODE_WRITE|FMODE_READ:
- *p = cpu_to_be32(NFS4_OPEN_DELEGATE_WRITE);
+ case NFS4_OPEN_DELEGATE_NONE:
+ case NFS4_OPEN_DELEGATE_READ:
+ case NFS4_OPEN_DELEGATE_WRITE:
+ case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
+ case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+ *p = cpu_to_be32(delegation_type);
break;
default:
BUG();
@@ -1497,7 +1495,7 @@ static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *
encode_string(xdr, name->len, name->name);
}
-static inline void encode_claim_previous(struct xdr_stream *xdr, fmode_t type)
+static inline void encode_claim_previous(struct xdr_stream *xdr, u32 type)
{
__be32 *p;
@@ -1735,6 +1733,33 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs
server->attr_bitmask);
}
+static void encode_delegattr(struct xdr_stream *xdr,
+ const nfs4_stateid *stateid,
+ const struct nfs4_delegattr *attr,
+ struct compound_hdr *hdr)
+{
+ uint32_t bitmap[3] = { 0 };
+ uint32_t len = 0;
+ __be32 *p;
+
+ encode_op_hdr(xdr, OP_SETATTR, encode_delegattr_maxsz, hdr);
+ encode_nfs4_stateid(xdr, stateid);
+ if (attr->atime_set) {
+ bitmap[2] |= FATTR4_WORD2_TIME_DELEG_ACCESS;
+ len += (nfstime4_maxsz << 2);
+ }
+ if (attr->mtime_set) {
+ bitmap[2] |= FATTR4_WORD2_TIME_DELEG_MODIFY;
+ len += (nfstime4_maxsz << 2);
+ }
+ xdr_encode_bitmap4(xdr, bitmap, ARRAY_SIZE(bitmap));
+ xdr_stream_encode_opaque_inline(xdr, (void **)&p, len);
+ if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_ACCESS)
+ p = xdr_encode_nfstime4(p, &attr->atime);
+ if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_MODIFY)
+ p = xdr_encode_nfstime4(p, &attr->mtime);
+}
+
static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr)
{
__be32 *p;
@@ -2105,7 +2130,7 @@ static void encode_test_stateid(struct xdr_stream *xdr,
{
encode_op_hdr(xdr, OP_TEST_STATEID, decode_test_stateid_maxsz, hdr);
encode_uint32(xdr, 1);
- encode_nfs4_stateid(xdr, args->stateid);
+ encode_nfs4_stateid(xdr, &args->stateid);
}
static void encode_free_stateid(struct xdr_stream *xdr,
@@ -2812,6 +2837,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
encode_putfh(xdr, args->fhandle, &hdr);
if (args->lr_args)
encode_layoutreturn(xdr, args->lr_args, &hdr);
+ if (args->sattr_args)
+ encode_delegattr(xdr, args->stateid, args->sattr_args, &hdr);
if (args->bitmask)
encode_getfattr(xdr, args->bitmask, &hdr);
encode_delegreturn(xdr, args->stateid, &hdr);
@@ -3412,7 +3439,7 @@ static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, ui
*res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT;
}
- dprintk("%s: link support=%s\n", __func__, *res == 0 ? "false" : "true");
+ dprintk("%s: link support=%s\n", __func__, str_false_true(*res == 0));
return 0;
}
@@ -3430,7 +3457,7 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap,
*res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT;
}
- dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true");
+ dprintk("%s: symlink support=%s\n", __func__, str_false_true(*res == 0));
return 0;
}
@@ -3572,7 +3599,7 @@ static int decode_attr_case_insensitive(struct xdr_stream *xdr, uint32_t *bitmap
*res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_CASE_INSENSITIVE;
}
- dprintk("%s: case_insensitive=%s\n", __func__, *res == 0 ? "false" : "true");
+ dprintk("%s: case_insensitive=%s\n", __func__, str_false_true(*res == 0));
return 0;
}
@@ -3590,7 +3617,7 @@ static int decode_attr_case_preserving(struct xdr_stream *xdr, uint32_t *bitmap,
*res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_CASE_PRESERVING;
}
- dprintk("%s: case_preserving=%s\n", __func__, *res == 0 ? "false" : "true");
+ dprintk("%s: case_preserving=%s\n", __func__, str_false_true(*res == 0));
return 0;
}
@@ -4298,8 +4325,29 @@ static int decode_attr_xattrsupport(struct xdr_stream *xdr, uint32_t *bitmap,
*res = be32_to_cpup(p);
bitmap[2] &= ~FATTR4_WORD2_XATTR_SUPPORT;
}
- dprintk("%s: XATTR support=%s\n", __func__,
- *res == 0 ? "false" : "true");
+ dprintk("%s: XATTR support=%s\n", __func__, str_false_true(*res == 0));
+ return 0;
+}
+
+static int decode_attr_open_arguments(struct xdr_stream *xdr, uint32_t *bitmap,
+ struct nfs4_open_caps *res)
+{
+ memset(res, 0, sizeof(*res));
+ if (unlikely(bitmap[2] & (FATTR4_WORD2_OPEN_ARGUMENTS - 1U)))
+ return -EIO;
+ if (likely(bitmap[2] & FATTR4_WORD2_OPEN_ARGUMENTS)) {
+ if (decode_bitmap4(xdr, res->oa_share_access, ARRAY_SIZE(res->oa_share_access)) < 0)
+ return -EIO;
+ if (decode_bitmap4(xdr, res->oa_share_deny, ARRAY_SIZE(res->oa_share_deny)) < 0)
+ return -EIO;
+ if (decode_bitmap4(xdr, res->oa_share_access_want, ARRAY_SIZE(res->oa_share_access_want)) < 0)
+ return -EIO;
+ if (decode_bitmap4(xdr, res->oa_open_claim, ARRAY_SIZE(res->oa_open_claim)) < 0)
+ return -EIO;
+ if (decode_bitmap4(xdr, res->oa_createmode, ARRAY_SIZE(res->oa_createmode)) < 0)
+ return -EIO;
+ bitmap[2] &= ~FATTR4_WORD2_OPEN_ARGUMENTS;
+ }
return 0;
}
@@ -4352,14 +4400,6 @@ static int decode_access(struct xdr_stream *xdr, u32 *supported, u32 *access)
return 0;
}
-static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len)
-{
- ssize_t ret = xdr_stream_decode_opaque_fixed(xdr, buf, len);
- if (unlikely(ret < 0))
- return -EIO;
- return 0;
-}
-
static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
{
return decode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE);
@@ -4477,6 +4517,8 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re
if ((status = decode_attr_exclcreat_supported(xdr, bitmap,
res->exclcreat_bitmask)) != 0)
goto xdr_error;
+ if ((status = decode_attr_open_arguments(xdr, bitmap, &res->open_caps)) != 0)
+ goto xdr_error;
status = verify_attr_len(xdr, savep, attrlen);
xdr_error:
dprintk("%s: xdr returned %d!\n", __func__, -status);
@@ -5035,7 +5077,7 @@ static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
/*
* We create the owner, so we know a proper owner.id length is 4.
*/
-static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
+static int decode_lock_denied(struct xdr_stream *xdr, struct file_lock *fl)
{
uint64_t offset, length, clientid;
__be32 *p;
@@ -5148,13 +5190,12 @@ static int decode_space_limit(struct xdr_stream *xdr,
}
static int decode_rw_delegation(struct xdr_stream *xdr,
- uint32_t delegation_type,
- struct nfs_openres *res)
+ struct nfs4_open_delegation *res)
{
__be32 *p;
int status;
- status = decode_delegation_stateid(xdr, &res->delegation);
+ status = decode_delegation_stateid(xdr, &res->stateid);
if (unlikely(status))
return status;
p = xdr_inline_decode(xdr, 4);
@@ -5162,52 +5203,57 @@ static int decode_rw_delegation(struct xdr_stream *xdr,
return -EIO;
res->do_recall = be32_to_cpup(p);
- switch (delegation_type) {
+ switch (res->open_delegation_type) {
case NFS4_OPEN_DELEGATE_READ:
- res->delegation_type = FMODE_READ;
+ case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
+ res->type = FMODE_READ;
break;
case NFS4_OPEN_DELEGATE_WRITE:
- res->delegation_type = FMODE_WRITE|FMODE_READ;
+ case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+ res->type = FMODE_WRITE|FMODE_READ;
if (decode_space_limit(xdr, &res->pagemod_limit) < 0)
return -EIO;
}
return decode_ace(xdr, NULL);
}
-static int decode_no_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
+static int decode_no_delegation(struct xdr_stream *xdr,
+ struct nfs4_open_delegation *res)
{
__be32 *p;
- uint32_t why_no_delegation;
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
return -EIO;
- why_no_delegation = be32_to_cpup(p);
- switch (why_no_delegation) {
+ res->why_no_delegation = be32_to_cpup(p);
+ switch (res->why_no_delegation) {
case WND4_CONTENTION:
case WND4_RESOURCE:
- xdr_inline_decode(xdr, 4);
- /* Ignore for now */
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+ res->will_notify = be32_to_cpup(p);
}
return 0;
}
-static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
+static int decode_delegation(struct xdr_stream *xdr,
+ struct nfs4_open_delegation *res)
{
__be32 *p;
- uint32_t delegation_type;
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
return -EIO;
- delegation_type = be32_to_cpup(p);
- res->delegation_type = 0;
- switch (delegation_type) {
+ res->open_delegation_type = be32_to_cpup(p);
+ switch (res->open_delegation_type) {
case NFS4_OPEN_DELEGATE_NONE:
return 0;
case NFS4_OPEN_DELEGATE_READ:
case NFS4_OPEN_DELEGATE_WRITE:
- return decode_rw_delegation(xdr, delegation_type, res);
+ case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
+ case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+ return decode_rw_delegation(xdr, res);
case NFS4_OPEN_DELEGATE_NONE_EXT:
return decode_no_delegation(xdr, res);
}
@@ -5248,7 +5294,7 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
for (; i < NFS4_BITMAP_SIZE; i++)
res->attrset[i] = 0;
- return decode_delegation(xdr, res);
+ return decode_delegation(xdr, &res->delegation);
xdr_error:
dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen);
return -EIO;
@@ -5480,6 +5526,11 @@ static int decode_setattr(struct xdr_stream *xdr)
return -EIO;
}
+static int decode_delegattr(struct xdr_stream *xdr)
+{
+ return decode_setattr(xdr);
+}
+
static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_res *res)
{
__be32 *p;
@@ -7052,6 +7103,12 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
if (status)
goto out;
}
+ if (res->sattr_res) {
+ status = decode_delegattr(xdr);
+ res->sattr_ret = status;
+ if (status)
+ goto out;
+ }
if (res->fattr) {
status = decode_getfattr(xdr, res->fattr, res->server);
if (status != 0)
@@ -7547,72 +7604,6 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
return 0;
}
-/*
- * We need to translate between nfs status return values and
- * the local errno values which may not be the same.
- */
-static struct {
- int stat;
- int errno;
-} nfs_errtbl[] = {
- { NFS4_OK, 0 },
- { NFS4ERR_PERM, -EPERM },
- { NFS4ERR_NOENT, -ENOENT },
- { NFS4ERR_IO, -errno_NFSERR_IO},
- { NFS4ERR_NXIO, -ENXIO },
- { NFS4ERR_ACCESS, -EACCES },
- { NFS4ERR_EXIST, -EEXIST },
- { NFS4ERR_XDEV, -EXDEV },
- { NFS4ERR_NOTDIR, -ENOTDIR },
- { NFS4ERR_ISDIR, -EISDIR },
- { NFS4ERR_INVAL, -EINVAL },
- { NFS4ERR_FBIG, -EFBIG },
- { NFS4ERR_NOSPC, -ENOSPC },
- { NFS4ERR_ROFS, -EROFS },
- { NFS4ERR_MLINK, -EMLINK },
- { NFS4ERR_NAMETOOLONG, -ENAMETOOLONG },
- { NFS4ERR_NOTEMPTY, -ENOTEMPTY },
- { NFS4ERR_DQUOT, -EDQUOT },
- { NFS4ERR_STALE, -ESTALE },
- { NFS4ERR_BADHANDLE, -EBADHANDLE },
- { NFS4ERR_BAD_COOKIE, -EBADCOOKIE },
- { NFS4ERR_NOTSUPP, -ENOTSUPP },
- { NFS4ERR_TOOSMALL, -ETOOSMALL },
- { NFS4ERR_SERVERFAULT, -EREMOTEIO },
- { NFS4ERR_BADTYPE, -EBADTYPE },
- { NFS4ERR_LOCKED, -EAGAIN },
- { NFS4ERR_SYMLINK, -ELOOP },
- { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP },
- { NFS4ERR_DEADLOCK, -EDEADLK },
- { NFS4ERR_NOXATTR, -ENODATA },
- { NFS4ERR_XATTR2BIG, -E2BIG },
- { -1, -EIO }
-};
-
-/*
- * Convert an NFS error code to a local one.
- * This one is used jointly by NFSv2 and NFSv3.
- */
-static int
-nfs4_stat_to_errno(int stat)
-{
- int i;
- for (i = 0; nfs_errtbl[i].stat != -1; i++) {
- if (nfs_errtbl[i].stat == stat)
- return nfs_errtbl[i].errno;
- }
- if (stat <= 10000 || stat > 10100) {
- /* The server is looney tunes. */
- return -EREMOTEIO;
- }
- /* If we cannot translate the error, the recovery routines should
- * handle it.
- * Note: remaining NFSv4 error codes have values > 10000, so should
- * not conflict with native Linux error codes.
- */
- return -stat;
-}
-
#ifdef CONFIG_NFS_V4_2
#include "nfs42xdr.c"
#endif /* CONFIG_NFS_V4_2 */
@@ -7711,6 +7702,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
PROC42(CLONE, enc_clone, dec_clone),
PROC42(COPY, enc_copy, dec_copy),
PROC42(OFFLOAD_CANCEL, enc_offload_cancel, dec_offload_cancel),
+ PROC42(OFFLOAD_STATUS, enc_offload_status, dec_offload_status),
PROC42(COPY_NOTIFY, enc_copy_notify, dec_copy_notify),
PROC(LOOKUPP, enc_lookupp, dec_lookupp),
PROC42(LAYOUTERROR, enc_layouterror, dec_layouterror),
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index afedb449b54f..7a058bd8c566 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -409,7 +409,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event,
__entry->dir = NFS_FILEID(dir);
__entry->flags = flags;
__entry->fileid = d_is_negative(dentry) ? 0 : NFS_FILEID(d_inode(dentry));
- __assign_str(name, dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -457,7 +457,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done,
__entry->error = error < 0 ? -error : 0;
__entry->flags = flags;
__entry->fileid = d_is_negative(dentry) ? 0 : NFS_FILEID(d_inode(dentry));
- __assign_str(name, dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -512,7 +512,7 @@ TRACE_EVENT(nfs_atomic_open_enter,
__entry->dir = NFS_FILEID(dir);
__entry->flags = flags;
__entry->fmode = (__force unsigned long)ctx->mode;
- __assign_str(name, ctx->dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -551,7 +551,7 @@ TRACE_EVENT(nfs_atomic_open_exit,
__entry->dir = NFS_FILEID(dir);
__entry->flags = flags;
__entry->fmode = (__force unsigned long)ctx->mode;
- __assign_str(name, ctx->dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -587,7 +587,7 @@ TRACE_EVENT(nfs_create_enter,
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__entry->flags = flags;
- __assign_str(name, dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -623,7 +623,7 @@ TRACE_EVENT(nfs_create_exit,
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__entry->flags = flags;
- __assign_str(name, dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -654,7 +654,7 @@ DECLARE_EVENT_CLASS(nfs_directory_event,
TP_fast_assign(
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
- __assign_str(name, dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -693,7 +693,7 @@ DECLARE_EVENT_CLASS(nfs_directory_event_done,
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__entry->error = error < 0 ? -error : 0;
- __assign_str(name, dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -747,7 +747,7 @@ TRACE_EVENT(nfs_link_enter,
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = NFS_FILEID(inode);
__entry->dir = NFS_FILEID(dir);
- __assign_str(name, dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -783,7 +783,7 @@ TRACE_EVENT(nfs_link_exit,
__entry->fileid = NFS_FILEID(inode);
__entry->dir = NFS_FILEID(dir);
__entry->error = error < 0 ? -error : 0;
- __assign_str(name, dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -819,8 +819,8 @@ DECLARE_EVENT_CLASS(nfs_rename_event,
__entry->dev = old_dir->i_sb->s_dev;
__entry->old_dir = NFS_FILEID(old_dir);
__entry->new_dir = NFS_FILEID(new_dir);
- __assign_str(old_name, old_dentry->d_name.name);
- __assign_str(new_name, new_dentry->d_name.name);
+ __assign_str(old_name);
+ __assign_str(new_name);
),
TP_printk(
@@ -868,8 +868,8 @@ DECLARE_EVENT_CLASS(nfs_rename_event_done,
__entry->error = -error;
__entry->old_dir = NFS_FILEID(old_dir);
__entry->new_dir = NFS_FILEID(new_dir);
- __assign_str(old_name, old_dentry->d_name.name);
- __assign_str(new_name, new_dentry->d_name.name);
+ __assign_str(old_name);
+ __assign_str(new_name);
),
TP_printk(
@@ -939,10 +939,11 @@ TRACE_EVENT(nfs_sillyrename_unlink,
DECLARE_EVENT_CLASS(nfs_folio_event,
TP_PROTO(
const struct inode *inode,
- struct folio *folio
+ loff_t offset,
+ size_t count
),
- TP_ARGS(inode, folio),
+ TP_ARGS(inode, offset, count),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -950,7 +951,7 @@ DECLARE_EVENT_CLASS(nfs_folio_event,
__field(u64, fileid)
__field(u64, version)
__field(loff_t, offset)
- __field(u32, count)
+ __field(size_t, count)
),
TP_fast_assign(
@@ -960,13 +961,13 @@ DECLARE_EVENT_CLASS(nfs_folio_event,
__entry->fileid = nfsi->fileid;
__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
__entry->version = inode_peek_iversion_raw(inode);
- __entry->offset = folio_file_pos(folio);
- __entry->count = nfs_folio_length(folio);
+ __entry->offset = offset,
+ __entry->count = count;
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu "
- "offset=%lld count=%u",
+ "offset=%lld count=%zu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle, __entry->version,
@@ -978,18 +979,20 @@ DECLARE_EVENT_CLASS(nfs_folio_event,
DEFINE_EVENT(nfs_folio_event, name, \
TP_PROTO( \
const struct inode *inode, \
- struct folio *folio \
+ loff_t offset, \
+ size_t count \
), \
- TP_ARGS(inode, folio))
+ TP_ARGS(inode, offset, count))
DECLARE_EVENT_CLASS(nfs_folio_event_done,
TP_PROTO(
const struct inode *inode,
- struct folio *folio,
+ loff_t offset,
+ size_t count,
int ret
),
- TP_ARGS(inode, folio, ret),
+ TP_ARGS(inode, offset, count, ret),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -998,7 +1001,7 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done,
__field(u64, fileid)
__field(u64, version)
__field(loff_t, offset)
- __field(u32, count)
+ __field(size_t, count)
),
TP_fast_assign(
@@ -1008,14 +1011,14 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done,
__entry->fileid = nfsi->fileid;
__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
__entry->version = inode_peek_iversion_raw(inode);
- __entry->offset = folio_file_pos(folio);
- __entry->count = nfs_folio_length(folio);
+ __entry->offset = offset,
+ __entry->count = count,
__entry->ret = ret;
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu "
- "offset=%lld count=%u ret=%d",
+ "offset=%lld count=%zu ret=%d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle, __entry->version,
@@ -1027,10 +1030,11 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done,
DEFINE_EVENT(nfs_folio_event_done, name, \
TP_PROTO( \
const struct inode *inode, \
- struct folio *folio, \
+ loff_t offset, \
+ size_t count, \
int ret \
), \
- TP_ARGS(inode, folio, ret))
+ TP_ARGS(inode, offset, count, ret))
DEFINE_NFS_FOLIO_EVENT(nfs_aop_readpage);
DEFINE_NFS_FOLIO_EVENT_DONE(nfs_aop_readpage_done);
@@ -1636,8 +1640,8 @@ TRACE_EVENT(nfs_mount_assign,
),
TP_fast_assign(
- __assign_str(option, option);
- __assign_str(value, value);
+ __assign_str(option);
+ __assign_str(value);
),
TP_printk("option %s=%s",
@@ -1657,7 +1661,7 @@ TRACE_EVENT(nfs_mount_option,
),
TP_fast_assign(
- __assign_str(option, param->key);
+ __assign_str(option);
),
TP_printk("option %s", __get_str(option))
@@ -1675,12 +1679,41 @@ TRACE_EVENT(nfs_mount_path,
),
TP_fast_assign(
- __assign_str(path, path);
+ __assign_str(path);
),
TP_printk("path='%s'", __get_str(path))
);
+TRACE_EVENT(nfs_local_open_fh,
+ TP_PROTO(
+ const struct nfs_fh *fh,
+ fmode_t fmode,
+ int error
+ ),
+
+ TP_ARGS(fh, fmode, error),
+
+ TP_STRUCT__entry(
+ __field(int, error)
+ __field(u32, fhandle)
+ __field(unsigned int, fmode)
+ ),
+
+ TP_fast_assign(
+ __entry->error = error;
+ __entry->fhandle = nfs_fhandle_hash(fh);
+ __entry->fmode = (__force unsigned int)fmode;
+ ),
+
+ TP_printk(
+ "error=%d fhandle=0x%08x mode=%s",
+ __entry->error,
+ __entry->fhandle,
+ show_fs_fmode_flags(__entry->fmode)
+ )
+);
+
DECLARE_EVENT_CLASS(nfs_xdr_event,
TP_PROTO(
const struct xdr_stream *xdr,
@@ -1710,9 +1743,8 @@ DECLARE_EVENT_CLASS(nfs_xdr_event,
__entry->xid = be32_to_cpu(rqstp->rq_xid);
__entry->version = task->tk_client->cl_vers;
__entry->error = error;
- __assign_str(program,
- task->tk_client->cl_program->name);
- __assign_str(procedure, task->tk_msg.rpc_proc->p_name);
+ __assign_str(program);
+ __assign_str(procedure);
),
TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 6efb5068c116..11968dcb7243 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -188,102 +188,6 @@ nfs_async_iocounter_wait(struct rpc_task *task, struct nfs_lock_context *l_ctx)
EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait);
/*
- * nfs_page_lock_head_request - page lock the head of the page group
- * @req: any member of the page group
- */
-struct nfs_page *
-nfs_page_group_lock_head(struct nfs_page *req)
-{
- struct nfs_page *head = req->wb_head;
-
- while (!nfs_lock_request(head)) {
- int ret = nfs_wait_on_request(head);
- if (ret < 0)
- return ERR_PTR(ret);
- }
- if (head != req)
- kref_get(&head->wb_kref);
- return head;
-}
-
-/*
- * nfs_unroll_locks - unlock all newly locked reqs and wait on @req
- * @head: head request of page group, must be holding head lock
- * @req: request that couldn't lock and needs to wait on the req bit lock
- *
- * This is a helper function for nfs_lock_and_join_requests
- * returns 0 on success, < 0 on error.
- */
-static void
-nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req)
-{
- struct nfs_page *tmp;
-
- /* relinquish all the locks successfully grabbed this run */
- for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) {
- if (!kref_read(&tmp->wb_kref))
- continue;
- nfs_unlock_and_release_request(tmp);
- }
-}
-
-/*
- * nfs_page_group_lock_subreq - try to lock a subrequest
- * @head: head request of page group
- * @subreq: request to lock
- *
- * This is a helper function for nfs_lock_and_join_requests which
- * must be called with the head request and page group both locked.
- * On error, it returns with the page group unlocked.
- */
-static int
-nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq)
-{
- int ret;
-
- if (!kref_get_unless_zero(&subreq->wb_kref))
- return 0;
- while (!nfs_lock_request(subreq)) {
- nfs_page_group_unlock(head);
- ret = nfs_wait_on_request(subreq);
- if (!ret)
- ret = nfs_page_group_lock(head);
- if (ret < 0) {
- nfs_unroll_locks(head, subreq);
- nfs_release_request(subreq);
- return ret;
- }
- }
- return 0;
-}
-
-/*
- * nfs_page_group_lock_subrequests - try to lock the subrequests
- * @head: head request of page group
- *
- * This is a helper function for nfs_lock_and_join_requests which
- * must be called with the head request locked.
- */
-int nfs_page_group_lock_subrequests(struct nfs_page *head)
-{
- struct nfs_page *subreq;
- int ret;
-
- ret = nfs_page_group_lock(head);
- if (ret < 0)
- return ret;
- /* lock each request in the page group */
- for (subreq = head->wb_this_page; subreq != head;
- subreq = subreq->wb_this_page) {
- ret = nfs_page_group_lock_subreq(head, subreq);
- if (ret < 0)
- return ret;
- }
- nfs_page_group_unlock(head);
- return 0;
-}
-
-/*
* nfs_page_set_headlock - set the request PG_HEADLOCK
* @req: request that is to be locked
*
@@ -569,7 +473,7 @@ struct nfs_page *nfs_page_create_from_folio(struct nfs_open_context *ctx,
if (IS_ERR(l_ctx))
return ERR_CAST(l_ctx);
- ret = nfs_page_create(l_ctx, offset, folio_index(folio), offset, count);
+ ret = nfs_page_create(l_ctx, offset, folio->index, offset, count);
if (!IS_ERR(ret)) {
nfs_page_assign_folio(ret, folio);
nfs_page_group_init(ret, NULL);
@@ -694,25 +598,6 @@ void nfs_release_request(struct nfs_page *req)
}
EXPORT_SYMBOL_GPL(nfs_release_request);
-/**
- * nfs_wait_on_request - Wait for a request to complete.
- * @req: request to wait upon.
- *
- * Interruptible by fatal signals only.
- * The user is responsible for holding a count on the request.
- */
-int
-nfs_wait_on_request(struct nfs_page *req)
-{
- if (!test_bit(PG_BUSY, &req->wb_flags))
- return 0;
- set_bit(PG_CONTENDED2, &req->wb_flags);
- smp_mb__after_atomic();
- return wait_on_bit_io(&req->wb_flags, PG_BUSY,
- TASK_UNINTERRUPTIBLE);
-}
-EXPORT_SYMBOL_GPL(nfs_wait_on_request);
-
/*
* nfs_generic_pg_test - determine if requests can be coalesced
* @desc: pointer to descriptor
@@ -846,7 +731,8 @@ static void nfs_pgio_prepare(struct rpc_task *task, void *calldata)
int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
const struct cred *cred, const struct nfs_rpc_ops *rpc_ops,
- const struct rpc_call_ops *call_ops, int how, int flags)
+ const struct rpc_call_ops *call_ops, int how, int flags,
+ struct nfsd_file *localio)
{
struct rpc_task *task;
struct rpc_message msg = {
@@ -876,6 +762,10 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
hdr->args.count,
(unsigned long long)hdr->args.offset);
+ if (localio)
+ return nfs_local_doio(NFS_SERVER(hdr->inode)->nfs_client,
+ localio, hdr, call_ops);
+
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
@@ -1068,6 +958,13 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
ret = nfs_generic_pgio(desc, hdr);
if (ret == 0) {
+ struct nfs_client *clp = NFS_SERVER(hdr->inode)->nfs_client;
+
+ struct nfsd_file *localio =
+ nfs_local_open_fh(clp, hdr->cred, hdr->args.fh,
+ &hdr->args.context->nfl,
+ hdr->args.context->mode);
+
if (NFS_SERVER(hdr->inode)->nfs_client->cl_minorversion)
task_flags = RPC_TASK_MOVEABLE;
ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
@@ -1076,7 +973,8 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
NFS_PROTO(hdr->inode),
desc->pg_rpc_callops,
desc->pg_ioflags,
- RPC_TASK_CRED_NOREF | task_flags);
+ RPC_TASK_CRED_NOREF | task_flags,
+ localio);
}
return ret;
}
@@ -1545,6 +1443,11 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
continue;
} else if (index == prev->wb_index + 1)
continue;
+ /*
+ * We will submit more requests after these. Indicate
+ * this to the underlying layers.
+ */
+ desc->pg_moreio = 1;
nfs_pageio_complete(desc);
break;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index a5cc6199127f..3adb7d0dbec7 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -61,6 +61,7 @@ static void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo,
u32 seq);
static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
struct list_head *tmp_list);
+static int pnfs_layout_return_on_reboot(struct pnfs_layout_hdr *lo);
/* Return the registered pnfs layout driver module matching given id */
static struct pnfs_layoutdriver_type *
@@ -476,6 +477,18 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
return !list_empty(&lo->plh_segs);
}
+static int pnfs_mark_layout_stateid_return(struct pnfs_layout_hdr *lo,
+ struct list_head *lseg_list,
+ enum pnfs_iomode iomode, u32 seq)
+{
+ struct pnfs_layout_range range = {
+ .iomode = iomode,
+ .length = NFS4_MAX_UINT64,
+ };
+
+ return pnfs_mark_matching_lsegs_return(lo, lseg_list, &range, seq);
+}
+
static int
pnfs_iomode_to_fail_bit(u32 iomode)
{
@@ -732,6 +745,14 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
return remaining;
}
+static void pnfs_reset_return_info(struct pnfs_layout_hdr *lo)
+{
+ struct pnfs_layout_segment *lseg;
+
+ list_for_each_entry(lseg, &lo->plh_return_segs, pls_list)
+ pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
+}
+
static void
pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo,
struct list_head *free_me,
@@ -846,8 +867,6 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
break;
inode = pnfs_grab_inode_layout_hdr(lo);
if (inode != NULL) {
- if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags))
- list_del_rcu(&lo->plh_layouts);
if (pnfs_layout_add_bulk_destroy_list(inode,
layout_list))
continue;
@@ -868,7 +887,7 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
static int
pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
- bool is_bulk_recall)
+ enum pnfs_layout_destroy_mode mode)
{
struct pnfs_layout_hdr *lo;
struct inode *inode;
@@ -886,8 +905,11 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
spin_lock(&inode->i_lock);
list_del_init(&lo->plh_bulk_destroy);
- if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
- if (is_bulk_recall)
+ if (mode == PNFS_LAYOUT_FILE_BULK_RETURN) {
+ pnfs_mark_layout_stateid_return(lo, &lseg_list,
+ IOMODE_ANY, 0);
+ } else if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
+ if (mode == PNFS_LAYOUT_BULK_RETURN)
set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
ret = -EAGAIN;
}
@@ -901,10 +923,8 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
return ret;
}
-int
-pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
- struct nfs_fsid *fsid,
- bool is_recall)
+int pnfs_layout_destroy_byfsid(struct nfs_client *clp, struct nfs_fsid *fsid,
+ enum pnfs_layout_destroy_mode mode)
{
struct nfs_server *server;
LIST_HEAD(layout_list);
@@ -923,33 +943,40 @@ restart:
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
- if (list_empty(&layout_list))
- return 0;
- return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
+ return pnfs_layout_free_bulk_destroy_list(&layout_list, mode);
}
-int
-pnfs_destroy_layouts_byclid(struct nfs_client *clp,
- bool is_recall)
+static void pnfs_layout_build_destroy_list_byclient(struct nfs_client *clp,
+ struct list_head *list)
{
struct nfs_server *server;
- LIST_HEAD(layout_list);
spin_lock(&clp->cl_lock);
rcu_read_lock();
restart:
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
- if (pnfs_layout_bulk_destroy_byserver_locked(clp,
- server,
- &layout_list) != 0)
+ if (pnfs_layout_bulk_destroy_byserver_locked(clp, server,
+ list) != 0)
goto restart;
}
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
+}
- if (list_empty(&layout_list))
- return 0;
- return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
+static int pnfs_layout_do_destroy_byclid(struct nfs_client *clp,
+ struct list_head *list,
+ enum pnfs_layout_destroy_mode mode)
+{
+ pnfs_layout_build_destroy_list_byclient(clp, list);
+ return pnfs_layout_free_bulk_destroy_list(list, mode);
+}
+
+int pnfs_layout_destroy_byclid(struct nfs_client *clp,
+ enum pnfs_layout_destroy_mode mode)
+{
+ LIST_HEAD(layout_list);
+
+ return pnfs_layout_do_destroy_byclid(clp, &layout_list, mode);
}
/*
@@ -962,7 +989,68 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
nfs4_deviceid_mark_client_invalid(clp);
nfs4_deviceid_purge_client(clp);
- pnfs_destroy_layouts_byclid(clp, false);
+ pnfs_layout_destroy_byclid(clp, PNFS_LAYOUT_INVALIDATE);
+}
+
+static void pnfs_layout_build_recover_list_byclient(struct nfs_client *clp,
+ struct list_head *list)
+{
+ struct nfs_server *server;
+
+ spin_lock(&clp->cl_lock);
+ rcu_read_lock();
+restart:
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ if (!(server->caps & NFS_CAP_REBOOT_LAYOUTRETURN))
+ continue;
+ if (pnfs_layout_bulk_destroy_byserver_locked(clp, server,
+ list) != 0)
+ goto restart;
+ }
+ rcu_read_unlock();
+ spin_unlock(&clp->cl_lock);
+}
+
+static int pnfs_layout_bulk_list_reboot(struct list_head *list)
+{
+ struct pnfs_layout_hdr *lo;
+ struct nfs_server *server;
+ int ret;
+
+ list_for_each_entry(lo, list, plh_bulk_destroy) {
+ server = NFS_SERVER(lo->plh_inode);
+ ret = pnfs_layout_return_on_reboot(lo);
+ switch (ret) {
+ case 0:
+ continue;
+ case -NFS4ERR_BAD_STATEID:
+ server->caps &= ~NFS_CAP_REBOOT_LAYOUTRETURN;
+ break;
+ case -NFS4ERR_NO_GRACE:
+ break;
+ default:
+ goto err;
+ }
+ break;
+ }
+ return 0;
+err:
+ return ret;
+}
+
+int pnfs_layout_handle_reboot(struct nfs_client *clp)
+{
+ LIST_HEAD(list);
+ int ret = 0, ret2;
+
+ pnfs_layout_build_recover_list_byclient(clp, &list);
+ if (!list_empty(&list))
+ ret = pnfs_layout_bulk_list_reboot(&list);
+ ret2 = pnfs_layout_do_destroy_byclid(clp, &list,
+ PNFS_LAYOUT_INVALIDATE);
+ if (!ret)
+ ret = ret2;
+ return (ret == 0) ? 0 : -EAGAIN;
}
static void
@@ -1163,6 +1251,33 @@ static void pnfs_clear_layoutcommit(struct inode *inode,
}
}
+static void
+pnfs_layoutreturn_retry_later_locked(struct pnfs_layout_hdr *lo,
+ const nfs4_stateid *arg_stateid,
+ const struct pnfs_layout_range *range,
+ struct list_head *freeme)
+{
+ if (pnfs_layout_is_valid(lo) &&
+ nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid))
+ pnfs_reset_return_info(lo);
+ else
+ pnfs_mark_layout_stateid_invalid(lo, freeme);
+ pnfs_clear_layoutreturn_waitbit(lo);
+}
+
+void pnfs_layoutreturn_retry_later(struct pnfs_layout_hdr *lo,
+ const nfs4_stateid *arg_stateid,
+ const struct pnfs_layout_range *range)
+{
+ struct inode *inode = lo->plh_inode;
+ LIST_HEAD(freeme);
+
+ spin_lock(&inode->i_lock);
+ pnfs_layoutreturn_retry_later_locked(lo, arg_stateid, range, &freeme);
+ spin_unlock(&inode->i_lock);
+ pnfs_free_lseg_list(&freeme);
+}
+
void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
const nfs4_stateid *arg_stateid,
const struct pnfs_layout_range *range,
@@ -1172,15 +1287,15 @@ void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
LIST_HEAD(freeme);
spin_lock(&inode->i_lock);
- if (!pnfs_layout_is_valid(lo) ||
- !nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid))
+ if (!nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid))
goto out_unlock;
- if (stateid) {
+ if (stateid && pnfs_layout_is_valid(lo)) {
u32 seq = be32_to_cpu(arg_stateid->seqid);
pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq);
pnfs_free_returned_lsegs(lo, &freeme, range, seq);
pnfs_set_layout_stateid(lo, stateid, NULL, true);
+ pnfs_reset_return_info(lo);
} else
pnfs_mark_layout_stateid_invalid(lo, &freeme);
out_unlock:
@@ -1197,7 +1312,7 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
enum pnfs_iomode *iomode)
{
/* Serialise LAYOUTGET/LAYOUTRETURN */
- if (atomic_read(&lo->plh_outstanding) != 0)
+ if (atomic_read(&lo->plh_outstanding) != 0 && lo->plh_return_seq == 0)
return false;
if (test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
return false;
@@ -1239,7 +1354,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo,
const nfs4_stateid *stateid,
const struct cred **pcred,
enum pnfs_iomode iomode,
- bool sync)
+ unsigned int flags)
{
struct inode *ino = lo->plh_inode;
struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
@@ -1266,33 +1381,21 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo,
if (ld->prepare_layoutreturn)
ld->prepare_layoutreturn(&lrp->args);
- status = nfs4_proc_layoutreturn(lrp, sync);
+ status = nfs4_proc_layoutreturn(lrp, flags);
out:
dprintk("<-- %s status: %d\n", __func__, status);
return status;
}
-static bool
-pnfs_layout_segments_returnable(struct pnfs_layout_hdr *lo,
- enum pnfs_iomode iomode,
- u32 seq)
-{
- struct pnfs_layout_range recall_range = {
- .length = NFS4_MAX_UINT64,
- .iomode = iomode,
- };
- return pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
- &recall_range, seq) != -EBUSY;
-}
-
/* Return true if layoutreturn is needed */
static bool
pnfs_layout_need_return(struct pnfs_layout_hdr *lo)
{
if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
return false;
- return pnfs_layout_segments_returnable(lo, lo->plh_return_iomode,
- lo->plh_return_seq);
+ return pnfs_mark_layout_stateid_return(lo, &lo->plh_return_segs,
+ lo->plh_return_iomode,
+ lo->plh_return_seq) != EBUSY;
}
static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
@@ -1312,7 +1415,8 @@ static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
spin_unlock(&inode->i_lock);
if (send) {
/* Send an async layoutreturn so we dont deadlock */
- pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
+ pnfs_send_layoutreturn(lo, &stateid, &cred, iomode,
+ PNFS_FL_LAYOUTRETURN_ASYNC);
}
} else
spin_unlock(&inode->i_lock);
@@ -1379,7 +1483,8 @@ _pnfs_return_layout(struct inode *ino)
send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, NULL);
spin_unlock(&ino->i_lock);
if (send)
- status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY, true);
+ status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY,
+ 0);
out_wait_layoutreturn:
wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, TASK_UNINTERRUPTIBLE);
out_put_layout_hdr:
@@ -1417,6 +1522,24 @@ pnfs_commit_and_return_layout(struct inode *inode)
return ret;
}
+static int pnfs_layout_return_on_reboot(struct pnfs_layout_hdr *lo)
+{
+ struct inode *inode = lo->plh_inode;
+ const struct cred *cred;
+
+ spin_lock(&inode->i_lock);
+ if (!pnfs_layout_is_valid(lo)) {
+ spin_unlock(&inode->i_lock);
+ return 0;
+ }
+ cred = get_cred(lo->plh_lc_cred);
+ pnfs_get_layout_hdr(lo);
+ spin_unlock(&inode->i_lock);
+
+ return pnfs_send_layoutreturn(lo, &zero_stateid, &cred, IOMODE_ANY,
+ PNFS_FL_LAYOUTRETURN_PRIVILEGED);
+}
+
bool pnfs_roc(struct inode *ino,
struct nfs4_layoutreturn_args *args,
struct nfs4_layoutreturn_res *res,
@@ -1520,7 +1643,7 @@ out_noroc:
return true;
}
if (layoutreturn)
- pnfs_send_layoutreturn(lo, &stateid, &lc_cred, iomode, true);
+ pnfs_send_layoutreturn(lo, &stateid, &lc_cred, iomode, 0);
pnfs_put_layout_hdr(lo);
return false;
}
@@ -1542,6 +1665,18 @@ int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp,
/* Was there an RPC level error? If not, retry */
if (task->tk_rpc_status == 0)
break;
+ /*
+ * Is there a fatal network level error?
+ * If so release the layout, but flag the error.
+ */
+ if ((task->tk_rpc_status == -ENETDOWN ||
+ task->tk_rpc_status == -ENETUNREACH) &&
+ task->tk_flags & RPC_TASK_NETUNREACH_FATAL) {
+ *ret = 0;
+ (*respp)->lrs_present = 0;
+ retval = -EIO;
+ break;
+ }
/* If the call was not sent, let caller handle it */
if (!RPC_WAS_SENT(task))
return 0;
@@ -1570,22 +1705,24 @@ int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp,
}
void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
- struct nfs4_layoutreturn_res *res,
- int ret)
+ struct nfs4_layoutreturn_res *res, int ret)
{
struct pnfs_layout_hdr *lo = args->layout;
struct inode *inode = args->inode;
const nfs4_stateid *res_stateid = NULL;
struct nfs4_xdr_opaque_data *ld_private = args->ld_private;
+ LIST_HEAD(freeme);
switch (ret) {
+ case -NFS4ERR_BADSESSION:
+ case -NFS4ERR_DEADSESSION:
+ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_NOMATCHING_LAYOUT:
spin_lock(&inode->i_lock);
- if (pnfs_layout_is_valid(lo) &&
- nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid))
- pnfs_set_plh_return_info(lo, args->range.iomode, 0);
- pnfs_clear_layoutreturn_waitbit(lo);
+ pnfs_layoutreturn_retry_later_locked(lo, &args->stateid,
+ &args->range, &freeme);
spin_unlock(&inode->i_lock);
+ pnfs_free_lseg_list(&freeme);
break;
case 0:
if (res->lrs_present)
@@ -2566,7 +2703,8 @@ pnfs_mark_layout_for_return(struct inode *inode,
return_now = pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode);
spin_unlock(&inode->i_lock);
if (return_now)
- pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
+ pnfs_send_layoutreturn(lo, &stateid, &cred, iomode,
+ PNFS_FL_LAYOUTRETURN_ASYNC);
} else {
spin_unlock(&inode->i_lock);
nfs_commit_inode(inode, 0);
@@ -2682,7 +2820,8 @@ restart:
}
spin_unlock(&inode->i_lock);
rcu_read_unlock();
- pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
+ pnfs_send_layoutreturn(lo, &stateid, &cred, iomode,
+ PNFS_FL_LAYOUTRETURN_ASYNC);
pnfs_put_layout_hdr(lo);
cond_resched();
goto restart;
@@ -2705,38 +2844,28 @@ pnfs_layout_return_unused_byclid(struct nfs_client *clp,
&range);
}
+/* Check if we have we have a valid layout but if there isn't an intersection
+ * between the request and the pgio->pg_lseg, put this pgio->pg_lseg away.
+ */
void
-pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio)
+pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio,
+ struct nfs_page *req)
{
if (pgio->pg_lseg == NULL ||
- test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags))
+ (test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags) &&
+ pnfs_lseg_request_intersecting(pgio->pg_lseg, req)))
return;
pnfs_put_lseg(pgio->pg_lseg);
pgio->pg_lseg = NULL;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout);
-/*
- * Check for any intersection between the request and the pgio->pg_lseg,
- * and if none, put this pgio->pg_lseg away.
- */
-void
-pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
-{
- if (pgio->pg_lseg && !pnfs_lseg_request_intersecting(pgio->pg_lseg, req)) {
- pnfs_put_lseg(pgio->pg_lseg);
- pgio->pg_lseg = NULL;
- }
-}
-EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_range);
-
void
pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
u64 rd_size;
- pnfs_generic_pg_check_layout(pgio);
- pnfs_generic_pg_check_range(pgio, req);
+ pnfs_generic_pg_check_layout(pgio, req);
if (pgio->pg_lseg == NULL) {
if (pgio->pg_dreq == NULL)
rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
@@ -2766,8 +2895,7 @@ void
pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req, u64 wb_size)
{
- pnfs_generic_pg_check_layout(pgio);
- pnfs_generic_pg_check_range(pgio, req);
+ pnfs_generic_pg_check_layout(pgio, req);
if (pgio->pg_lseg == NULL) {
pgio->pg_lseg =
pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req),
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index db57a85500ee..91ff877185c8 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -60,6 +60,7 @@ struct nfs4_pnfs_ds {
struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
char *ds_remotestr; /* comma sep list of addrs */
struct list_head ds_addrs;
+ const struct net *ds_net;
struct nfs_client *ds_clp;
refcount_t ds_count;
unsigned long ds_state;
@@ -118,6 +119,12 @@ enum layoutdriver_policy_flags {
PNFS_LAYOUTGET_ON_OPEN = 1 << 3,
};
+enum pnfs_layout_destroy_mode {
+ PNFS_LAYOUT_INVALIDATE = 0,
+ PNFS_LAYOUT_BULK_RETURN,
+ PNFS_LAYOUT_FILE_BULK_RETURN,
+};
+
struct nfs4_deviceid_node;
/* Per-layout driver specific registration structure */
@@ -127,7 +134,6 @@ struct pnfs_layoutdriver_type {
const char *name;
struct module *owner;
unsigned flags;
- unsigned max_deviceinfo_size;
unsigned max_layoutget_response;
int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *);
@@ -193,8 +199,6 @@ struct pnfs_commit_ops {
int max);
void (*recover_commit_reqs) (struct list_head *list,
struct nfs_commit_info *cinfo);
- struct nfs_page * (*search_commit_reqs)(struct nfs_commit_info *cinfo,
- struct folio *folio);
};
struct pnfs_layout_hdr {
@@ -242,6 +246,9 @@ extern const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id);
extern void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld);
/* nfs4proc.c */
+#define PNFS_FL_LAYOUTRETURN_ASYNC (1U << 0)
+#define PNFS_FL_LAYOUTRETURN_PRIVILEGED (1U << 1)
+
extern size_t max_response_pages(struct nfs_server *server);
extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
struct pnfs_device *dev,
@@ -249,7 +256,8 @@ extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
extern struct pnfs_layout_segment *
nfs4_proc_layoutget(struct nfs4_layoutget *lgp,
struct nfs4_exception *exception);
-extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync);
+extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp,
+ unsigned int flags);
/* pnfs.c */
void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
@@ -257,8 +265,7 @@ void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, struct nfs_fsinfo *);
void unset_pnfs_layoutdriver(struct nfs_server *);
-void pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio);
-void pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req);
+void pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio, struct nfs_page *req);
void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
@@ -274,11 +281,10 @@ void pnfs_free_lseg_list(struct list_head *tmp_list);
void pnfs_destroy_layout(struct nfs_inode *);
void pnfs_destroy_layout_final(struct nfs_inode *);
void pnfs_destroy_all_layouts(struct nfs_client *);
-int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
- struct nfs_fsid *fsid,
- bool is_recall);
-int pnfs_destroy_layouts_byclid(struct nfs_client *clp,
- bool is_recall);
+int pnfs_layout_destroy_byfsid(struct nfs_client *clp, struct nfs_fsid *fsid,
+ enum pnfs_layout_destroy_mode mode);
+int pnfs_layout_destroy_byclid(struct nfs_client *clp,
+ enum pnfs_layout_destroy_mode mode);
bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
struct pnfs_layout_range *dst_range,
struct inode *inode);
@@ -324,6 +330,9 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
enum pnfs_iomode iomode,
bool strict_iomode,
gfp_t gfp_flags);
+void pnfs_layoutreturn_retry_later(struct pnfs_layout_hdr *lo,
+ const nfs4_stateid *arg_stateid,
+ const struct pnfs_layout_range *range);
void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
const nfs4_stateid *arg_stateid,
const struct pnfs_layout_range *range,
@@ -345,6 +354,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
struct pnfs_layout_segment *lseg);
void pnfs_layout_return_unused_byclid(struct nfs_client *clp,
enum pnfs_iomode iomode);
+int pnfs_layout_handle_reboot(struct nfs_client *clp);
/* nfs4_deviceid_flags */
enum {
@@ -397,8 +407,6 @@ void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data);
void pnfs_generic_rw_release(void *data);
void pnfs_generic_recover_commit_reqs(struct list_head *dst,
struct nfs_commit_info *cinfo);
-struct nfs_page *pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo,
- struct folio *folio);
int pnfs_generic_commit_pagelist(struct inode *inode,
struct list_head *mds_pages,
int how,
@@ -408,7 +416,8 @@ int pnfs_generic_commit_pagelist(struct inode *inode,
int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max);
void pnfs_generic_write_commit_done(struct rpc_task *task, void *data);
void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds);
-struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs,
+struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(const struct net *net,
+ struct list_head *dsaddrs,
gfp_t gfp_flags);
void nfs4_pnfs_v3_ds_connect_unload(void);
int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
@@ -558,17 +567,6 @@ pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo)
fl_cinfo->ops->recover_commit_reqs(head, cinfo);
}
-static inline struct nfs_page *
-pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
- struct folio *folio)
-{
- struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
-
- if (!fl_cinfo->ops || !fl_cinfo->ops->search_commit_reqs)
- return NULL;
- return fl_cinfo->ops->search_commit_reqs(cinfo, folio);
-}
-
/* Should the pNFS client commit and return the layout upon a setattr */
static inline bool
pnfs_ld_layoutret_on_setattr(struct inode *inode)
@@ -726,6 +724,11 @@ static inline void pnfs_destroy_layout_final(struct nfs_inode *nfsi)
{
}
+static inline int pnfs_layout_handle_reboot(struct nfs_client *clp)
+{
+ return 0;
+}
+
static inline struct pnfs_layout_segment *
pnfs_get_lseg(struct pnfs_layout_segment *lseg)
{
@@ -865,13 +868,6 @@ pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo)
{
}
-static inline struct nfs_page *
-pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
- struct folio *folio)
-{
- return NULL;
-}
-
static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
{
return 0;
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index 178001c90156..bf0f2d67e96c 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -110,9 +110,6 @@ nfs4_get_device_info(struct nfs_server *server,
* GETDEVICEINFO's maxcount
*/
max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
- if (server->pnfs_curr_ld->max_deviceinfo_size &&
- server->pnfs_curr_ld->max_deviceinfo_size < max_resp_sz)
- max_resp_sz = server->pnfs_curr_ld->max_deviceinfo_size;
max_pages = nfs_page_array_len(0, max_resp_sz);
dprintk("%s: server %p max_resp_sz %u max_pages %d\n",
__func__, server, max_resp_sz, max_pages);
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 88e061bd711b..91ef486f40b9 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -16,6 +16,7 @@
#include "nfs4session.h"
#include "internal.h"
#include "pnfs.h"
+#include "netns.h"
#define NFSDBG_FACILITY NFSDBG_PNFS
@@ -351,53 +352,6 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst,
}
EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs);
-static struct nfs_page *
-pnfs_bucket_search_commit_reqs(struct pnfs_commit_bucket *buckets,
- unsigned int nbuckets, struct folio *folio)
-{
- struct nfs_page *req;
- struct pnfs_commit_bucket *b;
- unsigned int i;
-
- /* Linearly search the commit lists for each bucket until a matching
- * request is found */
- for (i = 0, b = buckets; i < nbuckets; i++, b++) {
- list_for_each_entry(req, &b->written, wb_list) {
- if (nfs_page_to_folio(req) == folio)
- return req->wb_head;
- }
- list_for_each_entry(req, &b->committing, wb_list) {
- if (nfs_page_to_folio(req) == folio)
- return req->wb_head;
- }
- }
- return NULL;
-}
-
-/* pnfs_generic_search_commit_reqs - Search lists in @cinfo for the head request
- * for @folio
- * @cinfo - commit info for current inode
- * @folio - page to search for matching head request
- *
- * Return: the head request if one is found, otherwise %NULL.
- */
-struct nfs_page *pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo,
- struct folio *folio)
-{
- struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
- struct pnfs_commit_array *array;
- struct nfs_page *req;
-
- list_for_each_entry(array, &fl_cinfo->commits, cinfo_list) {
- req = pnfs_bucket_search_commit_reqs(array->buckets,
- array->nbuckets, folio);
- if (req)
- return req;
- }
- return NULL;
-}
-EXPORT_SYMBOL_GPL(pnfs_generic_search_commit_reqs);
-
static struct pnfs_layout_segment *
pnfs_bucket_get_committing(struct list_head *head,
struct pnfs_commit_bucket *bucket,
@@ -537,7 +491,7 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
nfs_initiate_commit(NFS_CLIENT(inode), data,
NFS_PROTO(data->inode),
data->mds_ops, how,
- RPC_TASK_CRED_NOREF);
+ RPC_TASK_CRED_NOREF, NULL);
} else {
nfs_init_commit(data, NULL, data->lseg, cinfo);
initiate_commit(data, how);
@@ -551,14 +505,14 @@ EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist);
/*
* Data server cache
*
- * Data servers can be mapped to different device ids.
- * nfs4_pnfs_ds reference counting
+ * Data servers can be mapped to different device ids, but should
+ * never be shared between net namespaces.
+ *
+ * nfs4_pnfs_ds reference counting:
* - set to 1 on allocation
* - incremented when a device id maps a data server already in the cache.
* - decremented when deviceid is removed from the cache.
*/
-static DEFINE_SPINLOCK(nfs4_ds_cache_lock);
-static LIST_HEAD(nfs4_data_server_cache);
/* Debug routines */
static void
@@ -651,11 +605,11 @@ _same_data_server_addrs_locked(const struct list_head *dsaddrs1,
* Lookup DS by addresses. nfs4_ds_cache_lock is held
*/
static struct nfs4_pnfs_ds *
-_data_server_lookup_locked(const struct list_head *dsaddrs)
+_data_server_lookup_locked(const struct nfs_net *nn, const struct list_head *dsaddrs)
{
struct nfs4_pnfs_ds *ds;
- list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
+ list_for_each_entry(ds, &nn->nfs4_data_server_cache, ds_node)
if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
return ds;
return NULL;
@@ -700,10 +654,11 @@ static void destroy_ds(struct nfs4_pnfs_ds *ds)
void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds)
{
- if (refcount_dec_and_lock(&ds->ds_count,
- &nfs4_ds_cache_lock)) {
+ struct nfs_net *nn = net_generic(ds->ds_net, nfs_net_id);
+
+ if (refcount_dec_and_lock(&ds->ds_count, &nn->nfs4_data_server_lock)) {
list_del_init(&ds->ds_node);
- spin_unlock(&nfs4_ds_cache_lock);
+ spin_unlock(&nn->nfs4_data_server_lock);
destroy_ds(ds);
}
}
@@ -763,8 +718,9 @@ out_err:
* uncached and return cached struct nfs4_pnfs_ds.
*/
struct nfs4_pnfs_ds *
-nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
+nfs4_pnfs_ds_add(const struct net *net, struct list_head *dsaddrs, gfp_t gfp_flags)
{
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
char *remotestr;
@@ -780,16 +736,17 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
/* this is only used for debugging, so it's ok if its NULL */
remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
- spin_lock(&nfs4_ds_cache_lock);
- tmp_ds = _data_server_lookup_locked(dsaddrs);
+ spin_lock(&nn->nfs4_data_server_lock);
+ tmp_ds = _data_server_lookup_locked(nn, dsaddrs);
if (tmp_ds == NULL) {
INIT_LIST_HEAD(&ds->ds_addrs);
list_splice_init(dsaddrs, &ds->ds_addrs);
ds->ds_remotestr = remotestr;
refcount_set(&ds->ds_count, 1);
INIT_LIST_HEAD(&ds->ds_node);
+ ds->ds_net = net;
ds->ds_clp = NULL;
- list_add(&ds->ds_node, &nfs4_data_server_cache);
+ list_add(&ds->ds_node, &nn->nfs4_data_server_cache);
dprintk("%s add new data server %s\n", __func__,
ds->ds_remotestr);
} else {
@@ -801,7 +758,7 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
refcount_read(&tmp_ds->ds_count));
ds = tmp_ds;
}
- spin_unlock(&nfs4_ds_cache_lock);
+ spin_unlock(&nn->nfs4_data_server_lock);
out:
return ds;
}
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index ad3a321ae997..63e71310b9f6 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -153,13 +153,13 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
}
static int
-nfs_proc_lookup(struct inode *dir, struct dentry *dentry,
+nfs_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
struct nfs_diropargs arg = {
.fh = NFS_FH(dir),
- .name = dentry->d_name.name,
- .len = dentry->d_name.len
+ .name = name->name,
+ .len = name->len
};
struct nfs_diropok res = {
.fh = fhandle,
@@ -446,13 +446,14 @@ out:
return status;
}
-static int
+static struct dentry *
nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
{
struct nfs_createdata *data;
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_MKDIR],
};
+ struct dentry *alias = NULL;
int status = -ENOMEM;
dprintk("NFS call mkdir %pd\n", dentry);
@@ -464,12 +465,15 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
- if (status == 0)
- status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
+ if (status == 0) {
+ alias = nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr);
+ status = PTR_ERR_OR_ZERO(alias);
+ } else
+ alias = ERR_PTR(status);
nfs_free_createdata(data);
out:
dprintk("NFS reply mkdir: %d\n", status);
- return status;
+ return alias;
}
static int
@@ -687,14 +691,22 @@ out_einval:
return -EINVAL;
}
-static int nfs_have_delegation(struct inode *inode, fmode_t flags)
+static int nfs_have_delegation(struct inode *inode, fmode_t type, int flags)
+{
+ return 0;
+}
+
+static int nfs_return_delegation(struct inode *inode)
{
+ if (S_ISREG(inode->i_mode))
+ nfs_wb_all(inode);
return 0;
}
static const struct inode_operations nfs_dir_inode_operations = {
.create = nfs_create,
.lookup = nfs_lookup,
+ .atomic_open = nfs_atomic_open_v23,
.link = nfs_link,
.unlink = nfs_unlink,
.symlink = nfs_symlink,
@@ -756,6 +768,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.lock_check_bounds = nfs_lock_check_bounds,
.close_context = nfs_close_context,
.have_delegation = nfs_have_delegation,
+ .return_delegation = nfs_return_delegation,
.alloc_client = nfs_alloc_client,
.init_client = nfs_init_client,
.free_client = nfs_free_client,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index a142287d86f6..81bd1b9aba17 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -28,6 +28,7 @@
#include "fscache.h"
#include "pnfs.h"
#include "nfstrace.h"
+#include "delegation.h"
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
@@ -47,8 +48,7 @@ static struct nfs_pgio_header *nfs_readhdr_alloc(void)
static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
{
- if (rhdr->res.scratch != NULL)
- kfree(rhdr->res.scratch);
+ kfree(rhdr->res.scratch);
kmem_cache_free(nfs_rdata_cachep, rhdr);
}
@@ -122,8 +122,6 @@ static void nfs_readpage_release(struct nfs_page *req, int error)
{
struct folio *folio = nfs_page_to_folio(req);
- if (nfs_error_is_fatal_on_server(error) && error != -ETIMEDOUT)
- folio_set_error(folio);
if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE))
if (nfs_netfs_folio_unlock(folio))
folio_unlock(folio);
@@ -288,7 +286,7 @@ int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio,
struct nfs_open_context *ctx,
struct folio *folio)
{
- struct inode *inode = folio_file_mapping(folio)->host;
+ struct inode *inode = folio->mapping->host;
struct nfs_server *server = NFS_SERVER(inode);
size_t fsize = folio_size(folio);
unsigned int rsize = server->rsize;
@@ -324,21 +322,57 @@ out:
}
/*
- * Read a page over NFS.
- * We read the page synchronously in the following case:
- * - The error flag is set for this page. This happens only when a
- * previous async read operation failed.
+ * Actually read a folio over the wire.
*/
-int nfs_read_folio(struct file *file, struct folio *folio)
+static int nfs_do_read_folio(struct file *file, struct folio *folio)
{
struct inode *inode = file_inode(file);
struct nfs_pageio_descriptor pgio;
struct nfs_open_context *ctx;
int ret;
- trace_nfs_aop_readpage(inode, folio);
+ ctx = get_nfs_open_context(nfs_file_open_context(file));
+
+ xchg(&ctx->error, 0);
+ nfs_pageio_init_read(&pgio, inode, false,
+ &nfs_async_read_completion_ops);
+
+ ret = nfs_read_add_folio(&pgio, ctx, folio);
+ if (ret)
+ goto out_put;
+
+ nfs_pageio_complete_read(&pgio);
+ nfs_update_delegated_atime(inode);
+ if (pgio.pg_error < 0) {
+ ret = pgio.pg_error;
+ goto out_put;
+ }
+
+ ret = folio_wait_locked_killable(folio);
+ if (!folio_test_uptodate(folio) && !ret)
+ ret = xchg(&ctx->error, 0);
+
+out_put:
+ put_nfs_open_context(ctx);
+ return ret;
+}
+
+/*
+ * Synchronously read a folio.
+ *
+ * This is not heavily used as most users to try an asynchronous
+ * large read through ->readahead first.
+ */
+int nfs_read_folio(struct file *file, struct folio *folio)
+{
+ struct inode *inode = file_inode(file);
+ loff_t pos = folio_pos(folio);
+ size_t len = folio_size(folio);
+ int ret;
+
+ trace_nfs_aop_readpage(inode, pos, len);
nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
- task_io_account_read(folio_size(folio));
+ task_io_account_read(len);
/*
* Try to flush any pending writes to the file..
@@ -358,30 +392,10 @@ int nfs_read_folio(struct file *file, struct folio *folio)
goto out_unlock;
ret = nfs_netfs_read_folio(file, folio);
- if (!ret)
- goto out;
-
- ctx = get_nfs_open_context(nfs_file_open_context(file));
-
- xchg(&ctx->error, 0);
- nfs_pageio_init_read(&pgio, inode, false,
- &nfs_async_read_completion_ops);
-
- ret = nfs_read_add_folio(&pgio, ctx, folio);
if (ret)
- goto out_put;
-
- nfs_pageio_complete_read(&pgio);
- ret = pgio.pg_error < 0 ? pgio.pg_error : 0;
- if (!ret) {
- ret = folio_wait_locked_killable(folio);
- if (!folio_test_uptodate(folio) && !ret)
- ret = xchg(&ctx->error, 0);
- }
-out_put:
- put_nfs_open_context(ctx);
+ ret = nfs_do_read_folio(file, folio);
out:
- trace_nfs_aop_readpage_done(inode, folio, ret);
+ trace_nfs_aop_readpage_done(inode, pos, len, ret);
return ret;
out_unlock:
folio_unlock(folio);
@@ -428,6 +442,7 @@ void nfs_readahead(struct readahead_control *ractl)
}
nfs_pageio_complete_read(&pgio);
+ nfs_update_delegated_atime(inode);
put_nfs_open_context(ctx);
out:
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index dc03f98f7616..9eea9e62afc9 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -47,6 +47,7 @@
#include <linux/vfs.h>
#include <linux/inet.h>
#include <linux/in6.h>
+#include <linux/sched.h>
#include <linux/slab.h>
#include <net/ipv6.h>
#include <linux/netdevice.h>
@@ -72,6 +73,7 @@
#include "nfs.h"
#include "netns.h"
#include "sysfs.h"
+#include "nfs4idmap.h"
#define NFSDBG_FACILITY NFSDBG_VFS
@@ -228,6 +230,7 @@ static int __nfs_list_for_each_server(struct list_head *head,
ret = fn(server, data);
if (ret)
goto out;
+ cond_resched();
rcu_read_lock();
}
rcu_read_unlock();
@@ -451,8 +454,12 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
{ NFS_MOUNT_NONLM, ",nolock", "" },
{ NFS_MOUNT_NOACL, ",noacl", "" },
{ NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
+ { NFS_MOUNT_FORCE_RDIRPLUS, ",rdirplus=force", "" },
{ NFS_MOUNT_UNSHARED, ",nosharecache", "" },
{ NFS_MOUNT_NORESVPORT, ",noresvport", "" },
+ { NFS_MOUNT_NETUNREACH_FATAL,
+ ",fatal_neterrors=ENETDOWN:ENETUNREACH",
+ ",fatal_neterrors=none" },
{ 0, NULL, NULL }
};
const struct proc_nfs_info *nfs_infop;
@@ -549,6 +556,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
else
seq_puts(m, ",local_lock=posix");
+ if (nfss->flags & NFS_MOUNT_NO_ALIGNWRITE)
+ seq_puts(m, ",noalignwrite");
+
if (nfss->flags & NFS_MOUNT_WRITE_EAGER) {
if (nfss->flags & NFS_MOUNT_WRITE_WAIT)
seq_puts(m, ",write=wait");
@@ -880,7 +890,15 @@ static int nfs_request_mount(struct fs_context *fc,
* Now ask the mount server to map our export path
* to a file handle.
*/
- status = nfs_mount(&request, ctx->timeo, ctx->retrans);
+ if ((request.protocol == XPRT_TRANSPORT_UDP) ==
+ !(ctx->flags & NFS_MOUNT_TCP))
+ /*
+ * NFS protocol and mount protocol are both UDP or neither UDP
+ * so timeouts are compatible. Use NFS timeouts for MOUNT
+ */
+ status = nfs_mount(&request, ctx->timeo, ctx->retrans);
+ else
+ status = nfs_mount(&request, NFS_UNSPEC_TIMEO, NFS_UNSPEC_RETRANS);
if (status != 0) {
dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n",
request.hostname, status);
@@ -901,6 +919,16 @@ static struct nfs_server *nfs_try_mount_request(struct fs_context *fc)
rpc_authflavor_t authlist[NFS_MAX_SECFLAVORS];
unsigned int authlist_len = ARRAY_SIZE(authlist);
+ /* make sure 'nolock'/'lock' override the 'local_lock' mount option */
+ if (ctx->lock_status) {
+ if (ctx->lock_status == NFS_LOCK_NOLOCK) {
+ ctx->flags |= NFS_MOUNT_NONLM;
+ ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL);
+ } else {
+ ctx->flags &= ~NFS_MOUNT_NONLM;
+ ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL);
+ }
+ }
status = nfs_request_mount(fc, ctx->mntfh, authlist, &authlist_len);
if (status)
return ERR_PTR(status);
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 0e27a2e4e68b..58146e935402 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -32,47 +32,39 @@ static int nfs_symlink_filler(struct file *file, struct folio *folio)
int error;
error = NFS_PROTO(inode)->readlink(inode, &folio->page, 0, PAGE_SIZE);
- if (error < 0)
- goto error;
- folio_mark_uptodate(folio);
- folio_unlock(folio);
- return 0;
-
-error:
- folio_set_error(folio);
- folio_unlock(folio);
- return -EIO;
+ folio_end_read(folio, error == 0);
+ return error;
}
static const char *nfs_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
- struct page *page;
+ struct folio *folio;
void *err;
if (!dentry) {
err = ERR_PTR(nfs_revalidate_mapping_rcu(inode));
if (err)
return err;
- page = find_get_page(inode->i_mapping, 0);
- if (!page)
+ folio = filemap_get_folio(inode->i_mapping, 0);
+ if (IS_ERR(folio))
return ERR_PTR(-ECHILD);
- if (!PageUptodate(page)) {
- put_page(page);
+ if (!folio_test_uptodate(folio)) {
+ folio_put(folio);
return ERR_PTR(-ECHILD);
}
} else {
err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
if (err)
return err;
- page = read_cache_page(&inode->i_data, 0, nfs_symlink_filler,
+ folio = read_cache_folio(&inode->i_data, 0, nfs_symlink_filler,
NULL);
- if (IS_ERR(page))
- return ERR_CAST(page);
+ if (IS_ERR(folio))
+ return ERR_CAST(folio);
}
- set_delayed_call(done, page_put_link, page);
- return page_address(page);
+ set_delayed_call(done, page_put_link, folio);
+ return folio_address(folio);
}
/*
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index e645be1a3381..f579df0e8d67 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -14,7 +14,7 @@
static struct ctl_table_header *nfs_callback_sysctl_table;
-static struct ctl_table nfs_cb_sysctls[] = {
+static const struct ctl_table nfs_cb_sysctls[] = {
{
.procname = "nfs_mountpoint_timeout",
.data = &nfs_mountpoint_expiry_timeout,
diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
index bf378ecd5d9f..37cb2b776435 100644
--- a/fs/nfs/sysfs.c
+++ b/fs/nfs/sysfs.c
@@ -14,6 +14,7 @@
#include <linux/rcupdate.h>
#include <linux/lockd/lockd.h>
+#include "internal.h"
#include "nfs4_fs.h"
#include "netns.h"
#include "sysfs.h"
@@ -228,6 +229,25 @@ static void shutdown_client(struct rpc_clnt *clnt)
rpc_cancel_tasks(clnt, -EIO, shutdown_match_client, NULL);
}
+/*
+ * Shut down the nfs_client only once all the superblocks
+ * have been shut down.
+ */
+static void shutdown_nfs_client(struct nfs_client *clp)
+{
+ struct nfs_server *server;
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ if (!(server->flags & NFS_MOUNT_SHUTDOWN)) {
+ rcu_read_unlock();
+ return;
+ }
+ }
+ rcu_read_unlock();
+ nfs_mark_client_ready(clp, -EIO);
+ shutdown_client(clp->cl_rpcclient);
+}
+
static ssize_t
shutdown_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
@@ -259,7 +279,6 @@ shutdown_store(struct kobject *kobj, struct kobj_attribute *attr,
server->flags |= NFS_MOUNT_SHUTDOWN;
shutdown_client(server->client);
- shutdown_client(server->nfs_client->cl_rpcclient);
if (!IS_ERR(server->client_acl))
shutdown_client(server->client_acl);
@@ -267,11 +286,44 @@ shutdown_store(struct kobject *kobj, struct kobj_attribute *attr,
if (server->nlm_host)
shutdown_client(server->nlm_host->h_rpcclnt);
out:
+ shutdown_nfs_client(server->nfs_client);
return count;
}
static struct kobj_attribute nfs_sysfs_attr_shutdown = __ATTR_RW(shutdown);
+#if IS_ENABLED(CONFIG_NFS_V4_1)
+static ssize_t
+implid_domain_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct nfs_server *server = container_of(kobj, struct nfs_server, kobj);
+ struct nfs41_impl_id *impl_id = server->nfs_client->cl_implid;
+
+ if (!impl_id || strlen(impl_id->domain) == 0)
+ return 0; //sysfs_emit(buf, "");
+ return sysfs_emit(buf, "%s\n", impl_id->domain);
+}
+
+static struct kobj_attribute nfs_sysfs_attr_implid_domain = __ATTR_RO(implid_domain);
+
+
+static ssize_t
+implid_name_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct nfs_server *server = container_of(kobj, struct nfs_server, kobj);
+ struct nfs41_impl_id *impl_id = server->nfs_client->cl_implid;
+
+ if (!impl_id || strlen(impl_id->name) == 0)
+ return 0; //sysfs_emit(buf, "");
+ return sysfs_emit(buf, "%s\n", impl_id->name);
+}
+
+static struct kobj_attribute nfs_sysfs_attr_implid_name = __ATTR_RO(implid_name);
+
+#endif /* IS_ENABLED(CONFIG_NFS_V4_1) */
+
#define RPC_CLIENT_NAME_SIZE 64
void nfs_sysfs_link_rpc_client(struct nfs_server *server,
@@ -280,9 +332,9 @@ void nfs_sysfs_link_rpc_client(struct nfs_server *server,
char name[RPC_CLIENT_NAME_SIZE];
int ret;
- strcpy(name, clnt->cl_program->name);
- strcat(name, uniq ? uniq : "");
- strcat(name, "_client");
+ strscpy(name, clnt->cl_program->name, sizeof(name));
+ strncat(name, uniq ? uniq : "", sizeof(name) - strlen(name) - 1);
+ strncat(name, "_client", sizeof(name) - strlen(name) - 1);
ret = sysfs_create_link_nowarn(&server->kobj,
&clnt->cl_sysfs->kobject, name);
@@ -309,6 +361,32 @@ static struct kobj_type nfs_sb_ktype = {
.child_ns_type = nfs_netns_object_child_ns_type,
};
+#if IS_ENABLED(CONFIG_NFS_V4_1)
+static void nfs_sysfs_add_nfsv41_server(struct nfs_server *server)
+{
+ int ret;
+
+ if (!server->nfs_client->cl_implid)
+ return;
+
+ ret = sysfs_create_file_ns(&server->kobj, &nfs_sysfs_attr_implid_domain.attr,
+ nfs_netns_server_namespace(&server->kobj));
+ if (ret < 0)
+ pr_warn("NFS: sysfs_create_file_ns for server-%d failed (%d)\n",
+ server->s_sysfs_id, ret);
+
+ ret = sysfs_create_file_ns(&server->kobj, &nfs_sysfs_attr_implid_name.attr,
+ nfs_netns_server_namespace(&server->kobj));
+ if (ret < 0)
+ pr_warn("NFS: sysfs_create_file_ns for server-%d failed (%d)\n",
+ server->s_sysfs_id, ret);
+}
+#else /* CONFIG_NFS_V4_1 */
+static inline void nfs_sysfs_add_nfsv41_server(struct nfs_server *server)
+{
+}
+#endif /* CONFIG_NFS_V4_1 */
+
void nfs_sysfs_add_server(struct nfs_server *server)
{
int ret;
@@ -325,6 +403,8 @@ void nfs_sysfs_add_server(struct nfs_server *server)
if (ret < 0)
pr_warn("NFS: sysfs_create_file_ns for server-%d failed (%d)\n",
server->s_sysfs_id, ret);
+
+ nfs_sysfs_add_nfsv41_server(server);
}
EXPORT_SYMBOL_GPL(nfs_sysfs_add_server);
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 0110299643a2..b55467911648 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -232,6 +232,8 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode)
dentry->d_fsdata = NULL;
spin_unlock(&dentry->d_lock);
+ NFS_PROTO(inode)->return_delegation(inode);
+
if (NFS_STALE(inode) || !nfs_call_unlink(dentry, inode, data))
nfs_free_unlinkdata(data);
}
@@ -462,18 +464,17 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
sdentry = NULL;
do {
- int slen;
dput(sdentry);
sillycounter++;
- slen = scnprintf(silly, sizeof(silly),
- SILLYNAME_PREFIX "%0*llx%0*x",
- SILLYNAME_FILEID_LEN, fileid,
- SILLYNAME_COUNTER_LEN, sillycounter);
+ scnprintf(silly, sizeof(silly),
+ SILLYNAME_PREFIX "%0*llx%0*x",
+ SILLYNAME_FILEID_LEN, fileid,
+ SILLYNAME_COUNTER_LEN, sillycounter);
dfprintk(VFS, "NFS: trying to rename %pd to %s\n",
dentry, silly);
- sdentry = lookup_one_len(silly, dentry->d_parent, slen);
+ sdentry = lookup_noperm(&QSTR(silly), dentry->d_parent);
/*
* N.B. Better to return EBUSY here ... it could be
* dangerous to delete the file while it's in use.
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5de85d725fb9..23df8b214474 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -63,9 +63,6 @@ static void nfs_clear_request_commit(struct nfs_commit_info *cinfo,
struct nfs_page *req);
static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
struct inode *inode);
-static struct nfs_page *
-nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
- struct folio *folio);
static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
@@ -172,28 +169,23 @@ nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
return 0;
}
-static struct nfs_page *nfs_folio_private_request(struct folio *folio)
-{
- return folio_get_private(folio);
-}
-
/**
- * nfs_folio_find_private_request - find head request associated with a folio
+ * nfs_folio_find_head_request - find head request associated with a folio
* @folio: pointer to folio
*
* must be called while holding the inode lock.
*
* returns matching head request with reference held, or NULL if not found.
*/
-static struct nfs_page *nfs_folio_find_private_request(struct folio *folio)
+static struct nfs_page *nfs_folio_find_head_request(struct folio *folio)
{
- struct address_space *mapping = folio_file_mapping(folio);
+ struct address_space *mapping = folio->mapping;
struct nfs_page *req;
if (!folio_test_private(folio))
return NULL;
spin_lock(&mapping->i_private_lock);
- req = nfs_folio_private_request(folio);
+ req = folio->private;
if (req) {
WARN_ON_ONCE(req->wb_head != req);
kref_get(&req->wb_kref);
@@ -202,86 +194,20 @@ static struct nfs_page *nfs_folio_find_private_request(struct folio *folio)
return req;
}
-static struct nfs_page *nfs_folio_find_swap_request(struct folio *folio)
-{
- struct inode *inode = folio_file_mapping(folio)->host;
- struct nfs_inode *nfsi = NFS_I(inode);
- struct nfs_page *req = NULL;
- if (!folio_test_swapcache(folio))
- return NULL;
- mutex_lock(&nfsi->commit_mutex);
- if (folio_test_swapcache(folio)) {
- req = nfs_page_search_commits_for_head_request_locked(nfsi,
- folio);
- if (req) {
- WARN_ON_ONCE(req->wb_head != req);
- kref_get(&req->wb_kref);
- }
- }
- mutex_unlock(&nfsi->commit_mutex);
- return req;
-}
-
-/**
- * nfs_folio_find_head_request - find head request associated with a folio
- * @folio: pointer to folio
- *
- * returns matching head request with reference held, or NULL if not found.
- */
-static struct nfs_page *nfs_folio_find_head_request(struct folio *folio)
-{
- struct nfs_page *req;
-
- req = nfs_folio_find_private_request(folio);
- if (!req)
- req = nfs_folio_find_swap_request(folio);
- return req;
-}
-
-static struct nfs_page *nfs_folio_find_and_lock_request(struct folio *folio)
-{
- struct inode *inode = folio_file_mapping(folio)->host;
- struct nfs_page *req, *head;
- int ret;
-
- for (;;) {
- req = nfs_folio_find_head_request(folio);
- if (!req)
- return req;
- head = nfs_page_group_lock_head(req);
- if (head != req)
- nfs_release_request(req);
- if (IS_ERR(head))
- return head;
- ret = nfs_cancel_remove_inode(head, inode);
- if (ret < 0) {
- nfs_unlock_and_release_request(head);
- return ERR_PTR(ret);
- }
- /* Ensure that nobody removed the request before we locked it */
- if (head == nfs_folio_private_request(folio))
- break;
- if (folio_test_swapcache(folio))
- break;
- nfs_unlock_and_release_request(head);
- }
- return head;
-}
-
/* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct folio *folio, unsigned int offset,
unsigned int count)
{
- struct inode *inode = folio_file_mapping(folio)->host;
+ struct inode *inode = folio->mapping->host;
loff_t end, i_size;
pgoff_t end_index;
spin_lock(&inode->i_lock);
i_size = i_size_read(inode);
end_index = ((i_size - 1) >> folio_shift(folio)) << folio_order(folio);
- if (i_size > 0 && folio_index(folio) < end_index)
+ if (i_size > 0 && folio->index < end_index)
goto out;
- end = folio_file_pos(folio) + (loff_t)offset + (loff_t)count;
+ end = folio_pos(folio) + (loff_t)offset + (loff_t)count;
if (i_size >= end)
goto out;
trace_nfs_size_grow(inode, end);
@@ -289,6 +215,8 @@ static void nfs_grow_file(struct folio *folio, unsigned int offset,
NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE;
nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
out:
+ /* Atomically update timestamps if they are delegated to us. */
+ nfs_update_delegated_mtime_locked(inode);
spin_unlock(&inode->i_lock);
nfs_fscache_invalidate(inode, 0);
}
@@ -309,9 +237,8 @@ static void nfs_set_pageerror(struct address_space *mapping)
static void nfs_mapping_set_error(struct folio *folio, int error)
{
- struct address_space *mapping = folio_file_mapping(folio);
+ struct address_space *mapping = folio->mapping;
- folio_set_error(folio);
filemap_set_wb_err(mapping, error);
if (mapping->host)
errseq_set(&mapping->host->i_sb->s_wb_err,
@@ -410,7 +337,7 @@ int nfs_congestion_kb;
static void nfs_folio_set_writeback(struct folio *folio)
{
- struct nfs_server *nfss = NFS_SERVER(folio_file_mapping(folio)->host);
+ struct nfs_server *nfss = NFS_SERVER(folio->mapping->host);
folio_start_writeback(folio);
if (atomic_long_inc_return(&nfss->writeback) > NFS_CONGESTION_ON_THRESH)
@@ -419,12 +346,14 @@ static void nfs_folio_set_writeback(struct folio *folio)
static void nfs_folio_end_writeback(struct folio *folio)
{
- struct nfs_server *nfss = NFS_SERVER(folio_file_mapping(folio)->host);
+ struct nfs_server *nfss = NFS_SERVER(folio->mapping->host);
folio_end_writeback(folio);
if (atomic_long_dec_return(&nfss->writeback) <
- NFS_CONGESTION_OFF_THRESH)
+ NFS_CONGESTION_OFF_THRESH) {
nfss->write_congested = 0;
+ wake_up_all(&nfss->write_congestion_wait);
+ }
}
static void nfs_page_end_writeback(struct nfs_page *req)
@@ -548,6 +477,74 @@ void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo,
nfs_destroy_unlinked_subrequests(destroy_list, head, inode);
}
+/**
+ * nfs_wait_on_request - Wait for a request to complete.
+ * @req: request to wait upon.
+ *
+ * Interruptible by fatal signals only.
+ * The user is responsible for holding a count on the request.
+ */
+static int nfs_wait_on_request(struct nfs_page *req)
+{
+ if (!test_bit(PG_BUSY, &req->wb_flags))
+ return 0;
+ set_bit(PG_CONTENDED2, &req->wb_flags);
+ smp_mb__after_atomic();
+ return wait_on_bit_io(&req->wb_flags, PG_BUSY,
+ TASK_UNINTERRUPTIBLE);
+}
+
+/*
+ * nfs_unroll_locks - unlock all newly locked reqs and wait on @req
+ * @head: head request of page group, must be holding head lock
+ * @req: request that couldn't lock and needs to wait on the req bit lock
+ *
+ * This is a helper function for nfs_lock_and_join_requests
+ * returns 0 on success, < 0 on error.
+ */
+static void
+nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req)
+{
+ struct nfs_page *tmp;
+
+ /* relinquish all the locks successfully grabbed this run */
+ for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) {
+ if (!kref_read(&tmp->wb_kref))
+ continue;
+ nfs_unlock_and_release_request(tmp);
+ }
+}
+
+/*
+ * nfs_page_group_lock_subreq - try to lock a subrequest
+ * @head: head request of page group
+ * @subreq: request to lock
+ *
+ * This is a helper function for nfs_lock_and_join_requests which
+ * must be called with the head request and page group both locked.
+ * On error, it returns with the page group unlocked.
+ */
+static int
+nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq)
+{
+ int ret;
+
+ if (!kref_get_unless_zero(&subreq->wb_kref))
+ return 0;
+ while (!nfs_lock_request(subreq)) {
+ nfs_page_group_unlock(head);
+ ret = nfs_wait_on_request(subreq);
+ if (!ret)
+ ret = nfs_page_group_lock(head);
+ if (ret < 0) {
+ nfs_unroll_locks(head, subreq);
+ nfs_release_request(subreq);
+ return ret;
+ }
+ }
+ return 0;
+}
+
/*
* nfs_lock_and_join_requests - join all subreqs to the head req
* @folio: the folio used to lookup the "page group" of nfs_page structures
@@ -565,31 +562,61 @@ void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo,
*/
static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio)
{
- struct inode *inode = folio_file_mapping(folio)->host;
- struct nfs_page *head;
+ struct inode *inode = folio->mapping->host;
+ struct nfs_page *head, *subreq;
struct nfs_commit_info cinfo;
int ret;
- nfs_init_cinfo_from_inode(&cinfo, inode);
/*
* A reference is taken only on the head request which acts as a
* reference to the whole page group - the group will not be destroyed
* until the head reference is released.
*/
- head = nfs_folio_find_and_lock_request(folio);
- if (IS_ERR_OR_NULL(head))
- return head;
+retry:
+ head = nfs_folio_find_head_request(folio);
+ if (!head)
+ return NULL;
- /* lock each request in the page group */
- ret = nfs_page_group_lock_subrequests(head);
- if (ret < 0) {
+ while (!nfs_lock_request(head)) {
+ ret = nfs_wait_on_request(head);
+ if (ret < 0) {
+ nfs_release_request(head);
+ return ERR_PTR(ret);
+ }
+ }
+
+ /* Ensure that nobody removed the request before we locked it */
+ if (head != folio->private) {
nfs_unlock_and_release_request(head);
- return ERR_PTR(ret);
+ goto retry;
}
- nfs_join_page_group(head, &cinfo, inode);
+ ret = nfs_cancel_remove_inode(head, inode);
+ if (ret < 0)
+ goto out_unlock;
+
+ ret = nfs_page_group_lock(head);
+ if (ret < 0)
+ goto out_unlock;
+ /* lock each request in the page group */
+ for (subreq = head->wb_this_page;
+ subreq != head;
+ subreq = subreq->wb_this_page) {
+ ret = nfs_page_group_lock_subreq(head, subreq);
+ if (ret < 0)
+ goto out_unlock;
+ }
+
+ nfs_page_group_unlock(head);
+
+ nfs_init_cinfo_from_inode(&cinfo, inode);
+ nfs_join_page_group(head, &cinfo, inode);
return head;
+
+out_unlock:
+ nfs_unlock_and_release_request(head);
+ return ERR_PTR(ret);
}
static void nfs_write_error(struct nfs_page *req, int error)
@@ -641,7 +668,7 @@ static int nfs_page_async_flush(struct folio *folio,
nfs_redirty_request(req);
pgio->pg_error = 0;
} else
- nfs_add_stats(folio_file_mapping(folio)->host,
+ nfs_add_stats(folio->mapping->host,
NFSIOS_WRITEPAGES, 1);
out:
return ret;
@@ -653,7 +680,7 @@ out_launder:
static int nfs_do_writepage(struct folio *folio, struct writeback_control *wbc,
struct nfs_pageio_descriptor *pgio)
{
- nfs_pageio_cond_complete(pgio, folio_index(folio));
+ nfs_pageio_cond_complete(pgio, folio->index);
return nfs_page_async_flush(folio, wbc, pgio);
}
@@ -664,7 +691,7 @@ static int nfs_writepage_locked(struct folio *folio,
struct writeback_control *wbc)
{
struct nfs_pageio_descriptor pgio;
- struct inode *inode = folio_file_mapping(folio)->host;
+ struct inode *inode = folio->mapping->host;
int err;
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
@@ -698,12 +725,17 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
struct nfs_pageio_descriptor pgio;
struct nfs_io_completion *ioc = NULL;
unsigned int mntflags = NFS_SERVER(inode)->flags;
+ struct nfs_server *nfss = NFS_SERVER(inode);
int priority = 0;
int err;
- if (wbc->sync_mode == WB_SYNC_NONE &&
- NFS_SERVER(inode)->write_congested)
- return 0;
+ /* Wait with writeback until write congestion eases */
+ if (wbc->sync_mode == WB_SYNC_NONE && nfss->write_congested) {
+ err = wait_event_killable(nfss->write_congestion_wait,
+ nfss->write_congested == 0);
+ if (err)
+ return err;
+ }
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
@@ -742,24 +774,17 @@ out_err:
static void nfs_inode_add_request(struct nfs_page *req)
{
struct folio *folio = nfs_page_to_folio(req);
- struct address_space *mapping = folio_file_mapping(folio);
+ struct address_space *mapping = folio->mapping;
struct nfs_inode *nfsi = NFS_I(mapping->host);
WARN_ON_ONCE(req->wb_this_page != req);
/* Lock the request! */
nfs_lock_request(req);
-
- /*
- * Swap-space should not get truncated. Hence no need to plug the race
- * with invalidate/truncate.
- */
spin_lock(&mapping->i_private_lock);
- if (likely(!folio_test_swapcache(folio))) {
- set_bit(PG_MAPPED, &req->wb_flags);
- folio_set_private(folio);
- folio->private = req;
- }
+ set_bit(PG_MAPPED, &req->wb_flags);
+ folio_set_private(folio);
+ folio->private = req;
spin_unlock(&mapping->i_private_lock);
atomic_long_inc(&nfsi->nrequests);
/* this a head request for a page group - mark it as having an
@@ -779,10 +804,10 @@ static void nfs_inode_remove_request(struct nfs_page *req)
if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
struct folio *folio = nfs_page_to_folio(req->wb_head);
- struct address_space *mapping = folio_file_mapping(folio);
+ struct address_space *mapping = folio->mapping;
spin_lock(&mapping->i_private_lock);
- if (likely(folio && !folio_test_swapcache(folio))) {
+ if (likely(folio)) {
folio->private = NULL;
folio_clear_private(folio);
clear_bit(PG_MAPPED, &req->wb_head->wb_flags);
@@ -803,38 +828,6 @@ static void nfs_mark_request_dirty(struct nfs_page *req)
filemap_dirty_folio(folio_mapping(folio), folio);
}
-/*
- * nfs_page_search_commits_for_head_request_locked
- *
- * Search through commit lists on @inode for the head request for @folio.
- * Must be called while holding the inode (which is cinfo) lock.
- *
- * Returns the head request if found, or NULL if not found.
- */
-static struct nfs_page *
-nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
- struct folio *folio)
-{
- struct nfs_page *freq, *t;
- struct nfs_commit_info cinfo;
- struct inode *inode = &nfsi->vfs_inode;
-
- nfs_init_cinfo_from_inode(&cinfo, inode);
-
- /* search through pnfs commit lists */
- freq = pnfs_search_commit_reqs(inode, &cinfo, folio);
- if (freq)
- return freq->wb_head;
-
- /* Linearly search the commit list for the correct request */
- list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) {
- if (nfs_page_to_folio(freq) == folio)
- return freq->wb_head;
- }
-
- return NULL;
-}
-
/**
* nfs_request_add_commit_list_locked - add request to a commit list
* @req: pointer to a struct nfs_page
@@ -941,7 +934,7 @@ static void nfs_folio_clear_commit(struct folio *folio)
long nr = folio_nr_pages(folio);
node_stat_mod_folio(folio, NR_WRITEBACK, -nr);
- wb_stat_mod(&inode_to_bdi(folio_file_mapping(folio)->host)->wb,
+ wb_stat_mod(&inode_to_bdi(folio->mapping->host)->wb,
WB_WRITEBACK, -nr);
}
}
@@ -1126,7 +1119,7 @@ out_flushme:
*/
nfs_mark_request_dirty(req);
nfs_unlock_and_release_request(req);
- error = nfs_wb_folio(folio_file_mapping(folio)->host, folio);
+ error = nfs_wb_folio(folio->mapping->host, folio);
return (error < 0) ? ERR_PTR(error) : NULL;
}
@@ -1202,7 +1195,7 @@ int nfs_flush_incompatible(struct file *file, struct folio *folio)
nfs_release_request(req);
if (!do_flush)
return 0;
- status = nfs_wb_folio(folio_file_mapping(folio)->host, folio);
+ status = nfs_wb_folio(folio->mapping->host, folio);
} while (status == 0);
return status;
}
@@ -1276,7 +1269,7 @@ out:
*/
static bool nfs_folio_write_uptodate(struct folio *folio, unsigned int pagelen)
{
- struct inode *inode = folio_file_mapping(folio)->host;
+ struct inode *inode = folio->mapping->host;
struct nfs_inode *nfsi = NFS_I(inode);
if (nfs_have_delegated_attributes(inode))
@@ -1315,12 +1308,15 @@ static int nfs_can_extend_write(struct file *file, struct folio *folio,
struct file_lock_context *flctx = locks_inode_context(inode);
struct file_lock *fl;
int ret;
+ unsigned int mntflags = NFS_SERVER(inode)->flags;
+ if (mntflags & NFS_MOUNT_NO_ALIGNWRITE)
+ return 0;
if (file->f_flags & O_DSYNC)
return 0;
if (!nfs_folio_write_uptodate(folio, pagelen))
return 0;
- if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
+ if (nfs_have_write_delegation(inode))
return 1;
if (!flctx || (list_empty_careful(&flctx->flc_flock) &&
list_empty_careful(&flctx->flc_posix)))
@@ -1354,7 +1350,7 @@ int nfs_update_folio(struct file *file, struct folio *folio,
unsigned int offset, unsigned int count)
{
struct nfs_open_context *ctx = nfs_file_open_context(file);
- struct address_space *mapping = folio_file_mapping(folio);
+ struct address_space *mapping = folio->mapping;
struct inode *inode = mapping->host;
unsigned int pagelen = nfs_folio_length(folio);
int status = 0;
@@ -1362,14 +1358,18 @@ int nfs_update_folio(struct file *file, struct folio *folio,
nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
dprintk("NFS: nfs_update_folio(%pD2 %d@%lld)\n", file, count,
- (long long)(folio_file_pos(folio) + offset));
+ (long long)(folio_pos(folio) + offset));
if (!count)
goto out;
if (nfs_can_extend_write(file, folio, pagelen)) {
- count = max(count + offset, pagelen);
- offset = 0;
+ unsigned int end = count + offset;
+
+ offset = round_down(offset, PAGE_SIZE);
+ if (end < pagelen)
+ end = min(round_up(end, PAGE_SIZE), pagelen);
+ count = end - offset;
}
status = nfs_writepage_setup(ctx, folio, offset, count);
@@ -1514,6 +1514,13 @@ void nfs_writeback_update_inode(struct nfs_pgio_header *hdr)
struct nfs_fattr *fattr = &hdr->fattr;
struct inode *inode = hdr->inode;
+ if (nfs_have_delegated_mtime(inode)) {
+ spin_lock(&inode->i_lock);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
+ spin_unlock(&inode->i_lock);
+ return;
+ }
+
spin_lock(&inode->i_lock);
nfs_writeback_check_extend(hdr, fattr);
nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
@@ -1670,7 +1677,8 @@ EXPORT_SYMBOL_GPL(nfs_commitdata_release);
int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
const struct nfs_rpc_ops *nfs_ops,
const struct rpc_call_ops *call_ops,
- int how, int flags)
+ int how, int flags,
+ struct nfsd_file *localio)
{
struct rpc_task *task;
int priority = flush_task_priority(how);
@@ -1699,6 +1707,9 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
dprintk("NFS: initiated commit call\n");
+ if (localio)
+ return nfs_local_commit(localio, data, call_ops, how);
+
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
@@ -1798,6 +1809,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
struct nfs_commit_info *cinfo)
{
struct nfs_commit_data *data;
+ struct nfsd_file *localio;
unsigned short task_flags = 0;
/* another commit raced with us */
@@ -1814,9 +1826,13 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
nfs_init_commit(data, head, NULL, cinfo);
if (NFS_SERVER(inode)->nfs_client->cl_minorversion)
task_flags = RPC_TASK_MOVEABLE;
+
+ localio = nfs_local_open_fh(NFS_SERVER(inode)->nfs_client, data->cred,
+ data->args.fh, &data->context->nfl,
+ data->context->mode);
return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode),
data->mds_ops, how,
- RPC_TASK_CRED_NOREF | task_flags);
+ RPC_TASK_CRED_NOREF | task_flags, localio);
}
/*
@@ -1837,7 +1853,6 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
struct nfs_page *req;
int status = data->task.tk_status;
struct nfs_commit_info cinfo;
- struct nfs_server *nfss;
struct folio *folio;
while (!list_empty(&data->pages)) {
@@ -1880,9 +1895,6 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
/* Latency breaker */
cond_resched();
}
- nfss = NFS_SERVER(data->inode);
- if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
- nfss->write_congested = 0;
nfs_init_cinfo(&cinfo, data->inode, data->dreq);
nfs_commit_end(cinfo.mds);
@@ -2073,17 +2085,17 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio)
*/
int nfs_wb_folio(struct inode *inode, struct folio *folio)
{
- loff_t range_start = folio_file_pos(folio);
- loff_t range_end = range_start + (loff_t)folio_size(folio) - 1;
+ loff_t range_start = folio_pos(folio);
+ size_t len = folio_size(folio);
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = 0,
.range_start = range_start,
- .range_end = range_end,
+ .range_end = range_start + len - 1,
};
int ret;
- trace_nfs_writeback_folio(inode, folio);
+ trace_nfs_writeback_folio(inode, range_start, len);
for (;;) {
folio_wait_writeback(folio);
@@ -2101,7 +2113,7 @@ int nfs_wb_folio(struct inode *inode, struct folio *folio)
goto out_error;
}
out_error:
- trace_nfs_writeback_folio_done(inode, folio, ret);
+ trace_nfs_writeback_folio_done(inode, range_start, len, ret);
return ret;
}
@@ -2120,10 +2132,10 @@ int nfs_migrate_folio(struct address_space *mapping, struct folio *dst,
if (folio_test_private(src))
return -EBUSY;
- if (folio_test_fscache(src)) {
+ if (folio_test_private_2(src)) { /* [DEPRECATED] */
if (mode == MIGRATE_ASYNC)
return -EBUSY;
- folio_wait_fscache(src);
+ folio_wait_private_2(src);
}
return migrate_folio(mapping, dst, src, mode);