summaryrefslogtreecommitdiff
path: root/fs/nfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs')
-rw-r--r--fs/nfs/Kconfig8
-rw-r--r--fs/nfs/Makefile1
-rw-r--r--fs/nfs/blocklayout/blocklayout.c38
-rw-r--r--fs/nfs/blocklayout/blocklayout.h11
-rw-r--r--fs/nfs/blocklayout/dev.c164
-rw-r--r--fs/nfs/callback.c9
-rw-r--r--fs/nfs/callback.h5
-rw-r--r--fs/nfs/callback_proc.c21
-rw-r--r--fs/nfs/callback_xdr.c48
-rw-r--r--fs/nfs/client.c99
-rw-r--r--fs/nfs/delegation.c132
-rw-r--r--fs/nfs/delegation.h46
-rw-r--r--fs/nfs/dir.c199
-rw-r--r--fs/nfs/direct.c68
-rw-r--r--fs/nfs/file.c75
-rw-r--r--fs/nfs/filelayout/filelayout.c31
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c2
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c100
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.h1
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c6
-rw-r--r--fs/nfs/fs_context.c26
-rw-r--r--fs/nfs/fscache.c48
-rw-r--r--fs/nfs/fscache.h12
-rw-r--r--fs/nfs/getroot.c2
-rw-r--r--fs/nfs/inode.c193
-rw-r--r--fs/nfs/internal.h104
-rw-r--r--fs/nfs/io.c44
-rw-r--r--fs/nfs/iostat.h9
-rw-r--r--fs/nfs/localio.c867
-rw-r--r--fs/nfs/mount_clnt.c5
-rw-r--r--fs/nfs/namespace.c4
-rw-r--r--fs/nfs/netns.h2
-rw-r--r--fs/nfs/nfs.h4
-rw-r--r--fs/nfs/nfs2super.c1
-rw-r--r--fs/nfs/nfs2xdr.c70
-rw-r--r--fs/nfs/nfs3client.c1
-rw-r--r--fs/nfs/nfs3proc.c64
-rw-r--r--fs/nfs/nfs3super.c1
-rw-r--r--fs/nfs/nfs3xdr.c108
-rw-r--r--fs/nfs/nfs42.h7
-rw-r--r--fs/nfs/nfs42proc.c26
-rw-r--r--fs/nfs/nfs42xattr.c6
-rw-r--r--fs/nfs/nfs42xdr.c4
-rw-r--r--fs/nfs/nfs4_fs.h8
-rw-r--r--fs/nfs/nfs4client.c8
-rw-r--r--fs/nfs/nfs4file.c2
-rw-r--r--fs/nfs/nfs4proc.c483
-rw-r--r--fs/nfs/nfs4state.c85
-rw-r--r--fs/nfs/nfs4super.c25
-rw-r--r--fs/nfs/nfs4sysctl.c2
-rw-r--r--fs/nfs/nfs4trace.c9
-rw-r--r--fs/nfs/nfs4trace.h184
-rw-r--r--fs/nfs/nfs4xdr.c247
-rw-r--r--fs/nfs/nfsroot.c4
-rw-r--r--fs/nfs/nfstrace.h106
-rw-r--r--fs/nfs/pagelist.c139
-rw-r--r--fs/nfs/pnfs.c267
-rw-r--r--fs/nfs/pnfs.h56
-rw-r--r--fs/nfs/pnfs_dev.c3
-rw-r--r--fs/nfs/pnfs_nfs.c93
-rw-r--r--fs/nfs/proc.c17
-rw-r--r--fs/nfs/read.c85
-rw-r--r--fs/nfs/super.c36
-rw-r--r--fs/nfs/symlink.c12
-rw-r--r--fs/nfs/sysctl.c2
-rw-r--r--fs/nfs/sysfs.c6
-rw-r--r--fs/nfs/unlink.c2
-rw-r--r--fs/nfs/write.c364
68 files changed, 3377 insertions, 1540 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index f7e32d76e34d..d3f76101ad4b 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -4,6 +4,7 @@ config NFS_FS
depends on INET && FILE_LOCKING && MULTIUSER
select LOCKD
select SUNRPC
+ select NFS_COMMON
select NFS_ACL_SUPPORT if NFS_V3_ACL
help
Choose Y here if you want to access files residing on other
@@ -33,12 +34,12 @@ config NFS_FS
config NFS_V2
tristate "NFS client support for NFS version 2"
depends on NFS_FS
- default y
+ default n
help
This option enables support for version 2 of the NFS protocol
(RFC 1094) in the kernel's NFS client.
- If unsure, say Y.
+ If unsure, say N.
config NFS_V3
tristate "NFS client support for NFS version 3"
@@ -169,7 +170,8 @@ config ROOT_NFS
config NFS_FSCACHE
bool "Provide NFS client caching support"
- depends on NFS_FS=m && NETFS_SUPPORT || NFS_FS=y && NETFS_SUPPORT=y
+ depends on NFS_FS
+ select NETFS_SUPPORT
select FSCACHE
help
Say Y here if you want NFS data to be cached locally on disc through
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 5f6db37f461e..9fb2f2cac87e 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -13,6 +13,7 @@ nfs-y := client.o dir.o file.o getroot.o inode.o super.o \
nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
nfs-$(CONFIG_SYSCTL) += sysctl.o
nfs-$(CONFIG_NFS_FSCACHE) += fscache.o
+nfs-$(CONFIG_NFS_LOCALIO) += localio.o
obj-$(CONFIG_NFS_V2) += nfsv2.o
nfsv2-y := nfs2super.o proc.o nfs2xdr.o
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 6be13e0ec170..47189476b553 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -564,25 +564,45 @@ bl_find_get_deviceid(struct nfs_server *server,
gfp_t gfp_mask)
{
struct nfs4_deviceid_node *node;
- unsigned long start, end;
+ int err = -ENODEV;
retry:
node = nfs4_find_get_deviceid(server, id, cred, gfp_mask);
if (!node)
return ERR_PTR(-ENODEV);
- if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0)
- return node;
+ /*
+ * Devices that are marked unavailable are left in the cache with a
+ * timeout to avoid sending GETDEVINFO after every LAYOUTGET, or
+ * constantly attempting to register the device. Once marked as
+ * unavailable they must be deleted and never reused.
+ */
+ if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) {
+ unsigned long end = jiffies;
+ unsigned long start = end - PNFS_DEVICE_RETRY_TIMEOUT;
+
+ if (!time_in_range(node->timestamp_unavailable, start, end)) {
+ /* Uncork subsequent GETDEVINFO operations for this device */
+ nfs4_delete_deviceid(node->ld, node->nfs_client, id);
+ goto retry;
+ }
+ goto out_put;
+ }
- end = jiffies;
- start = end - PNFS_DEVICE_RETRY_TIMEOUT;
- if (!time_in_range(node->timestamp_unavailable, start, end)) {
- nfs4_delete_deviceid(node->ld, node->nfs_client, id);
- goto retry;
+ if (!bl_register_dev(container_of(node, struct pnfs_block_dev, node))) {
+ /*
+ * If we cannot register, treat this device as transient:
+ * Make a negative cache entry for the device
+ */
+ nfs4_mark_deviceid_unavailable(node);
+ goto out_put;
}
+ return node;
+
+out_put:
nfs4_put_deviceid_node(node);
- return ERR_PTR(-ENODEV);
+ return ERR_PTR(err);
}
static int
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index b4294a8aa2d4..6da40ca19570 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -104,20 +104,26 @@ struct pnfs_block_dev {
u64 start;
u64 len;
+ enum pnfs_block_volume_type type;
u32 nr_children;
struct pnfs_block_dev *children;
u64 chunk_size;
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
u64 disk_offset;
+ unsigned long flags;
u64 pr_key;
- bool pr_registered;
bool (*map)(struct pnfs_block_dev *dev, u64 offset,
struct pnfs_block_dev_map *map);
};
+/* pnfs_block_dev flag bits */
+enum {
+ PNFS_BDEV_REGISTERED = 0,
+};
+
/* sector_t fields are all in 512-byte sectors */
struct pnfs_block_extent {
union {
@@ -172,6 +178,7 @@ struct bl_msg_hdr {
#define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */
/* dev.c */
+bool bl_register_dev(struct pnfs_block_dev *d);
struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server,
struct pnfs_device *pdev, gfp_t gfp_mask);
void bl_free_deviceid_node(struct nfs4_deviceid_node *d);
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index c97ebc42ec0f..cab8809f0e0f 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -10,12 +10,81 @@
#include <linux/pr.h>
#include "blocklayout.h"
+#include "../nfs4trace.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
+static void bl_unregister_scsi(struct pnfs_block_dev *dev)
+{
+ struct block_device *bdev = file_bdev(dev->bdev_file);
+ const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
+ int status;
+
+ status = ops->pr_register(bdev, dev->pr_key, 0, false);
+ if (status)
+ trace_bl_pr_key_unreg_err(bdev, dev->pr_key, status);
+ else
+ trace_bl_pr_key_unreg(bdev, dev->pr_key);
+}
+
+static bool bl_register_scsi(struct pnfs_block_dev *dev)
+{
+ struct block_device *bdev = file_bdev(dev->bdev_file);
+ const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
+ int status;
+
+ if (test_and_set_bit(PNFS_BDEV_REGISTERED, &dev->flags))
+ return true;
+
+ status = ops->pr_register(bdev, 0, dev->pr_key, true);
+ if (status) {
+ trace_bl_pr_key_reg_err(bdev, dev->pr_key, status);
+ return false;
+ }
+ trace_bl_pr_key_reg(bdev, dev->pr_key);
+ return true;
+}
+
+static void bl_unregister_dev(struct pnfs_block_dev *dev)
+{
+ u32 i;
+
+ if (dev->nr_children) {
+ for (i = 0; i < dev->nr_children; i++)
+ bl_unregister_dev(&dev->children[i]);
+ return;
+ }
+
+ if (dev->type == PNFS_BLOCK_VOLUME_SCSI &&
+ test_and_clear_bit(PNFS_BDEV_REGISTERED, &dev->flags))
+ bl_unregister_scsi(dev);
+}
+
+bool bl_register_dev(struct pnfs_block_dev *dev)
+{
+ u32 i;
+
+ if (dev->nr_children) {
+ for (i = 0; i < dev->nr_children; i++) {
+ if (!bl_register_dev(&dev->children[i])) {
+ while (i > 0)
+ bl_unregister_dev(&dev->children[--i]);
+ return false;
+ }
+ }
+ return true;
+ }
+
+ if (dev->type == PNFS_BLOCK_VOLUME_SCSI)
+ return bl_register_scsi(dev);
+ return true;
+}
+
static void
bl_free_device(struct pnfs_block_dev *dev)
{
+ bl_unregister_dev(dev);
+
if (dev->nr_children) {
int i;
@@ -23,19 +92,8 @@ bl_free_device(struct pnfs_block_dev *dev)
bl_free_device(&dev->children[i]);
kfree(dev->children);
} else {
- if (dev->pr_registered) {
- const struct pr_ops *ops =
- dev->bdev_handle->bdev->bd_disk->fops->pr_ops;
- int error;
-
- error = ops->pr_register(dev->bdev_handle->bdev,
- dev->pr_key, 0, false);
- if (error)
- pr_err("failed to unregister PR key.\n");
- }
-
- if (dev->bdev_handle)
- bdev_release(dev->bdev_handle);
+ if (dev->bdev_file)
+ fput(dev->bdev_file);
}
}
@@ -169,7 +227,7 @@ static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset,
map->start = dev->start;
map->len = dev->len;
map->disk_offset = dev->disk_offset;
- map->bdev = dev->bdev_handle->bdev;
+ map->bdev = file_bdev(dev->bdev_file);
return true;
}
@@ -236,26 +294,26 @@ bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
dev_t dev;
dev = bl_resolve_deviceid(server, v, gfp_mask);
if (!dev)
return -EIO;
- bdev_handle = bdev_open_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE,
+ bdev_file = bdev_file_open_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE,
NULL, NULL);
- if (IS_ERR(bdev_handle)) {
+ if (IS_ERR(bdev_file)) {
printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
- MAJOR(dev), MINOR(dev), PTR_ERR(bdev_handle));
- return PTR_ERR(bdev_handle);
+ MAJOR(dev), MINOR(dev), PTR_ERR(bdev_file));
+ return PTR_ERR(bdev_file);
}
- d->bdev_handle = bdev_handle;
- d->len = bdev_nr_bytes(bdev_handle->bdev);
+ d->bdev_file = bdev_file;
+ d->len = bdev_nr_bytes(file_bdev(bdev_file));
d->map = bl_map_simple;
printk(KERN_INFO "pNFS: using block device %s\n",
- bdev_handle->bdev->bd_disk->disk_name);
+ file_bdev(bdev_file)->bd_disk->disk_name);
return 0;
}
@@ -300,10 +358,10 @@ bl_validate_designator(struct pnfs_block_volume *v)
}
}
-static struct bdev_handle *
+static struct file *
bl_open_path(struct pnfs_block_volume *v, const char *prefix)
{
- struct bdev_handle *bdev_handle;
+ struct file *bdev_file;
const char *devname;
devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/%s%*phN",
@@ -311,15 +369,15 @@ bl_open_path(struct pnfs_block_volume *v, const char *prefix)
if (!devname)
return ERR_PTR(-ENOMEM);
- bdev_handle = bdev_open_by_path(devname, BLK_OPEN_READ | BLK_OPEN_WRITE,
+ bdev_file = bdev_file_open_by_path(devname, BLK_OPEN_READ | BLK_OPEN_WRITE,
NULL, NULL);
- if (IS_ERR(bdev_handle)) {
- pr_warn("pNFS: failed to open device %s (%ld)\n",
- devname, PTR_ERR(bdev_handle));
+ if (IS_ERR(bdev_file)) {
+ dprintk("failed to open device %s (%ld)\n",
+ devname, PTR_ERR(bdev_file));
}
kfree(devname);
- return bdev_handle;
+ return bdev_file;
}
static int
@@ -327,8 +385,9 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
- struct bdev_handle *bdev_handle;
+ struct block_device *bdev;
const struct pr_ops *ops;
+ struct file *bdev_file;
int error;
if (!bl_validate_designator(v))
@@ -340,43 +399,38 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
* On other distributions like Debian, the default SCSI by-id path will
* point to the dm-multipath device if one exists.
*/
- bdev_handle = bl_open_path(v, "dm-uuid-mpath-0x");
- if (IS_ERR(bdev_handle))
- bdev_handle = bl_open_path(v, "wwn-0x");
- if (IS_ERR(bdev_handle))
- return PTR_ERR(bdev_handle);
- d->bdev_handle = bdev_handle;
-
- d->len = bdev_nr_bytes(d->bdev_handle->bdev);
+ bdev_file = bl_open_path(v, "dm-uuid-mpath-0x");
+ if (IS_ERR(bdev_file))
+ bdev_file = bl_open_path(v, "wwn-0x");
+ if (IS_ERR(bdev_file))
+ bdev_file = bl_open_path(v, "nvme-eui.");
+ if (IS_ERR(bdev_file)) {
+ pr_warn("pNFS: no device found for volume %*phN\n",
+ v->scsi.designator_len, v->scsi.designator);
+ return PTR_ERR(bdev_file);
+ }
+ d->bdev_file = bdev_file;
+ bdev = file_bdev(bdev_file);
+
+ d->len = bdev_nr_bytes(bdev);
d->map = bl_map_simple;
d->pr_key = v->scsi.pr_key;
if (d->len == 0)
return -ENODEV;
- pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
- d->bdev_handle->bdev->bd_disk->disk_name, d->pr_key);
-
- ops = d->bdev_handle->bdev->bd_disk->fops->pr_ops;
+ ops = bdev->bd_disk->fops->pr_ops;
if (!ops) {
pr_err("pNFS: block device %s does not support reservations.",
- d->bdev_handle->bdev->bd_disk->disk_name);
+ bdev->bd_disk->disk_name);
error = -EINVAL;
goto out_blkdev_put;
}
- error = ops->pr_register(d->bdev_handle->bdev, 0, d->pr_key, true);
- if (error) {
- pr_err("pNFS: failed to register key for block device %s.",
- d->bdev_handle->bdev->bd_disk->disk_name);
- goto out_blkdev_put;
- }
-
- d->pr_registered = true;
return 0;
out_blkdev_put:
- bdev_release(d->bdev_handle);
+ fput(d->bdev_file);
return error;
}
@@ -458,7 +512,9 @@ static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
- switch (volumes[idx].type) {
+ d->type = volumes[idx].type;
+
+ switch (d->type) {
case PNFS_BLOCK_VOLUME_SIMPLE:
return bl_parse_simple(server, d, volumes, idx, gfp_mask);
case PNFS_BLOCK_VOLUME_SLICE:
@@ -470,7 +526,7 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
case PNFS_BLOCK_VOLUME_SCSI:
return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
default:
- dprintk("unsupported volume type: %d\n", volumes[idx].type);
+ dprintk("unsupported volume type: %d\n", d->type);
return -EIO;
}
}
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 760d27dd7225..86bdc7d23fb9 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -76,6 +76,8 @@ nfs4_callback_svc(void *vrqstp)
{
struct svc_rqst *rqstp = vrqstp;
+ svc_thread_init_status(rqstp, 0);
+
set_freezable();
while (!svc_thread_should_stop(rqstp))
@@ -209,10 +211,6 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
return ERR_PTR(-ENOMEM);
}
cb_info->serv = serv;
- /* As there is only one thread we need to over-ride the
- * default maximum of 80 connections
- */
- serv->sv_maxconn = 1024;
dprintk("nfs_callback_create_svc: service created\n");
return serv;
}
@@ -356,15 +354,12 @@ static const struct svc_version *nfs4_callback_version[] = {
[4] = &nfs4_callback_version4,
};
-static struct svc_stat nfs4_callback_stats;
-
static struct svc_program nfs4_callback_program = {
.pg_prog = NFS4_CALLBACK, /* RPC service number */
.pg_nvers = ARRAY_SIZE(nfs4_callback_version), /* Number of entries */
.pg_vers = nfs4_callback_version, /* version table */
.pg_name = "NFSv4 callback", /* service name */
.pg_class = "nfs", /* authentication class */
- .pg_stats = &nfs4_callback_stats,
.pg_authenticate = nfs_callback_authenticate,
.pg_init_request = svc_generic_init_request,
.pg_rpcbind_set = svc_generic_rpcbind_set,
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 650758ee0d5f..154a6ed1299f 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -46,14 +46,15 @@ struct cb_compound_hdr_res {
struct cb_getattrargs {
struct nfs_fh fh;
- uint32_t bitmap[2];
+ uint32_t bitmap[3];
};
struct cb_getattrres {
__be32 status;
- uint32_t bitmap[2];
+ uint32_t bitmap[3];
uint64_t size;
uint64_t change_attr;
+ struct timespec64 atime;
struct timespec64 ctime;
struct timespec64 mtime;
};
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 76cea34477ae..8397c43358bd 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -37,7 +37,7 @@ __be32 nfs4_callback_getattr(void *argp, void *resp,
if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
goto out;
- res->bitmap[0] = res->bitmap[1] = 0;
+ memset(res->bitmap, 0, sizeof(res->bitmap));
res->status = htonl(NFS4ERR_BADHANDLE);
dprintk_rcu("NFS: GETATTR callback request from %s\n",
@@ -59,12 +59,16 @@ __be32 nfs4_callback_getattr(void *argp, void *resp,
res->change_attr = delegation->change_attr;
if (nfs_have_writebacks(inode))
res->change_attr++;
+ res->atime = inode_get_atime(inode);
res->ctime = inode_get_ctime(inode);
res->mtime = inode_get_mtime(inode);
- res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) &
- args->bitmap[0];
- res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) &
- args->bitmap[1];
+ res->bitmap[0] = (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE) &
+ args->bitmap[0];
+ res->bitmap[1] = (FATTR4_WORD1_TIME_ACCESS |
+ FATTR4_WORD1_TIME_METADATA |
+ FATTR4_WORD1_TIME_MODIFY) & args->bitmap[1];
+ res->bitmap[2] = (FATTR4_WORD2_TIME_DELEG_ACCESS |
+ FATTR4_WORD2_TIME_DELEG_MODIFY) & args->bitmap[2];
res->status = 0;
out_iput:
rcu_read_unlock();
@@ -319,9 +323,10 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
int stat;
if (args->cbl_recall_type == RETURN_FSID)
- stat = pnfs_destroy_layouts_byfsid(clp, &args->cbl_fsid, true);
+ stat = pnfs_layout_destroy_byfsid(clp, &args->cbl_fsid,
+ PNFS_LAYOUT_BULK_RETURN);
else
- stat = pnfs_destroy_layouts_byclid(clp, true);
+ stat = pnfs_layout_destroy_byclid(clp, PNFS_LAYOUT_BULK_RETURN);
if (stat != 0)
return NFS4ERR_DELAY;
return NFS4ERR_NOMATCHING_LAYOUT;
@@ -713,7 +718,7 @@ __be32 nfs4_callback_offload(void *data, void *dummy,
copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_KERNEL);
if (!copy)
- return htonl(NFS4ERR_SERVERFAULT);
+ return cpu_to_be32(NFS4ERR_DELAY);
spin_lock(&cps->clp->cl_lock);
rcu_read_lock();
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 9369488f2ed4..4254ba3ee7c5 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -25,8 +25,9 @@
#define CB_OP_GETATTR_BITMAP_MAXSZ (4 * 4) // bitmap length, 3 bitmaps
#define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \
CB_OP_GETATTR_BITMAP_MAXSZ + \
- /* change, size, ctime, mtime */\
- (2 + 2 + 3 + 3) * 4)
+ /* change, size, atime, ctime,
+ * mtime, deleg_atime, deleg_mtime */\
+ (2 + 2 + 3 + 3 + 3 + 3 + 3) * 4)
#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
#if defined(CONFIG_NFS_V4_1)
@@ -117,7 +118,9 @@ static __be32 decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
if (likely(attrlen > 0))
bitmap[0] = ntohl(*p++);
if (attrlen > 1)
- bitmap[1] = ntohl(*p);
+ bitmap[1] = ntohl(*p++);
+ if (attrlen > 2)
+ bitmap[2] = ntohl(*p);
return 0;
}
@@ -372,6 +375,8 @@ static __be32 decode_rc_list(struct xdr_stream *xdr,
rc_list->rcl_nrefcalls = ntohl(*p++);
if (rc_list->rcl_nrefcalls) {
+ if (unlikely(rc_list->rcl_nrefcalls > xdr->buf->len))
+ goto out;
p = xdr_inline_decode(xdr,
rc_list->rcl_nrefcalls * 2 * sizeof(uint32_t));
if (unlikely(p == NULL))
@@ -445,7 +450,7 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp,
void *argp)
{
struct cb_recallanyargs *args = argp;
- uint32_t bitmap[2];
+ uint32_t bitmap[3];
__be32 *p, status;
p = xdr_inline_decode(xdr, 4);
@@ -635,6 +640,13 @@ static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec64 *
return 0;
}
+static __be32 encode_attr_atime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time)
+{
+ if (!(bitmap[1] & FATTR4_WORD1_TIME_ACCESS))
+ return 0;
+ return encode_attr_time(xdr,time);
+}
+
static __be32 encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time)
{
if (!(bitmap[1] & FATTR4_WORD1_TIME_METADATA))
@@ -649,6 +661,24 @@ static __be32 encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap,
return encode_attr_time(xdr,time);
}
+static __be32 encode_attr_delegatime(struct xdr_stream *xdr,
+ const uint32_t *bitmap,
+ const struct timespec64 *time)
+{
+ if (!(bitmap[2] & FATTR4_WORD2_TIME_DELEG_ACCESS))
+ return 0;
+ return encode_attr_time(xdr,time);
+}
+
+static __be32 encode_attr_delegmtime(struct xdr_stream *xdr,
+ const uint32_t *bitmap,
+ const struct timespec64 *time)
+{
+ if (!(bitmap[2] & FATTR4_WORD2_TIME_DELEG_MODIFY))
+ return 0;
+ return encode_attr_time(xdr,time);
+}
+
static __be32 encode_compound_hdr_res(struct xdr_stream *xdr, struct cb_compound_hdr_res *hdr)
{
__be32 status;
@@ -699,10 +729,19 @@ static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr,
status = encode_attr_size(xdr, res->bitmap, res->size);
if (unlikely(status != 0))
goto out;
+ status = encode_attr_atime(xdr, res->bitmap, &res->atime);
+ if (unlikely(status != 0))
+ goto out;
status = encode_attr_ctime(xdr, res->bitmap, &res->ctime);
if (unlikely(status != 0))
goto out;
status = encode_attr_mtime(xdr, res->bitmap, &res->mtime);
+ if (unlikely(status != 0))
+ goto out;
+ status = encode_attr_delegatime(xdr, res->bitmap, &res->atime);
+ if (unlikely(status != 0))
+ goto out;
+ status = encode_attr_delegmtime(xdr, res->bitmap, &res->mtime);
*savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1)));
out:
return status;
@@ -945,6 +984,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
nfs_put_client(cps.clp);
goto out_invalidcred;
}
+ svc_xprt_set_valid(rqstp->rq_xprt);
}
cps.minorversion = hdr_arg.minorversion;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index fbdc9ca80f71..3b0918ade53c 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -38,7 +38,7 @@
#include <linux/sunrpc/bc_xprt.h>
#include <linux/nsproxy.h>
#include <linux/pid_namespace.h>
-
+#include <linux/nfslocalio.h>
#include "nfs4_fs.h"
#include "callback.h"
@@ -55,9 +55,13 @@
#define NFSDBG_FACILITY NFSDBG_CLIENT
static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
-static DEFINE_SPINLOCK(nfs_version_lock);
-static DEFINE_MUTEX(nfs_version_mutex);
-static LIST_HEAD(nfs_versions);
+static DEFINE_RWLOCK(nfs_version_lock);
+
+static struct nfs_subversion *nfs_version_mods[5] = {
+ [2] = NULL,
+ [3] = NULL,
+ [4] = NULL,
+};
/*
* RPC cruft for NFS
@@ -73,46 +77,41 @@ const struct rpc_program nfs_program = {
.number = NFS_PROGRAM,
.nrvers = ARRAY_SIZE(nfs_version),
.version = nfs_version,
- .stats = &nfs_rpcstat,
.pipe_dir_name = NFS_PIPE_DIRNAME,
};
-struct rpc_stat nfs_rpcstat = {
- .program = &nfs_program
-};
-
-static struct nfs_subversion *find_nfs_version(unsigned int version)
+static struct nfs_subversion *__find_nfs_version(unsigned int version)
{
struct nfs_subversion *nfs;
- spin_lock(&nfs_version_lock);
-
- list_for_each_entry(nfs, &nfs_versions, list) {
- if (nfs->rpc_ops->version == version) {
- spin_unlock(&nfs_version_lock);
- return nfs;
- }
- }
- spin_unlock(&nfs_version_lock);
- return ERR_PTR(-EPROTONOSUPPORT);
+ read_lock(&nfs_version_lock);
+ nfs = nfs_version_mods[version];
+ read_unlock(&nfs_version_lock);
+ return nfs;
}
-struct nfs_subversion *get_nfs_version(unsigned int version)
+struct nfs_subversion *find_nfs_version(unsigned int version)
{
- struct nfs_subversion *nfs = find_nfs_version(version);
+ struct nfs_subversion *nfs = __find_nfs_version(version);
- if (IS_ERR(nfs)) {
- mutex_lock(&nfs_version_mutex);
- request_module("nfsv%d", version);
- nfs = find_nfs_version(version);
- mutex_unlock(&nfs_version_mutex);
- }
+ if (!nfs && request_module("nfsv%d", version) == 0)
+ nfs = __find_nfs_version(version);
+
+ if (!nfs)
+ return ERR_PTR(-EPROTONOSUPPORT);
- if (!IS_ERR(nfs) && !try_module_get(nfs->owner))
+ if (!get_nfs_version(nfs))
return ERR_PTR(-EAGAIN);
+
return nfs;
}
+int get_nfs_version(struct nfs_subversion *nfs)
+{
+ return try_module_get(nfs->owner);
+}
+EXPORT_SYMBOL_GPL(get_nfs_version);
+
void put_nfs_version(struct nfs_subversion *nfs)
{
module_put(nfs->owner);
@@ -120,23 +119,23 @@ void put_nfs_version(struct nfs_subversion *nfs)
void register_nfs_version(struct nfs_subversion *nfs)
{
- spin_lock(&nfs_version_lock);
+ write_lock(&nfs_version_lock);
- list_add(&nfs->list, &nfs_versions);
+ nfs_version_mods[nfs->rpc_ops->version] = nfs;
nfs_version[nfs->rpc_ops->version] = nfs->rpc_vers;
- spin_unlock(&nfs_version_lock);
+ write_unlock(&nfs_version_lock);
}
EXPORT_SYMBOL_GPL(register_nfs_version);
void unregister_nfs_version(struct nfs_subversion *nfs)
{
- spin_lock(&nfs_version_lock);
+ write_lock(&nfs_version_lock);
nfs_version[nfs->rpc_ops->version] = NULL;
- list_del(&nfs->list);
+ nfs_version_mods[nfs->rpc_ops->version] = NULL;
- spin_unlock(&nfs_version_lock);
+ write_unlock(&nfs_version_lock);
}
EXPORT_SYMBOL_GPL(unregister_nfs_version);
@@ -156,7 +155,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
clp->cl_minorversion = cl_init->minorversion;
clp->cl_nfs_mod = cl_init->nfs_mod;
- if (!try_module_get(clp->cl_nfs_mod->owner))
+ if (!get_nfs_version(clp->cl_nfs_mod))
goto error_dealloc;
clp->rpc_ops = clp->cl_nfs_mod->rpc_ops;
@@ -183,6 +182,13 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
clp->cl_max_connect = cl_init->max_connect ? cl_init->max_connect : 1;
clp->cl_net = get_net(cl_init->net);
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ seqlock_init(&clp->cl_boot_lock);
+ ktime_get_real_ts64(&clp->cl_nfssvc_boot);
+ nfs_uuid_init(&clp->cl_uuid);
+ INIT_WORK(&clp->cl_local_probe_work, nfs_local_probe_async_work);
+#endif /* CONFIG_NFS_LOCALIO */
+
clp->cl_principal = "*";
clp->cl_xprtsec = cl_init->xprtsec;
return clp;
@@ -238,6 +244,8 @@ static void pnfs_init_server(struct nfs_server *server)
*/
void nfs_free_client(struct nfs_client *clp)
{
+ nfs_localio_disable_client(clp);
+
/* -EIO all pending I/O */
if (!IS_ERR(clp->cl_rpcclient))
rpc_shutdown_client(clp->cl_rpcclient);
@@ -429,7 +437,10 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
list_add_tail(&new->cl_share_link,
&nn->nfs_client_list);
spin_unlock(&nn->nfs_client_lock);
- return rpc_ops->init_client(new, cl_init);
+ new = rpc_ops->init_client(new, cl_init);
+ if (!IS_ERR(new))
+ nfs_local_probe(new);
+ return new;
}
spin_unlock(&nn->nfs_client_lock);
@@ -502,6 +513,7 @@ int nfs_create_rpc_client(struct nfs_client *clp,
const struct nfs_client_initdata *cl_init,
rpc_authflavor_t flavor)
{
+ struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
struct rpc_clnt *clnt = NULL;
struct rpc_create_args args = {
.net = clp->cl_net,
@@ -513,6 +525,7 @@ int nfs_create_rpc_client(struct nfs_client *clp,
.servername = clp->cl_hostname,
.nodename = cl_init->nodename,
.program = &nfs_program,
+ .stats = &nn->rpcstats,
.version = clp->rpc_ops->version,
.authflavor = flavor,
.cred = cl_init->cred,
@@ -986,6 +999,7 @@ struct nfs_server *nfs_alloc_server(void)
INIT_LIST_HEAD(&server->layouts);
INIT_LIST_HEAD(&server->state_owners_lru);
INIT_LIST_HEAD(&server->ss_copies);
+ INIT_LIST_HEAD(&server->ss_src_copies);
atomic_set(&server->active, 0);
@@ -997,8 +1011,11 @@ struct nfs_server *nfs_alloc_server(void)
server->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED;
- ida_init(&server->openowner_id);
- ida_init(&server->lockowner_id);
+ init_waitqueue_head(&server->write_congestion_wait);
+ atomic_long_set(&server->writeback, 0);
+
+ atomic64_set(&server->owner_ctr, 0);
+
pnfs_init_server(server);
rpc_init_wait_queue(&server->uoc_rpcwaitq, "NFS UOC");
@@ -1037,8 +1054,6 @@ void nfs_free_server(struct nfs_server *server)
}
ida_free(&s_sysfs_ids, server->s_sysfs_id);
- ida_destroy(&server->lockowner_id);
- ida_destroy(&server->openowner_id);
put_cred(server->cred);
nfs_release_automount_timer();
call_rcu(&server->rcu, delayed_free);
@@ -1182,6 +1197,8 @@ void nfs_clients_init(struct net *net)
#endif
spin_lock_init(&nn->nfs_client_lock);
nn->boot_time = ktime_get_real();
+ memset(&nn->rpcstats, 0, sizeof(nn->rpcstats));
+ nn->rpcstats.program = &nfs_program;
nfs_netns_sysfs_setup(nn, net);
}
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index fa1a14def45c..4db912f56230 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -82,11 +82,10 @@ static void nfs_mark_return_delegation(struct nfs_server *server,
set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
}
-static bool
-nfs4_is_valid_delegation(const struct nfs_delegation *delegation,
- fmode_t flags)
+static bool nfs4_is_valid_delegation(const struct nfs_delegation *delegation,
+ fmode_t type)
{
- if (delegation != NULL && (delegation->type & flags) == flags &&
+ if (delegation != NULL && (delegation->type & type) == type &&
!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) &&
!test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
return true;
@@ -103,19 +102,22 @@ struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode)
return NULL;
}
-static int
-nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
+static int nfs4_do_check_delegation(struct inode *inode, fmode_t type,
+ int flags, bool mark)
{
struct nfs_delegation *delegation;
int ret = 0;
- flags &= FMODE_READ|FMODE_WRITE;
+ type &= FMODE_READ|FMODE_WRITE;
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
- if (nfs4_is_valid_delegation(delegation, flags)) {
+ if (nfs4_is_valid_delegation(delegation, type)) {
if (mark)
nfs_mark_delegation_referenced(delegation);
ret = 1;
+ if ((flags & NFS_DELEGATION_FLAG_TIME) &&
+ !test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags))
+ ret = 0;
}
rcu_read_unlock();
return ret;
@@ -124,22 +126,23 @@ nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
* nfs4_have_delegation - check if inode has a delegation, mark it
* NFS_DELEGATION_REFERENCED if there is one.
* @inode: inode to check
- * @flags: delegation types to check for
+ * @type: delegation types to check for
+ * @flags: various modifiers
*
* Returns one if inode has the indicated delegation, otherwise zero.
*/
-int nfs4_have_delegation(struct inode *inode, fmode_t flags)
+int nfs4_have_delegation(struct inode *inode, fmode_t type, int flags)
{
- return nfs4_do_check_delegation(inode, flags, true);
+ return nfs4_do_check_delegation(inode, type, flags, true);
}
/*
* nfs4_check_delegation - check if inode has a delegation, do not mark
* NFS_DELEGATION_REFERENCED if it has one.
*/
-int nfs4_check_delegation(struct inode *inode, fmode_t flags)
+int nfs4_check_delegation(struct inode *inode, fmode_t type)
{
- return nfs4_do_check_delegation(inode, flags, false);
+ return nfs4_do_check_delegation(inode, type, 0, false);
}
static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid)
@@ -156,8 +159,8 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
list = &flctx->flc_posix;
spin_lock(&flctx->flc_lock);
restart:
- list_for_each_entry(fl, list, fl_list) {
- if (nfs_file_open_context(fl->fl_file)->state != state)
+ for_each_file_lock(fl, list) {
+ if (nfs_file_open_context(fl->c.flc_file)->state != state)
continue;
spin_unlock(&flctx->flc_lock);
status = nfs4_lock_delegation_recall(fl, state, stateid);
@@ -181,7 +184,6 @@ static int nfs_delegation_claim_opens(struct inode *inode,
struct nfs_open_context *ctx;
struct nfs4_state_owner *sp;
struct nfs4_state *state;
- unsigned int seq;
int err;
again:
@@ -202,12 +204,9 @@ again:
sp = state->owner;
/* Block nfs4_proc_unlck */
mutex_lock(&sp->so_delegreturn_mutex);
- seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
err = nfs4_open_delegation_recall(ctx, state, stateid);
if (!err)
err = nfs_delegation_claim_locks(state, stateid);
- if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
- err = -EAGAIN;
mutex_unlock(&sp->so_delegreturn_mutex);
put_nfs_open_context(ctx);
if (err != 0)
@@ -225,11 +224,12 @@ again:
* @type: delegation type
* @stateid: delegation stateid
* @pagemod_limit: write delegation "space_limit"
+ * @deleg_type: raw delegation type
*
*/
void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
fmode_t type, const nfs4_stateid *stateid,
- unsigned long pagemod_limit)
+ unsigned long pagemod_limit, u32 deleg_type)
{
struct nfs_delegation *delegation;
const struct cred *oldcred = NULL;
@@ -243,6 +243,14 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
delegation->pagemod_limit = pagemod_limit;
oldcred = delegation->cred;
delegation->cred = get_cred(cred);
+ switch (deleg_type) {
+ case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
+ case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+ set_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags);
+ break;
+ default:
+ clear_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags);
+ }
clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
if (test_and_clear_bit(NFS_DELEGATION_REVOKED,
&delegation->flags))
@@ -254,11 +262,13 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
} else {
rcu_read_unlock();
nfs_inode_set_delegation(inode, cred, type, stateid,
- pagemod_limit);
+ pagemod_limit, deleg_type);
}
}
-static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
+static int nfs_do_return_delegation(struct inode *inode,
+ struct nfs_delegation *delegation,
+ int issync)
{
const struct cred *cred;
int res = 0;
@@ -267,9 +277,8 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
spin_lock(&delegation->lock);
cred = get_cred(delegation->cred);
spin_unlock(&delegation->lock);
- res = nfs4_proc_delegreturn(inode, cred,
- &delegation->stateid,
- issync);
+ res = nfs4_proc_delegreturn(inode, cred, &delegation->stateid,
+ delegation, issync);
put_cred(cred);
}
return res;
@@ -422,13 +431,13 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation,
* @type: delegation type
* @stateid: delegation stateid
* @pagemod_limit: write delegation "space_limit"
+ * @deleg_type: raw delegation type
*
* Returns zero on success, or a negative errno value.
*/
int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
- fmode_t type,
- const nfs4_stateid *stateid,
- unsigned long pagemod_limit)
+ fmode_t type, const nfs4_stateid *stateid,
+ unsigned long pagemod_limit, u32 deleg_type)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_client *clp = server->nfs_client;
@@ -448,6 +457,11 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
delegation->cred = get_cred(cred);
delegation->inode = inode;
delegation->flags = 1<<NFS_DELEGATION_REFERENCED;
+ switch (deleg_type) {
+ case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
+ case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+ delegation->flags |= BIT(NFS_DELEGATION_DELEGTIME);
+ }
delegation->test_gen = 0;
spin_lock_init(&delegation->lock);
@@ -512,6 +526,11 @@ add_new:
atomic_long_inc(&nfs_active_delegations);
trace_nfs4_set_delegation(inode, type);
+
+ /* If we hold writebacks and have delegated mtime then update */
+ if (deleg_type == NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG &&
+ nfs_have_writebacks(inode))
+ nfs_update_delegated_mtime(inode);
out:
spin_unlock(&clp->cl_lock);
if (delegation != NULL)
@@ -628,6 +647,9 @@ restart:
prev = delegation;
continue;
}
+ inode = nfs_delegation_grab_inode(delegation);
+ if (inode == NULL)
+ continue;
if (prev) {
struct inode *tmp = nfs_delegation_grab_inode(prev);
@@ -638,12 +660,6 @@ restart:
}
}
- inode = nfs_delegation_grab_inode(delegation);
- if (inode == NULL) {
- rcu_read_unlock();
- iput(to_put);
- goto restart;
- }
delegation = nfs_start_delegation_return_locked(NFS_I(inode));
rcu_read_unlock();
@@ -765,6 +781,43 @@ int nfs4_inode_return_delegation(struct inode *inode)
}
/**
+ * nfs4_inode_set_return_delegation_on_close - asynchronously return a delegation
+ * @inode: inode to process
+ *
+ * This routine is called to request that the delegation be returned as soon
+ * as the file is closed. If the file is already closed, the delegation is
+ * immediately returned.
+ */
+void nfs4_inode_set_return_delegation_on_close(struct inode *inode)
+{
+ struct nfs_delegation *delegation;
+ struct nfs_delegation *ret = NULL;
+
+ if (!inode)
+ return;
+ rcu_read_lock();
+ delegation = nfs4_get_valid_delegation(inode);
+ if (!delegation)
+ goto out;
+ spin_lock(&delegation->lock);
+ if (!delegation->inode)
+ goto out_unlock;
+ if (list_empty(&NFS_I(inode)->open_files) &&
+ !test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
+ /* Refcount matched in nfs_end_delegation_return() */
+ ret = nfs_get_delegation(delegation);
+ } else
+ set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
+out_unlock:
+ spin_unlock(&delegation->lock);
+ if (ret)
+ nfs_clear_verifier_delegated(inode);
+out:
+ rcu_read_unlock();
+ nfs_end_delegation_return(inode, ret, 0);
+}
+
+/**
* nfs4_inode_return_delegation_on_close - asynchronously return a delegation
* @inode: inode to process
*
@@ -985,6 +1038,11 @@ void nfs_delegation_mark_returned(struct inode *inode,
}
nfs_mark_delegation_revoked(delegation);
+ clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
+ spin_unlock(&delegation->lock);
+ if (nfs_detach_delegation(NFS_I(inode), delegation, NFS_SERVER(inode)))
+ nfs_put_delegation(delegation);
+ goto out_rcu_unlock;
out_clear_returning:
clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
@@ -1165,7 +1223,6 @@ static int nfs_server_reap_unclaimed_delegations(struct nfs_server *server,
struct inode *inode;
restart:
rcu_read_lock();
-restart_locked:
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if (test_bit(NFS_DELEGATION_INODE_FREEING,
&delegation->flags) ||
@@ -1176,7 +1233,7 @@ restart_locked:
continue;
inode = nfs_delegation_grab_inode(delegation);
if (inode == NULL)
- goto restart_locked;
+ continue;
delegation = nfs_start_delegation_return_locked(NFS_I(inode));
rcu_read_unlock();
if (delegation != NULL) {
@@ -1299,7 +1356,6 @@ static int nfs_server_reap_expired_delegations(struct nfs_server *server,
restart:
rcu_read_lock();
-restart_locked:
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if (test_bit(NFS_DELEGATION_INODE_FREEING,
&delegation->flags) ||
@@ -1311,7 +1367,7 @@ restart_locked:
continue;
inode = nfs_delegation_grab_inode(delegation);
if (inode == NULL)
- goto restart_locked;
+ continue;
spin_lock(&delegation->lock);
cred = get_cred_rcu(delegation->cred);
nfs4_stateid_copy(&stateid, &delegation->stateid);
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index a6f495d012cf..8ff5ab9c5c25 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -38,14 +38,18 @@ enum {
NFS_DELEGATION_TEST_EXPIRED,
NFS_DELEGATION_INODE_FREEING,
NFS_DELEGATION_RETURN_DELAYED,
+ NFS_DELEGATION_DELEGTIME,
};
int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
- fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit);
+ fmode_t type, const nfs4_stateid *stateid,
+ unsigned long pagemod_limit, u32 deleg_type);
void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
- fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit);
+ fmode_t type, const nfs4_stateid *stateid,
+ unsigned long pagemod_limit, u32 deleg_type);
int nfs4_inode_return_delegation(struct inode *inode);
void nfs4_inode_return_delegation_on_close(struct inode *inode);
+void nfs4_inode_set_return_delegation_on_close(struct inode *inode);
int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
void nfs_inode_evict_delegation(struct inode *inode);
@@ -67,7 +71,9 @@ void nfs_test_expired_all_delegations(struct nfs_client *clp);
void nfs_reap_expired_delegations(struct nfs_client *clp);
/* NFSv4 delegation-related procedures */
-int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync);
+int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
+ const nfs4_stateid *stateid,
+ struct nfs_delegation *delegation, int issync);
int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid);
bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, const struct cred **cred);
@@ -75,8 +81,8 @@ bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode);
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
-int nfs4_have_delegation(struct inode *inode, fmode_t flags);
-int nfs4_check_delegation(struct inode *inode, fmode_t flags);
+int nfs4_have_delegation(struct inode *inode, fmode_t type, int flags);
+int nfs4_check_delegation(struct inode *inode, fmode_t type);
bool nfs4_delegation_flush_on_close(const struct inode *inode);
void nfs_inode_find_delegation_state_and_recover(struct inode *inode,
const nfs4_stateid *stateid);
@@ -84,9 +90,37 @@ int nfs4_inode_make_writeable(struct inode *inode);
#endif
+#define NFS_DELEGATION_FLAG_TIME BIT(1)
+
+void nfs_update_delegated_atime(struct inode *inode);
+void nfs_update_delegated_mtime(struct inode *inode);
+void nfs_update_delegated_mtime_locked(struct inode *inode);
+
+static inline int nfs_have_read_or_write_delegation(struct inode *inode)
+{
+ return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0);
+}
+
+static inline int nfs_have_write_delegation(struct inode *inode)
+{
+ return NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE, 0);
+}
+
static inline int nfs_have_delegated_attributes(struct inode *inode)
{
- return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ);
+ return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0);
+}
+
+static inline int nfs_have_delegated_atime(struct inode *inode)
+{
+ return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ,
+ NFS_DELEGATION_FLAG_TIME);
+}
+
+static inline int nfs_have_delegated_mtime(struct inode *inode)
+{
+ return NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE,
+ NFS_DELEGATION_FLAG_TIME);
}
#endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ac505671efbd..2b04038b0e40 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -56,6 +56,8 @@ static int nfs_readdir(struct file *, struct dir_context *);
static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
static loff_t nfs_llseek_dir(struct file *, loff_t, int);
static void nfs_readdir_clear_array(struct folio *);
+static int nfs_do_create(struct inode *dir, struct dentry *dentry,
+ umode_t mode, int open_flags);
const struct file_operations nfs_dir_operations = {
.llseek = nfs_llseek_dir,
@@ -149,7 +151,7 @@ struct nfs_cache_array {
unsigned char folio_full : 1,
folio_is_eof : 1,
cookies_are_ordered : 1;
- struct nfs_cache_array_entry array[];
+ struct nfs_cache_array_entry array[] __counted_by(size);
};
struct nfs_readdir_descriptor {
@@ -326,7 +328,8 @@ static int nfs_readdir_folio_array_append(struct folio *folio,
goto out;
}
- cache_entry = &array->array[array->size];
+ array->size++;
+ cache_entry = &array->array[array->size - 1];
cache_entry->cookie = array->last_cookie;
cache_entry->ino = entry->ino;
cache_entry->d_type = entry->d_type;
@@ -335,7 +338,6 @@ static int nfs_readdir_folio_array_append(struct folio *folio,
array->last_cookie = entry->cookie;
if (array->last_cookie <= cache_entry->cookie)
array->cookies_are_ordered = 0;
- array->size++;
if (entry->eof != 0)
nfs_readdir_array_set_eof(array);
out:
@@ -1435,7 +1437,7 @@ static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf)
if (!dir || !nfs_verify_change_attribute(dir, verf))
return;
- if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+ if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0))
nfs_set_verifier_delegated(&verf);
dentry->d_time = verf;
}
@@ -1625,7 +1627,16 @@ nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
switch (error) {
case 1:
break;
- case 0:
+ case -ETIMEDOUT:
+ if (inode && (IS_ROOT(dentry) ||
+ NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL))
+ error = 1;
+ break;
+ case -ESTALE:
+ case -ENOENT:
+ error = 0;
+ fallthrough;
+ default:
/*
* We can't d_drop the root of a disconnected tree:
* its d_hash is on the s_anon list and d_drop() would hide
@@ -1661,7 +1672,7 @@ nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry,
return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
}
-static int nfs_lookup_revalidate_dentry(struct inode *dir,
+static int nfs_lookup_revalidate_dentry(struct inode *dir, const struct qstr *name,
struct dentry *dentry,
struct inode *inode, unsigned int flags)
{
@@ -1679,19 +1690,9 @@ static int nfs_lookup_revalidate_dentry(struct inode *dir,
goto out;
dir_verifier = nfs_save_change_attribute(dir);
- ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
- if (ret < 0) {
- switch (ret) {
- case -ESTALE:
- case -ENOENT:
- ret = 0;
- break;
- case -ETIMEDOUT:
- if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
- ret = 1;
- }
+ ret = NFS_PROTO(dir)->lookup(dir, dentry, name, fhandle, fattr);
+ if (ret < 0)
goto out;
- }
/* Request help from readdirplus */
nfs_lookup_advise_force_readdirplus(dir, flags);
@@ -1731,11 +1732,11 @@ out:
* cached dentry and do a new lookup.
*/
static int
-nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
- unsigned int flags)
+nfs_do_lookup_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
struct inode *inode;
- int error;
+ int error = 0;
nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
inode = d_inode(dentry);
@@ -1774,47 +1775,59 @@ nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
if (NFS_STALE(inode))
goto out_bad;
- return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags);
+ return nfs_lookup_revalidate_dentry(dir, name, dentry, inode, flags);
out_valid:
return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
out_bad:
if (flags & LOOKUP_RCU)
return -ECHILD;
- return nfs_lookup_revalidate_done(dir, dentry, inode, 0);
+ return nfs_lookup_revalidate_done(dir, dentry, inode, error);
}
static int
-__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
- int (*reval)(struct inode *, struct dentry *, unsigned int))
+__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
{
- struct dentry *parent;
- struct inode *dir;
- int ret;
-
if (flags & LOOKUP_RCU) {
if (dentry->d_fsdata == NFS_FSDATA_BLOCKED)
return -ECHILD;
- parent = READ_ONCE(dentry->d_parent);
- dir = d_inode_rcu(parent);
- if (!dir)
- return -ECHILD;
- ret = reval(dir, dentry, flags);
- if (parent != READ_ONCE(dentry->d_parent))
- return -ECHILD;
} else {
- /* Wait for unlink to complete */
+ /* Wait for unlink to complete - see unblock_revalidate() */
wait_var_event(&dentry->d_fsdata,
- dentry->d_fsdata != NFS_FSDATA_BLOCKED);
- parent = dget_parent(dentry);
- ret = reval(d_inode(parent), dentry, flags);
- dput(parent);
+ smp_load_acquire(&dentry->d_fsdata)
+ != NFS_FSDATA_BLOCKED);
}
- return ret;
+ return 0;
+}
+
+static int nfs_lookup_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
+{
+ if (__nfs_lookup_revalidate(dentry, flags))
+ return -ECHILD;
+ return nfs_do_lookup_revalidate(dir, name, dentry, flags);
+}
+
+static void block_revalidate(struct dentry *dentry)
+{
+ /* old devname - just in case */
+ kfree(dentry->d_fsdata);
+
+ /* Any new reference that could lead to an open
+ * will take ->d_lock in lookup_open() -> d_lookup().
+ * Holding this lock ensures we cannot race with
+ * __nfs_lookup_revalidate() and removes and need
+ * for further barriers.
+ */
+ lockdep_assert_held(&dentry->d_lock);
+
+ dentry->d_fsdata = NFS_FSDATA_BLOCKED;
}
-static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
+static void unblock_revalidate(struct dentry *dentry)
{
- return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate);
+ /* store_release ensures wait_var_event() sees the update */
+ smp_store_release(&dentry->d_fsdata, NULL);
+ wake_up_var(&dentry->d_fsdata);
}
/*
@@ -1957,7 +1970,8 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
dir_verifier = nfs_save_change_attribute(dir);
trace_nfs_lookup_enter(dir, dentry, flags);
- error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
+ error = NFS_PROTO(dir)->lookup(dir, dentry, &dentry->d_name,
+ fhandle, fattr);
if (error == -ENOENT) {
if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
dir_verifier = inode_peek_iversion_raw(dir);
@@ -2000,7 +2014,8 @@ void nfs_d_prune_case_insensitive_aliases(struct inode *inode)
EXPORT_SYMBOL_GPL(nfs_d_prune_case_insensitive_aliases);
#if IS_ENABLED(CONFIG_NFS_V4)
-static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
+static int nfs4_lookup_revalidate(struct inode *, const struct qstr *,
+ struct dentry *, unsigned int);
const struct dentry_operations nfs4_dentry_operations = {
.d_revalidate = nfs4_lookup_revalidate,
@@ -2189,11 +2204,14 @@ no_open:
EXPORT_SYMBOL_GPL(nfs_atomic_open);
static int
-nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
- unsigned int flags)
+nfs4_lookup_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
struct inode *inode;
+ if (__nfs_lookup_revalidate(dentry, flags))
+ return -ECHILD;
+
trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
@@ -2229,19 +2247,51 @@ nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
reval_dentry:
if (flags & LOOKUP_RCU)
return -ECHILD;
- return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags);
+ return nfs_lookup_revalidate_dentry(dir, name, dentry, inode, flags);
full_reval:
- return nfs_do_lookup_revalidate(dir, dentry, flags);
+ return nfs_do_lookup_revalidate(dir, name, dentry, flags);
}
-static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
+#endif /* CONFIG_NFSV4 */
+
+int nfs_atomic_open_v23(struct inode *dir, struct dentry *dentry,
+ struct file *file, unsigned int open_flags,
+ umode_t mode)
{
- return __nfs_lookup_revalidate(dentry, flags,
- nfs4_do_lookup_revalidate);
-}
-#endif /* CONFIG_NFSV4 */
+ /* Same as look+open from lookup_open(), but with different O_TRUNC
+ * handling.
+ */
+ int error = 0;
+
+ if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
+ return -ENAMETOOLONG;
+
+ if (open_flags & O_CREAT) {
+ file->f_mode |= FMODE_CREATED;
+ error = nfs_do_create(dir, dentry, mode, open_flags);
+ if (error)
+ return error;
+ return finish_open(file, dentry, NULL);
+ } else if (d_in_lookup(dentry)) {
+ /* The only flags nfs_lookup considers are
+ * LOOKUP_EXCL and LOOKUP_RENAME_TARGET, and
+ * we want those to be zero so the lookup isn't skipped.
+ */
+ struct dentry *res = nfs_lookup(dir, dentry, 0);
+
+ d_lookup_done(dentry);
+ if (unlikely(res)) {
+ if (IS_ERR(res))
+ return PTR_ERR(res);
+ return finish_no_open(file, res);
+ }
+ }
+ return finish_no_open(file, NULL);
+
+}
+EXPORT_SYMBOL_GPL(nfs_atomic_open_v23);
struct dentry *
nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
@@ -2256,7 +2306,8 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
d_drop(dentry);
if (fhandle->size == 0) {
- error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
+ error = NFS_PROTO(dir)->lookup(dir, dentry, &dentry->d_name,
+ fhandle, fattr);
if (error)
goto out_error;
}
@@ -2303,18 +2354,23 @@ EXPORT_SYMBOL_GPL(nfs_instantiate);
* that the operation succeeded on the server, but an error in the
* reply path made it appear to have failed.
*/
-int nfs_create(struct mnt_idmap *idmap, struct inode *dir,
- struct dentry *dentry, umode_t mode, bool excl)
+static int nfs_do_create(struct inode *dir, struct dentry *dentry,
+ umode_t mode, int open_flags)
{
struct iattr attr;
- int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT;
int error;
+ open_flags |= O_CREAT;
+
dfprintk(VFS, "NFS: create(%s/%lu), %pd\n",
dir->i_sb->s_id, dir->i_ino, dentry);
attr.ia_mode = mode;
attr.ia_valid = ATTR_MODE;
+ if (open_flags & O_TRUNC) {
+ attr.ia_size = 0;
+ attr.ia_valid |= ATTR_SIZE;
+ }
trace_nfs_create_enter(dir, dentry, open_flags);
error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
@@ -2326,6 +2382,12 @@ out_err:
d_drop(dentry);
return error;
}
+
+int nfs_create(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
+{
+ return nfs_do_create(dir, dentry, mode, excl ? O_EXCL : 0);
+}
EXPORT_SYMBOL_GPL(nfs_create);
/*
@@ -2501,15 +2563,12 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
spin_unlock(&dentry->d_lock);
goto out;
}
- /* old devname */
- kfree(dentry->d_fsdata);
- dentry->d_fsdata = NFS_FSDATA_BLOCKED;
+ block_revalidate(dentry);
spin_unlock(&dentry->d_lock);
error = nfs_safe_remove(dentry);
nfs_dentry_remove_handle_error(dir, dentry, error);
- dentry->d_fsdata = NULL;
- wake_up_var(&dentry->d_fsdata);
+ unblock_revalidate(dentry);
out:
trace_nfs_unlink_exit(dir, dentry, error);
return error;
@@ -2616,8 +2675,7 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
{
struct dentry *new_dentry = data->new_dentry;
- new_dentry->d_fsdata = NULL;
- wake_up_var(&new_dentry->d_fsdata);
+ unblock_revalidate(new_dentry);
}
/*
@@ -2679,11 +2737,6 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (WARN_ON(new_dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
WARN_ON(new_dentry->d_fsdata == NFS_FSDATA_BLOCKED))
goto out;
- if (new_dentry->d_fsdata) {
- /* old devname */
- kfree(new_dentry->d_fsdata);
- new_dentry->d_fsdata = NULL;
- }
spin_lock(&new_dentry->d_lock);
if (d_count(new_dentry) > 2) {
@@ -2705,7 +2758,7 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
new_dentry = dentry;
new_inode = NULL;
} else {
- new_dentry->d_fsdata = NFS_FSDATA_BLOCKED;
+ block_revalidate(new_dentry);
must_unblock = true;
spin_unlock(&new_dentry->d_lock);
}
@@ -2717,6 +2770,8 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
must_unblock ? nfs_unblock_rename : NULL);
if (IS_ERR(task)) {
+ if (must_unblock)
+ unblock_revalidate(new_dentry);
error = PTR_ERR(task);
goto out;
}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index c03926a1cc73..f32f8d7c9122 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -56,6 +56,7 @@
#include <linux/uaccess.h>
#include <linux/atomic.h>
+#include "delegation.h"
#include "internal.h"
#include "iostat.h"
#include "pnfs.h"
@@ -130,6 +131,20 @@ static void nfs_direct_truncate_request(struct nfs_direct_req *dreq,
dreq->count = req_start;
}
+static void nfs_direct_file_adjust_size_locked(struct inode *inode,
+ loff_t offset, size_t count)
+{
+ loff_t newsize = offset + (loff_t)count;
+ loff_t oldsize = i_size_read(inode);
+
+ if (newsize > oldsize) {
+ i_size_write(inode, newsize);
+ NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE;
+ trace_nfs_size_grow(inode, newsize);
+ nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
+ }
+}
+
/**
* nfs_swap_rw - NFS address space operation for swap I/O
* @iocb: target I/O control block
@@ -141,8 +156,6 @@ int nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
{
ssize_t ret;
- VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
-
if (iov_iter_rw(iter) == READ)
ret = nfs_file_direct_read(iocb, iter, true);
else
@@ -274,6 +287,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
nfs_direct_count_bytes(dreq, hdr);
spin_unlock(&dreq->lock);
+ nfs_update_delegated_atime(dreq->inode);
+
while (!list_empty(&hdr->pages)) {
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
struct page *page = req->wb_page;
@@ -305,6 +320,7 @@ static void nfs_read_sync_pgio_error(struct list_head *head, int error)
static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
{
get_dreq(hdr->dreq);
+ set_bit(NFS_IOHDR_ODIRECT, &hdr->flags);
}
static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
@@ -456,8 +472,16 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
if (user_backed_iter(iter))
dreq->flags = NFS_ODIRECT_SHOULD_DIRTY;
- if (!swap)
- nfs_start_io_direct(inode);
+ if (!swap) {
+ result = nfs_start_io_direct(inode);
+ if (result) {
+ /* release the reference that would usually be
+ * consumed by nfs_direct_read_schedule_iovec()
+ */
+ nfs_direct_req_release(dreq);
+ goto out_release;
+ }
+ }
NFS_I(inode)->read_io += count;
requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos);
@@ -606,6 +630,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
trace_nfs_direct_commit_complete(dreq);
+ spin_lock(&dreq->lock);
if (status < 0) {
/* Errors in commit are fatal */
dreq->error = status;
@@ -613,6 +638,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
} else {
status = dreq->error;
}
+ spin_unlock(&dreq->lock);
nfs_init_cinfo_from_dreq(&cinfo, dreq);
@@ -625,7 +651,10 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
spin_unlock(&dreq->lock);
nfs_release_request(req);
} else if (!nfs_write_match_verf(verf, req)) {
- dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ spin_lock(&dreq->lock);
+ if (dreq->flags == 0)
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ spin_unlock(&dreq->lock);
/*
* Despite the reboot, the write was successful,
* so reset wb_nio.
@@ -667,10 +696,17 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
LIST_HEAD(mds_list);
nfs_init_cinfo_from_dreq(&cinfo, dreq);
+ nfs_commit_begin(cinfo.mds);
nfs_scan_commit(dreq->inode, &mds_list, &cinfo);
res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo);
- if (res < 0) /* res == -ENOMEM */
- nfs_direct_write_reschedule(dreq);
+ if (res < 0) { /* res == -ENOMEM */
+ spin_lock(&dreq->lock);
+ if (dreq->flags == 0)
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ spin_unlock(&dreq->lock);
+ }
+ if (nfs_commit_end(cinfo.mds))
+ nfs_direct_write_complete(dreq);
}
static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq)
@@ -722,6 +758,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
struct nfs_direct_req *dreq = hdr->dreq;
struct nfs_commit_info cinfo;
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+ struct inode *inode = dreq->inode;
int flags = NFS_ODIRECT_DONE;
trace_nfs_direct_write_completion(dreq);
@@ -743,6 +780,11 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
}
spin_unlock(&dreq->lock);
+ spin_lock(&inode->i_lock);
+ nfs_direct_file_adjust_size_locked(inode, dreq->io_start, dreq->count);
+ nfs_update_delegated_mtime_locked(dreq->inode);
+ spin_unlock(&inode->i_lock);
+
while (!list_empty(&hdr->pages)) {
req = nfs_list_entry(hdr->pages.next);
@@ -997,7 +1039,14 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
requested = nfs_direct_write_schedule_iovec(dreq, iter, pos,
FLUSH_STABLE);
} else {
- nfs_start_io_direct(inode);
+ result = nfs_start_io_direct(inode);
+ if (result) {
+ /* release the reference that would usually be
+ * consumed by nfs_direct_write_schedule_iovec()
+ */
+ nfs_direct_req_release(dreq);
+ goto out_release;
+ }
requested = nfs_direct_write_schedule_iovec(dreq, iter, pos,
FLUSH_COND_STABLE);
@@ -1037,8 +1086,7 @@ int __init nfs_init_directcache(void)
{
nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
sizeof(struct nfs_direct_req),
- 0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ 0, SLAB_RECLAIM_ACCOUNT,
NULL);
if (nfs_direct_cachep == NULL)
return -ENOMEM;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 8577ccf621f5..033feeab8c34 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -29,6 +29,7 @@
#include <linux/pagemap.h>
#include <linux/gfp.h>
#include <linux/swap.h>
+#include <linux/compaction.h>
#include <linux/uaccess.h>
#include <linux/filelock.h>
@@ -166,7 +167,10 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
iocb->ki_filp,
iov_iter_count(to), (unsigned long) iocb->ki_pos);
- nfs_start_io_read(inode);
+ result = nfs_start_io_read(inode);
+ if (result)
+ return result;
+
result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
if (!result) {
result = generic_file_read_iter(iocb, to);
@@ -187,7 +191,10 @@ nfs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe
dprintk("NFS: splice_read(%pD2, %zu@%llu)\n", in, len, *ppos);
- nfs_start_io_read(inode);
+ result = nfs_start_io_read(inode);
+ if (result)
+ return result;
+
result = nfs_revalidate_mapping(inode, in->f_mapping);
if (!result) {
result = filemap_splice_read(in, ppos, pipe, len, flags);
@@ -336,9 +343,10 @@ static bool nfs_want_read_modify_write(struct file *file, struct folio *folio,
* increment the page use counts until he is done with the page.
*/
static int nfs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, struct page **pagep,
+ loff_t pos, unsigned len, struct folio **foliop,
void **fsdata)
{
+ fgf_t fgp = FGP_WRITEBEGIN;
struct folio *folio;
int once_thru = 0;
int ret;
@@ -346,12 +354,13 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n",
file, mapping->host->i_ino, len, (long long) pos);
+ fgp |= fgf_set_order(len);
start:
- folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, FGP_WRITEBEGIN,
+ folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp,
mapping_gfp_mask(mapping));
if (IS_ERR(folio))
return PTR_ERR(folio);
- *pagep = &folio->page;
+ *foliop = folio;
ret = nfs_flush_incompatible(file, folio);
if (ret) {
@@ -370,10 +379,9 @@ start:
static int nfs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
struct nfs_open_context *ctx = nfs_file_open_context(file);
- struct folio *folio = page_folio(page);
unsigned offset = offset_in_folio(folio, pos);
int status;
@@ -425,7 +433,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
static void nfs_invalidate_folio(struct folio *folio, size_t offset,
size_t length)
{
- struct inode *inode = folio_file_mapping(folio)->host;
+ struct inode *inode = folio->mapping->host;
dfprintk(PAGECACHE, "NFS: invalidate_folio(%lu, %zu, %zu)\n",
folio->index, offset, length);
@@ -433,8 +441,8 @@ static void nfs_invalidate_folio(struct folio *folio, size_t offset,
return;
/* Cancel any unstarted writes on this page */
nfs_wb_folio_cancel(inode, folio);
- folio_wait_fscache(folio);
- trace_nfs_invalidate_folio(inode, folio);
+ folio_wait_private_2(folio); /* [DEPRECATED] */
+ trace_nfs_invalidate_folio(inode, folio_pos(folio) + offset, length);
}
/*
@@ -450,9 +458,9 @@ static bool nfs_release_folio(struct folio *folio, gfp_t gfp)
/* If the private flag is set, then the folio is not freeable */
if (folio_test_private(folio)) {
if ((current_gfp_context(gfp) & GFP_KERNEL) != GFP_KERNEL ||
- current_is_kswapd())
+ current_is_kswapd() || current_is_kcompactd())
return false;
- if (nfs_wb_folio(folio_file_mapping(folio)->host, folio) < 0)
+ if (nfs_wb_folio(folio->mapping->host, folio) < 0)
return false;
}
return nfs_fscache_release_folio(folio, gfp);
@@ -500,9 +508,10 @@ static int nfs_launder_folio(struct folio *folio)
dfprintk(PAGECACHE, "NFS: launder_folio(%ld, %llu)\n",
inode->i_ino, folio_pos(folio));
- folio_wait_fscache(folio);
+ folio_wait_private_2(folio); /* [DEPRECATED] */
ret = nfs_wb_folio(inode, folio);
- trace_nfs_launder_folio_done(inode, folio, ret);
+ trace_nfs_launder_folio_done(inode, folio_pos(folio),
+ folio_size(folio), ret);
return ret;
}
@@ -588,13 +597,13 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n",
filp, filp->f_mapping->host->i_ino,
- (long long)folio_file_pos(folio));
+ (long long)folio_pos(folio));
sb_start_pagefault(inode->i_sb);
/* make sure the cache has finished storing the page */
- if (folio_test_fscache(folio) &&
- folio_wait_fscache_killable(folio) < 0) {
+ if (folio_test_private_2(folio) && /* [DEPRECATED] */
+ folio_wait_private_2_killable(folio) < 0) {
ret = VM_FAULT_RETRY;
goto out;
}
@@ -604,7 +613,7 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
folio_lock(folio);
- mapping = folio_file_mapping(folio);
+ mapping = folio->mapping;
if (mapping != inode->i_mapping)
goto out_unlock;
@@ -666,7 +675,9 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
nfs_clear_invalid_mapping(file->f_mapping);
since = filemap_sample_wb_err(file->f_mapping);
- nfs_start_io_write(inode);
+ error = nfs_start_io_write(inode);
+ if (error)
+ return error;
result = generic_write_checks(iocb, from);
if (result > 0)
result = generic_perform_write(iocb, from);
@@ -720,17 +731,17 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
struct inode *inode = filp->f_mapping->host;
int status = 0;
- unsigned int saved_type = fl->fl_type;
+ unsigned int saved_type = fl->c.flc_type;
/* Try local locking first */
posix_test_lock(filp, fl);
- if (fl->fl_type != F_UNLCK) {
+ if (fl->c.flc_type != F_UNLCK) {
/* found a conflict */
goto out;
}
- fl->fl_type = saved_type;
+ fl->c.flc_type = saved_type;
- if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+ if (nfs_have_read_or_write_delegation(inode))
goto out_noconflict;
if (is_local)
@@ -740,7 +751,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
out:
return status;
out_noconflict:
- fl->fl_type = F_UNLCK;
+ fl->c.flc_type = F_UNLCK;
goto out;
}
@@ -765,7 +776,7 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
* If we're signalled while cleaning up locks on process exit, we
* still need to complete the unlock.
*/
- if (status < 0 && !(fl->fl_flags & FL_CLOSE))
+ if (status < 0 && !(fl->c.flc_flags & FL_CLOSE))
return status;
}
@@ -813,7 +824,7 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
* This makes locking act as a cache coherency point.
*/
nfs_sync_mapping(filp->f_mapping);
- if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) {
+ if (!nfs_have_read_or_write_delegation(inode)) {
nfs_zap_caches(inode);
if (mapping_mapped(filp->f_mapping))
nfs_revalidate_mapping(inode, filp->f_mapping);
@@ -832,12 +843,12 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
int is_local = 0;
dprintk("NFS: lock(%pD2, t=%x, fl=%x, r=%lld:%lld)\n",
- filp, fl->fl_type, fl->fl_flags,
+ filp, fl->c.flc_type, fl->c.flc_flags,
(long long)fl->fl_start, (long long)fl->fl_end);
nfs_inc_stats(inode, NFSIOS_VFSLOCK);
- if (fl->fl_flags & FL_RECLAIM)
+ if (fl->c.flc_flags & FL_RECLAIM)
return -ENOGRACE;
if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL)
@@ -851,7 +862,7 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
if (IS_GETLK(cmd))
ret = do_getlk(filp, cmd, fl, is_local);
- else if (fl->fl_type == F_UNLCK)
+ else if (lock_is_unlock(fl))
ret = do_unlk(filp, cmd, fl, is_local);
else
ret = do_setlk(filp, cmd, fl, is_local);
@@ -869,16 +880,16 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
int is_local = 0;
dprintk("NFS: flock(%pD2, t=%x, fl=%x)\n",
- filp, fl->fl_type, fl->fl_flags);
+ filp, fl->c.flc_type, fl->c.flc_flags);
- if (!(fl->fl_flags & FL_FLOCK))
+ if (!(fl->c.flc_flags & FL_FLOCK))
return -ENOLCK;
if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
is_local = 1;
/* We're simulating flock() locks using posix locks on the server */
- if (fl->fl_type == F_UNLCK)
+ if (lock_is_unlock(fl))
return do_unlk(filp, cmd, fl, is_local);
return do_setlk(filp, cmd, fl, is_local);
}
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index ce8f8934bca5..d39a1f58e18d 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -488,7 +488,7 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr)
/* Perform an asynchronous read to ds */
nfs_initiate_pgio(ds_clnt, hdr, hdr->cred,
NFS_PROTO(hdr->inode), &filelayout_read_call_ops,
- 0, RPC_TASK_SOFTCONN);
+ 0, RPC_TASK_SOFTCONN, NULL);
return PNFS_ATTEMPTED;
}
@@ -530,7 +530,7 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
/* Perform an asynchronous write */
nfs_initiate_pgio(ds_clnt, hdr, hdr->cred,
NFS_PROTO(hdr->inode), &filelayout_write_call_ops,
- sync, RPC_TASK_SOFTCONN);
+ sync, RPC_TASK_SOFTCONN, NULL);
return PNFS_ATTEMPTED;
}
@@ -605,14 +605,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
dprintk("--> %s\n", __func__);
- /* FIXME: remove this check when layout segment support is added */
- if (lgr->range.offset != 0 ||
- lgr->range.length != NFS4_MAX_UINT64) {
- dprintk("%s Only whole file layouts supported. Use MDS i/o\n",
- __func__);
- goto out;
- }
-
if (fl->pattern_offset > lgr->range.offset) {
dprintk("%s pattern_offset %lld too large\n",
__func__, fl->pattern_offset);
@@ -875,15 +867,15 @@ static void
filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
- pnfs_generic_pg_check_layout(pgio);
+ pnfs_generic_pg_check_layout(pgio, req);
if (!pgio->pg_lseg) {
pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
nfs_req_openctx(req),
- 0,
- NFS4_MAX_UINT64,
+ req_offset(req),
+ req->wb_bytes,
IOMODE_READ,
false,
- GFP_KERNEL);
+ nfs_io_gfp_mask());
if (IS_ERR(pgio->pg_lseg)) {
pgio->pg_error = PTR_ERR(pgio->pg_lseg);
pgio->pg_lseg = NULL;
@@ -899,15 +891,15 @@ static void
filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
- pnfs_generic_pg_check_layout(pgio);
+ pnfs_generic_pg_check_layout(pgio, req);
if (!pgio->pg_lseg) {
pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
nfs_req_openctx(req),
- 0,
- NFS4_MAX_UINT64,
+ req_offset(req),
+ req->wb_bytes,
IOMODE_RW,
false,
- GFP_NOFS);
+ nfs_io_gfp_mask());
if (IS_ERR(pgio->pg_lseg)) {
pgio->pg_error = PTR_ERR(pgio->pg_lseg);
pgio->pg_lseg = NULL;
@@ -1019,7 +1011,7 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
data->args.fh = fh;
return nfs_initiate_commit(ds_clnt, data, NFS_PROTO(data->inode),
&filelayout_commit_call_ops, how,
- RPC_TASK_SOFTCONN);
+ RPC_TASK_SOFTCONN, NULL);
out_err:
pnfs_generic_prepare_to_resend_writes(data);
pnfs_generic_commit_release(data);
@@ -1118,7 +1110,6 @@ static const struct pnfs_commit_ops filelayout_commit_ops = {
.clear_request_commit = pnfs_generic_clear_request_commit,
.scan_commit_lists = pnfs_generic_scan_commit_lists,
.recover_commit_reqs = pnfs_generic_recover_commit_reqs,
- .search_commit_reqs = pnfs_generic_search_commit_reqs,
.commit_pagelist = filelayout_commit_pagelist,
};
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index acf4b88889dc..4fa304fa5bc4 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -35,6 +35,7 @@
#include "../internal.h"
#include "../nfs4session.h"
#include "filelayout.h"
+#include "../nfs4trace.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
@@ -172,6 +173,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
if (!dsaddr->ds_list[i])
goto out_err_drain_dsaddrs;
+ trace_fl_getdevinfo(server, &pdev->dev_id, dsaddr->ds_list[i]->ds_remotestr);
/* If DS was already in cache, free ds addrs */
while (!list_empty(&dsaddrs)) {
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index ef817a0475ff..98b45b636be3 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -11,6 +11,7 @@
#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
+#include <linux/file.h>
#include <linux/sched/mm.h>
#include <linux/sunrpc/metrics.h>
@@ -162,6 +163,20 @@ decode_name(struct xdr_stream *xdr, u32 *id)
return 0;
}
+static struct nfsd_file *
+ff_local_open_fh(struct pnfs_layout_segment *lseg, u32 ds_idx,
+ struct nfs_client *clp, const struct cred *cred,
+ struct nfs_fh *fh, fmode_t mode)
+{
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
+
+ return nfs_local_open_fh(clp, cred, fh, &mirror->nfl, mode);
+#else
+ return NULL;
+#endif
+}
+
static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
const struct nfs4_ff_layout_mirror *m2)
{
@@ -231,16 +246,18 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
spin_lock_init(&mirror->lock);
refcount_set(&mirror->ref, 1);
INIT_LIST_HEAD(&mirror->mirrors);
+ nfs_localio_file_init(&mirror->nfl);
}
return mirror;
}
static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
{
- const struct cred *cred;
+ const struct cred *cred;
ff_layout_remove_mirror(mirror);
kfree(mirror->fh_versions);
+ nfs_close_local_fh(&mirror->nfl);
cred = rcu_access_pointer(mirror->ro_cred);
put_cred(cred);
cred = rcu_access_pointer(mirror->rw_cred);
@@ -823,14 +840,6 @@ ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
}
static void
-ff_layout_pg_check_layout(struct nfs_pageio_descriptor *pgio,
- struct nfs_page *req)
-{
- pnfs_generic_pg_check_layout(pgio);
- pnfs_generic_pg_check_range(pgio, req);
-}
-
-static void
ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
@@ -839,8 +848,11 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs4_pnfs_ds *ds;
u32 ds_idx;
+ if (NFS_SERVER(pgio->pg_inode)->flags &
+ (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
+ pgio->pg_maxretrans = io_maxretrans;
retry:
- ff_layout_pg_check_layout(pgio, req);
+ pnfs_generic_pg_check_layout(pgio, req);
/* Use full layout for now */
if (!pgio->pg_lseg) {
ff_layout_pg_get_read(pgio, req, false);
@@ -852,6 +864,8 @@ retry:
if (!pgio->pg_lseg)
goto out_nolseg;
}
+ /* Reset wb_nio, since getting layout segment was successful */
+ req->wb_nio = 0;
ds = ff_layout_get_ds_for_read(pgio, &ds_idx);
if (!ds) {
@@ -868,14 +882,24 @@ retry:
pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
pgio->pg_mirror_idx = ds_idx;
-
- if (NFS_SERVER(pgio->pg_inode)->flags &
- (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
- pgio->pg_maxretrans = io_maxretrans;
return;
out_nolseg:
- if (pgio->pg_error < 0)
- return;
+ if (pgio->pg_error < 0) {
+ if (pgio->pg_error != -EAGAIN)
+ return;
+ /* Retry getting layout segment if lower layer returned -EAGAIN */
+ if (pgio->pg_maxretrans && req->wb_nio++ > pgio->pg_maxretrans) {
+ if (NFS_SERVER(pgio->pg_inode)->flags & NFS_MOUNT_SOFTERR)
+ pgio->pg_error = -ETIMEDOUT;
+ else
+ pgio->pg_error = -EIO;
+ return;
+ }
+ pgio->pg_error = 0;
+ /* Sleep for 1 second before retrying */
+ ssleep(1);
+ goto retry;
+ }
out_mds:
trace_pnfs_mds_fallback_pg_init_read(pgio->pg_inode,
0, NFS4_MAX_UINT64, IOMODE_READ,
@@ -895,7 +919,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
u32 i;
retry:
- ff_layout_pg_check_layout(pgio, req);
+ pnfs_generic_pg_check_layout(pgio, req);
if (!pgio->pg_lseg) {
pgio->pg_lseg =
pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req),
@@ -1764,6 +1788,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
+ struct nfsd_file *localio;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
loff_t offset = hdr->args.offset;
@@ -1810,11 +1835,18 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
hdr->args.offset = offset;
hdr->mds_offset = offset;
+ /* Start IO accounting for local read */
+ localio = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh, FMODE_READ);
+ if (localio) {
+ hdr->task.tk_start = ktime_get();
+ ff_layout_read_record_layoutstats_start(&hdr->task, hdr);
+ }
+
/* Perform an asynchronous read to ds */
nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_read_call_ops_v3 :
&ff_layout_read_call_ops_v4,
- 0, RPC_TASK_SOFTCONN);
+ 0, RPC_TASK_SOFTCONN, localio);
put_cred(ds_cred);
return PNFS_ATTEMPTED;
@@ -1834,6 +1866,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
+ struct nfsd_file *localio;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
loff_t offset = hdr->args.offset;
@@ -1878,11 +1911,19 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
*/
hdr->args.offset = offset;
+ /* Start IO accounting for local write */
+ localio = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh,
+ FMODE_READ|FMODE_WRITE);
+ if (localio) {
+ hdr->task.tk_start = ktime_get();
+ ff_layout_write_record_layoutstats_start(&hdr->task, hdr);
+ }
+
/* Perform an asynchronous write */
nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_write_call_ops_v3 :
&ff_layout_write_call_ops_v4,
- sync, RPC_TASK_SOFTCONN);
+ sync, RPC_TASK_SOFTCONN, localio);
put_cred(ds_cred);
return PNFS_ATTEMPTED;
@@ -1916,6 +1957,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
struct pnfs_layout_segment *lseg = data->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
+ struct nfsd_file *localio;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
u32 idx;
@@ -1954,10 +1996,18 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
if (fh)
data->args.fh = fh;
+ /* Start IO accounting for local commit */
+ localio = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh,
+ FMODE_READ|FMODE_WRITE);
+ if (localio) {
+ data->task.tk_start = ktime_get();
+ ff_layout_commit_record_layoutstats_start(&data->task, data);
+ }
+
ret = nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_commit_call_ops_v3 :
&ff_layout_commit_call_ops_v4,
- how, RPC_TASK_SOFTCONN);
+ how, RPC_TASK_SOFTCONN, localio);
put_cred(ds_cred);
return ret;
out_err:
@@ -2016,7 +2066,7 @@ static void ff_layout_cancel_io(struct pnfs_layout_segment *lseg)
for (idx = 0; idx < flseg->mirror_array_cnt; idx++) {
mirror = flseg->mirror_array[idx];
mirror_ds = mirror->mirror_ds;
- if (!mirror_ds)
+ if (IS_ERR_OR_NULL(mirror_ds))
continue;
ds = mirror->mirror_ds->ds;
if (!ds)
@@ -2095,12 +2145,6 @@ static int ff_layout_encode_ioerr(struct xdr_stream *xdr,
}
static void
-encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len)
-{
- WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0);
-}
-
-static void
ff_layout_encode_ff_iostat_head(struct xdr_stream *xdr,
const nfs4_stateid *stateid,
const struct nfs42_layoutstat_devinfo *devinfo)
@@ -2556,7 +2600,7 @@ ff_layout_set_layoutdriver(struct nfs_server *server,
const struct nfs_fh *dummy)
{
#if IS_ENABLED(CONFIG_NFS_V4_2)
- server->caps |= NFS_CAP_LAYOUTSTATS;
+ server->caps |= NFS_CAP_LAYOUTSTATS | NFS_CAP_REBOOT_LAYOUTRETURN;
#endif
return 0;
}
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index f84b3fb0dddd..095df09017a5 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -83,6 +83,7 @@ struct nfs4_ff_layout_mirror {
nfs4_stateid stateid;
const struct cred __rcu *ro_cred;
const struct cred __rcu *rw_cred;
+ struct nfs_file_localio nfl;
refcount_t ref;
spinlock_t lock;
unsigned long flags;
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index e028f5a0ef5f..e58bedfb1dcc 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -395,6 +395,12 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
/* connect success, check rsize/wsize limit */
if (!status) {
+ /*
+ * ds_clp is put in destroy_ds().
+ * keep ds_clp even if DS is local, so that if local IO cannot
+ * proceed somehow, we can fall back to NFS whenever we want.
+ */
+ nfs_local_probe(ds->ds_clp);
max_payload =
nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient),
NULL);
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 853e8d609bb3..b069385eea17 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -49,6 +49,7 @@ enum nfs_param {
Opt_bsize,
Opt_clientaddr,
Opt_cto,
+ Opt_alignwrite,
Opt_fg,
Opt_fscache,
Opt_fscache_flag,
@@ -149,6 +150,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
fsparam_u32 ("bsize", Opt_bsize),
fsparam_string("clientaddr", Opt_clientaddr),
fsparam_flag_no("cto", Opt_cto),
+ fsparam_flag_no("alignwrite", Opt_alignwrite),
fsparam_flag ("fg", Opt_fg),
fsparam_flag_no("fsc", Opt_fscache_flag),
fsparam_string("fsc", Opt_fscache),
@@ -592,6 +594,12 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
else
ctx->flags |= NFS_MOUNT_TRUNK_DISCOVERY;
break;
+ case Opt_alignwrite:
+ if (result.negated)
+ ctx->flags |= NFS_MOUNT_NO_ALIGNWRITE;
+ else
+ ctx->flags &= ~NFS_MOUNT_NO_ALIGNWRITE;
+ break;
case Opt_ac:
if (result.negated)
ctx->flags |= NFS_MOUNT_NOAC;
@@ -600,9 +608,11 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
break;
case Opt_lock:
if (result.negated) {
+ ctx->lock_status = NFS_LOCK_NOLOCK;
ctx->flags |= NFS_MOUNT_NONLM;
ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL);
} else {
+ ctx->lock_status = NFS_LOCK_LOCK;
ctx->flags &= ~NFS_MOUNT_NONLM;
ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL);
}
@@ -652,6 +662,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
ctx->fscache_uniq = NULL;
break;
case Opt_fscache:
+ trace_nfs_mount_assign(param->key, param->string);
ctx->options |= NFS_OPTION_FSCACHE;
kfree(ctx->fscache_uniq);
ctx->fscache_uniq = param->string;
@@ -1111,9 +1122,12 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
ctx->acdirmax = data->acdirmax;
ctx->need_mount = false;
- memcpy(sap, &data->addr, sizeof(data->addr));
- ctx->nfs_server.addrlen = sizeof(data->addr);
- ctx->nfs_server.port = ntohs(data->addr.sin_port);
+ if (!is_remount_fc(fc)) {
+ memcpy(sap, &data->addr, sizeof(data->addr));
+ ctx->nfs_server.addrlen = sizeof(data->addr);
+ ctx->nfs_server.port = ntohs(data->addr.sin_port);
+ }
+
if (sap->ss_family != AF_INET ||
!nfs_verify_server_address(sap))
goto out_no_address;
@@ -1453,7 +1467,7 @@ static int nfs_fs_context_validate(struct fs_context *fc)
/* Load the NFS protocol module if we haven't done so yet */
if (!ctx->nfs_mod) {
- nfs_mod = get_nfs_version(ctx->version);
+ nfs_mod = find_nfs_version(ctx->version);
if (IS_ERR(nfs_mod)) {
ret = PTR_ERR(nfs_mod);
goto out_version_unavailable;
@@ -1527,7 +1541,7 @@ static int nfs_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
}
nfs_copy_fh(ctx->mntfh, src->mntfh);
- __module_get(ctx->nfs_mod->owner);
+ get_nfs_version(ctx->nfs_mod);
ctx->client_address = NULL;
ctx->mount_server.hostname = NULL;
ctx->nfs_server.export_path = NULL;
@@ -1619,7 +1633,7 @@ static int nfs_init_fs_context(struct fs_context *fc)
}
ctx->nfs_mod = nfss->nfs_client->cl_nfs_mod;
- __module_get(ctx->nfs_mod->owner);
+ get_nfs_version(ctx->nfs_mod);
} else {
/* defaults */
ctx->timeo = NFS_UNSPEC_TIMEO;
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 2d1bfee225c3..e278a1ad1ca3 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -263,15 +263,25 @@ int nfs_netfs_readahead(struct readahead_control *ractl)
static atomic_t nfs_netfs_debug_id;
static int nfs_netfs_init_request(struct netfs_io_request *rreq, struct file *file)
{
+ if (!file) {
+ if (WARN_ON_ONCE(rreq->origin != NETFS_PGPRIV2_COPY_TO_CACHE))
+ return -EIO;
+ return 0;
+ }
+
rreq->netfs_priv = get_nfs_open_context(nfs_file_open_context(file));
rreq->debug_id = atomic_inc_return(&nfs_netfs_debug_id);
+ /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
+ __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
+ rreq->io_streams[0].sreq_max_len = NFS_SB(rreq->inode->i_sb)->rsize;
return 0;
}
static void nfs_netfs_free_request(struct netfs_io_request *rreq)
{
- put_nfs_open_context(rreq->netfs_priv);
+ if (rreq->netfs_priv)
+ put_nfs_open_context(rreq->netfs_priv);
}
static struct nfs_netfs_io_data *nfs_netfs_alloc(struct netfs_io_subrequest *sreq)
@@ -286,14 +296,6 @@ static struct nfs_netfs_io_data *nfs_netfs_alloc(struct netfs_io_subrequest *sre
return netfs;
}
-static bool nfs_netfs_clamp_length(struct netfs_io_subrequest *sreq)
-{
- size_t rsize = NFS_SB(sreq->rreq->inode->i_sb)->rsize;
-
- sreq->len = min(sreq->len, rsize);
- return true;
-}
-
static void nfs_netfs_issue_read(struct netfs_io_subrequest *sreq)
{
struct nfs_netfs_io_data *netfs;
@@ -301,34 +303,32 @@ static void nfs_netfs_issue_read(struct netfs_io_subrequest *sreq)
struct inode *inode = sreq->rreq->inode;
struct nfs_open_context *ctx = sreq->rreq->netfs_priv;
struct page *page;
+ unsigned long idx;
+ pgoff_t start, last;
int err;
- pgoff_t start = (sreq->start + sreq->transferred) >> PAGE_SHIFT;
- pgoff_t last = ((sreq->start + sreq->len -
- sreq->transferred - 1) >> PAGE_SHIFT);
- XA_STATE(xas, &sreq->rreq->mapping->i_pages, start);
+
+ start = (sreq->start + sreq->transferred) >> PAGE_SHIFT;
+ last = ((sreq->start + sreq->len - sreq->transferred - 1) >> PAGE_SHIFT);
nfs_pageio_init_read(&pgio, inode, false,
&nfs_async_read_completion_ops);
netfs = nfs_netfs_alloc(sreq);
- if (!netfs)
- return netfs_subreq_terminated(sreq, -ENOMEM, false);
+ if (!netfs) {
+ sreq->error = -ENOMEM;
+ return netfs_read_subreq_terminated(sreq);
+ }
pgio.pg_netfs = netfs; /* used in completion */
- xas_lock(&xas);
- xas_for_each(&xas, page, last) {
+ xa_for_each_range(&sreq->rreq->mapping->i_pages, idx, page, start, last) {
/* nfs_read_add_folio() may schedule() due to pNFS layout and other RPCs */
- xas_pause(&xas);
- xas_unlock(&xas);
err = nfs_read_add_folio(&pgio, ctx, page_folio(page));
if (err < 0) {
netfs->error = err;
goto out;
}
- xas_lock(&xas);
}
- xas_unlock(&xas);
out:
nfs_pageio_complete_read(&pgio);
nfs_netfs_put(netfs);
@@ -346,7 +346,7 @@ void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr)
int nfs_netfs_folio_unlock(struct folio *folio)
{
- struct inode *inode = folio_file_mapping(folio)->host;
+ struct inode *inode = folio->mapping->host;
/*
* If fscache is enabled, netfs will unlock pages.
@@ -366,7 +366,8 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr)
return;
sreq = netfs->sreq;
- if (test_bit(NFS_IOHDR_EOF, &hdr->flags))
+ if (test_bit(NFS_IOHDR_EOF, &hdr->flags) &&
+ sreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &sreq->flags);
if (hdr->error)
@@ -382,5 +383,4 @@ const struct netfs_request_ops nfs_netfs_ops = {
.init_request = nfs_netfs_init_request,
.free_request = nfs_netfs_free_request,
.issue_read = nfs_netfs_issue_read,
- .clamp_length = nfs_netfs_clamp_length
};
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index e3cb4923316b..9d86868f4998 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -60,8 +60,6 @@ static inline void nfs_netfs_get(struct nfs_netfs_io_data *netfs)
static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs)
{
- ssize_t final_len;
-
/* Only the last RPC completion should call netfs_subreq_terminated() */
if (!refcount_dec_and_test(&netfs->refcount))
return;
@@ -74,8 +72,10 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs)
* Correct the final length here to be no larger than the netfs subrequest
* length, and thus avoid netfs's "Subreq overread" warning message.
*/
- final_len = min_t(s64, netfs->sreq->len, atomic64_read(&netfs->transferred));
- netfs_subreq_terminated(netfs->sreq, netfs->error ?: final_len, false);
+ netfs->sreq->transferred = min_t(s64, netfs->sreq->len,
+ atomic64_read(&netfs->transferred));
+ netfs->sreq->error = netfs->error;
+ netfs_read_subreq_terminated(netfs->sreq);
kfree(netfs);
}
static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi)
@@ -101,10 +101,10 @@ extern int nfs_netfs_read_folio(struct file *file, struct folio *folio);
static inline bool nfs_fscache_release_folio(struct folio *folio, gfp_t gfp)
{
- if (folio_test_fscache(folio)) {
+ if (folio_test_private_2(folio)) { /* [DEPRECATED] */
if (current_is_kswapd() || !(gfp & __GFP_FS))
return false;
- folio_wait_fscache(folio);
+ folio_wait_private_2(folio);
}
fscache_note_page_release(netfs_i_cookie(netfs_inode(folio->mapping->host)));
return true;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 11ff2b2e060f..f13d25d95b85 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -62,7 +62,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
}
/*
- * get an NFS2/NFS3 root dentry from the root filehandle
+ * get a root dentry from the root filehandle
*/
int nfs_get_root(struct super_block *s, struct fs_context *fc)
{
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index ebb8d60e1152..1aa67fca69b2 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -190,9 +190,8 @@ static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi)
void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
{
struct nfs_inode *nfsi = NFS_I(inode);
- bool have_delegation = NFS_PROTO(inode)->have_delegation(inode, FMODE_READ);
- if (have_delegation) {
+ if (nfs_have_delegated_attributes(inode)) {
if (!(flags & NFS_INO_REVAL_FORCED))
flags &= ~(NFS_INO_INVALID_MODE |
NFS_INO_INVALID_OTHER |
@@ -206,12 +205,15 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
nfs_fscache_invalidate(inode, 0);
flags &= ~NFS_INO_REVAL_FORCED;
- nfsi->cache_validity |= flags;
+ flags |= nfsi->cache_validity;
+ if (inode->i_mapping->nrpages == 0)
+ flags &= ~NFS_INO_INVALID_DATA;
- if (inode->i_mapping->nrpages == 0) {
- nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
- nfs_ooo_clear(nfsi);
- } else if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
+ /* pairs with nfs_clear_invalid_mapping()'s smp_load_acquire() */
+ smp_store_release(&nfsi->cache_validity, flags);
+
+ if (inode->i_mapping->nrpages == 0 ||
+ nfsi->cache_validity & NFS_INO_INVALID_DATA) {
nfs_ooo_clear(nfsi);
}
trace_nfs_set_cache_invalid(inode, 0);
@@ -276,6 +278,8 @@ EXPORT_SYMBOL_GPL(nfs_zap_acl_cache);
void nfs_invalidate_atime(struct inode *inode)
{
+ if (nfs_have_delegated_atime(inode))
+ return;
spin_lock(&inode->i_lock);
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
spin_unlock(&inode->i_lock);
@@ -491,6 +495,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops;
inode->i_data.a_ops = &nfs_file_aops;
nfs_inode_init_regular(nfsi);
+ mapping_set_large_folios(inode->i_mapping);
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
inode->i_fop = &nfs_dir_operations;
@@ -604,6 +609,67 @@ out_no_inode:
}
EXPORT_SYMBOL_GPL(nfs_fhget);
+static void
+nfs_fattr_fixup_delegated(struct inode *inode, struct nfs_fattr *fattr)
+{
+ unsigned long cache_validity = NFS_I(inode)->cache_validity;
+
+ if (nfs_have_delegated_mtime(inode)) {
+ if (!(cache_validity & NFS_INO_INVALID_CTIME))
+ fattr->valid &= ~(NFS_ATTR_FATTR_PRECTIME |
+ NFS_ATTR_FATTR_CTIME);
+
+ if (!(cache_validity & NFS_INO_INVALID_MTIME))
+ fattr->valid &= ~(NFS_ATTR_FATTR_PREMTIME |
+ NFS_ATTR_FATTR_MTIME);
+
+ if (!(cache_validity & NFS_INO_INVALID_ATIME))
+ fattr->valid &= ~NFS_ATTR_FATTR_ATIME;
+ } else if (nfs_have_delegated_atime(inode)) {
+ if (!(cache_validity & NFS_INO_INVALID_ATIME))
+ fattr->valid &= ~NFS_ATTR_FATTR_ATIME;
+ }
+}
+
+static void nfs_update_timestamps(struct inode *inode, unsigned int ia_valid)
+{
+ enum file_time_flags time_flags = 0;
+ unsigned int cache_flags = 0;
+
+ if (ia_valid & ATTR_MTIME) {
+ time_flags |= S_MTIME | S_CTIME;
+ cache_flags |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;
+ }
+ if (ia_valid & ATTR_ATIME) {
+ time_flags |= S_ATIME;
+ cache_flags |= NFS_INO_INVALID_ATIME;
+ }
+ inode_update_timestamps(inode, time_flags);
+ NFS_I(inode)->cache_validity &= ~cache_flags;
+}
+
+void nfs_update_delegated_atime(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ if (nfs_have_delegated_atime(inode))
+ nfs_update_timestamps(inode, ATTR_ATIME);
+ spin_unlock(&inode->i_lock);
+}
+
+void nfs_update_delegated_mtime_locked(struct inode *inode)
+{
+ if (nfs_have_delegated_mtime(inode))
+ nfs_update_timestamps(inode, ATTR_MTIME);
+}
+
+void nfs_update_delegated_mtime(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ nfs_update_delegated_mtime_locked(inode);
+ spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL_GPL(nfs_update_delegated_mtime);
+
#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN)
int
@@ -631,6 +697,18 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
attr->ia_valid &= ~ATTR_SIZE;
}
+ if (nfs_have_delegated_mtime(inode) && attr->ia_valid & ATTR_MTIME) {
+ spin_lock(&inode->i_lock);
+ nfs_update_timestamps(inode, attr->ia_valid);
+ spin_unlock(&inode->i_lock);
+ attr->ia_valid &= ~(ATTR_MTIME | ATTR_ATIME);
+ } else if (nfs_have_delegated_atime(inode) &&
+ attr->ia_valid & ATTR_ATIME &&
+ !(attr->ia_valid & ATTR_MTIME)) {
+ nfs_update_delegated_atime(inode);
+ attr->ia_valid &= ~ATTR_ATIME;
+ }
+
/* Optimization: if the end result is no change, don't RPC */
if (((attr->ia_valid & NFS_VALID_ATTRS) & ~(ATTR_FILE|ATTR_OPEN)) == 0)
return 0;
@@ -686,6 +764,7 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset)
spin_unlock(&inode->i_lock);
truncate_pagecache(inode, offset);
+ nfs_update_delegated_mtime_locked(inode);
spin_lock(&inode->i_lock);
out:
return err;
@@ -709,8 +788,9 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
spin_lock(&inode->i_lock);
NFS_I(inode)->attr_gencount = fattr->gencount;
if ((attr->ia_valid & ATTR_SIZE) != 0) {
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME |
- NFS_INO_INVALID_BLOCKS);
+ if (!nfs_have_delegated_mtime(inode))
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
nfs_vmtruncate(inode, attr->ia_size);
}
@@ -856,8 +936,12 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path,
/* Flush out writes to the server in order to update c/mtime/version. */
if ((request_mask & (STATX_CTIME | STATX_MTIME | STATX_CHANGE_COOKIE)) &&
- S_ISREG(inode->i_mode))
- filemap_write_and_wait(inode->i_mapping);
+ S_ISREG(inode->i_mode)) {
+ if (nfs_have_delegated_mtime(inode))
+ filemap_fdatawrite(inode->i_mapping);
+ else
+ filemap_write_and_wait(inode->i_mapping);
+ }
/*
* We may force a getattr if the user cares about atime.
@@ -1012,7 +1096,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
if (!is_sync)
return;
inode = d_inode(ctx->dentry);
- if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+ if (nfs_have_read_or_write_delegation(inode))
return;
nfsi = NFS_I(inode);
if (inode->i_mapping->nrpages == 0)
@@ -1053,6 +1137,8 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry,
ctx->lock_context.open_context = ctx;
INIT_LIST_HEAD(&ctx->list);
ctx->mdsthreshold = NULL;
+ nfs_localio_file_init(&ctx->nfl);
+
return ctx;
}
EXPORT_SYMBOL_GPL(alloc_nfs_open_context);
@@ -1084,6 +1170,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
nfs_sb_deactive(sb);
put_rpccred(rcu_dereference_protected(ctx->ll_cred, 1));
kfree(ctx->mdsthreshold);
+ nfs_close_local_fh(&ctx->nfl);
kfree_rcu(ctx, rcu_head);
}
@@ -1340,6 +1427,13 @@ int nfs_clear_invalid_mapping(struct address_space *mapping)
TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
if (ret)
goto out;
+ smp_rmb(); /* pairs with smp_wmb() below */
+ if (test_bit(NFS_INO_INVALIDATING, bitlock))
+ continue;
+ /* pairs with nfs_set_cache_invalid()'s smp_store_release() */
+ if (!(smp_load_acquire(&nfsi->cache_validity) & NFS_INO_INVALID_DATA))
+ goto out;
+ /* Slow-path that double-checks with spinlock held */
spin_lock(&inode->i_lock);
if (test_bit(NFS_INO_INVALIDATING, bitlock)) {
spin_unlock(&inode->i_lock);
@@ -1482,7 +1576,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
unsigned long invalid = 0;
struct timespec64 ts;
- if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+ if (nfs_have_delegated_attributes(inode))
return 0;
if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) {
@@ -1565,6 +1659,7 @@ void nfs_fattr_init(struct nfs_fattr *fattr)
fattr->gencount = nfs_inc_attr_generation_counter();
fattr->owner_name = NULL;
fattr->group_name = NULL;
+ fattr->mdsthreshold = NULL;
}
EXPORT_SYMBOL_GPL(nfs_fattr_init);
@@ -2118,6 +2213,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
*/
nfsi->read_cache_jiffies = fattr->time_start;
+ /* Fix up any delegated attributes in the struct nfs_fattr */
+ nfs_fattr_fixup_delegated(inode, fattr);
+
save_cache_validity = nfsi->cache_validity;
nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ATIME
@@ -2372,7 +2470,7 @@ static int __init nfs_init_inodecache(void)
nfs_inode_cachep = kmem_cache_create("nfs_inode_cache",
sizeof(struct nfs_inode),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
init_once);
if (nfs_inode_cachep == NULL)
return -ENOMEM;
@@ -2390,35 +2488,54 @@ static void nfs_destroy_inodecache(void)
kmem_cache_destroy(nfs_inode_cachep);
}
+struct workqueue_struct *nfslocaliod_workqueue;
struct workqueue_struct *nfsiod_workqueue;
EXPORT_SYMBOL_GPL(nfsiod_workqueue);
/*
- * start up the nfsiod workqueue
+ * Destroy the nfsiod workqueues
*/
-static int nfsiod_start(void)
+static void nfsiod_stop(void)
{
struct workqueue_struct *wq;
- dprintk("RPC: creating workqueue nfsiod\n");
- wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
- if (wq == NULL)
- return -ENOMEM;
- nfsiod_workqueue = wq;
- return 0;
+
+ wq = nfsiod_workqueue;
+ if (wq != NULL) {
+ nfsiod_workqueue = NULL;
+ destroy_workqueue(wq);
+ }
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ wq = nfslocaliod_workqueue;
+ if (wq != NULL) {
+ nfslocaliod_workqueue = NULL;
+ destroy_workqueue(wq);
+ }
+#endif /* CONFIG_NFS_LOCALIO */
}
/*
- * Destroy the nfsiod workqueue
+ * Start the nfsiod workqueues
*/
-static void nfsiod_stop(void)
+static int nfsiod_start(void)
{
- struct workqueue_struct *wq;
-
- wq = nfsiod_workqueue;
- if (wq == NULL)
- return;
- nfsiod_workqueue = NULL;
- destroy_workqueue(wq);
+ dprintk("RPC: creating workqueue nfsiod\n");
+ nfsiod_workqueue = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
+ if (nfsiod_workqueue == NULL)
+ return -ENOMEM;
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ /*
+ * localio writes need to use a normal (non-memreclaim) workqueue.
+ * When we start getting low on space, XFS goes and calls flush_work() on
+ * a non-memreclaim work queue, which causes a priority inversion problem.
+ */
+ dprintk("RPC: creating workqueue nfslocaliod\n");
+ nfslocaliod_workqueue = alloc_workqueue("nfslocaliod", WQ_UNBOUND, 0);
+ if (unlikely(nfslocaliod_workqueue == NULL)) {
+ nfsiod_stop();
+ return -ENOMEM;
+ }
+#endif /* CONFIG_NFS_LOCALIO */
+ return 0;
}
unsigned int nfs_net_id;
@@ -2426,12 +2543,21 @@ EXPORT_SYMBOL_GPL(nfs_net_id);
static int nfs_net_init(struct net *net)
{
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
+
nfs_clients_init(net);
+
+ if (!rpc_proc_register(net, &nn->rpcstats)) {
+ nfs_clients_exit(net);
+ return -ENOMEM;
+ }
+
return nfs_fs_proc_net_init(net);
}
static void nfs_net_exit(struct net *net)
{
+ rpc_proc_unregister(net, "nfs");
nfs_fs_proc_net_exit(net);
nfs_clients_exit(net);
}
@@ -2486,15 +2612,12 @@ static int __init init_nfs_fs(void)
if (err)
goto out1;
- rpc_proc_register(&init_net, &nfs_rpcstat);
-
err = register_nfs_fs();
if (err)
goto out0;
return 0;
out0:
- rpc_proc_unregister(&init_net, "nfs");
nfs_destroy_directcache();
out1:
nfs_destroy_writepagecache();
@@ -2524,7 +2647,6 @@ static void __exit exit_nfs_fs(void)
nfs_destroy_inodecache();
nfs_destroy_nfspagecache();
unregister_pernet_subsys(&nfs_net_ops);
- rpc_proc_unregister(&init_net, "nfs");
unregister_nfs_fs();
nfs_fs_proc_exit();
nfsiod_stop();
@@ -2533,6 +2655,7 @@ static void __exit exit_nfs_fs(void)
/* Not quite true; I just maintain it */
MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
+MODULE_DESCRIPTION("NFS client support");
MODULE_LICENSE("GPL");
module_param(enable_ino64, bool, 0644);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e3722ce6722e..fae2c7ae4acc 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -6,12 +6,14 @@
#include "nfs4_fs.h"
#include <linux/fs_context.h>
#include <linux/security.h>
+#include <linux/compiler_attributes.h>
#include <linux/crc32.h>
#include <linux/sunrpc/addr.h>
#include <linux/nfs_page.h>
+#include <linux/nfslocalio.h>
#include <linux/wait_bit.h>
-#define NFS_SB_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS)
+#define NFS_SB_MASK (SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS)
extern const struct export_operations nfs_export_ops;
@@ -112,6 +114,7 @@ struct nfs_fs_context {
unsigned short protofamily;
unsigned short mountfamily;
bool has_sec_mnt_opts;
+ int lock_status;
struct {
union {
@@ -153,6 +156,12 @@ struct nfs_fs_context {
} clone_data;
};
+enum nfs_lock_status {
+ NFS_LOCK_NOT_SET = 0,
+ NFS_LOCK_LOCK = 1,
+ NFS_LOCK_NOLOCK = 2,
+};
+
#define nfs_errorf(fc, fmt, ...) ((fc)->log.log ? \
errorf(fc, fmt, ## __VA_ARGS__) : \
({ dprintk(fmt "\n", ## __VA_ARGS__); }))
@@ -301,7 +310,8 @@ void nfs_pgio_header_free(struct nfs_pgio_header *);
int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
const struct cred *cred, const struct nfs_rpc_ops *rpc_ops,
- const struct rpc_call_ops *call_ops, int how, int flags);
+ const struct rpc_call_ops *call_ops, int how, int flags,
+ struct nfsd_file *localio);
void nfs_free_request(struct nfs_page *req);
struct nfs_pgio_mirror *
nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc);
@@ -431,6 +441,7 @@ int nfs_check_flags(int);
/* inode.c */
extern struct workqueue_struct *nfsiod_workqueue;
+extern struct workqueue_struct *nfslocaliod_workqueue;
extern struct inode *nfs_alloc_inode(struct super_block *sb);
extern void nfs_free_inode(struct inode *);
extern int nfs_write_inode(struct inode *, struct writeback_control *);
@@ -442,6 +453,54 @@ extern void nfs_set_cache_invalid(struct inode *inode, unsigned long flags);
extern bool nfs_check_cache_invalid(struct inode *, unsigned long);
extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode);
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+/* localio.c */
+extern void nfs_local_probe(struct nfs_client *);
+extern void nfs_local_probe_async(struct nfs_client *);
+extern void nfs_local_probe_async_work(struct work_struct *);
+extern struct nfsd_file *nfs_local_open_fh(struct nfs_client *,
+ const struct cred *,
+ struct nfs_fh *,
+ struct nfs_file_localio *,
+ const fmode_t);
+extern int nfs_local_doio(struct nfs_client *,
+ struct nfsd_file *,
+ struct nfs_pgio_header *,
+ const struct rpc_call_ops *);
+extern int nfs_local_commit(struct nfsd_file *,
+ struct nfs_commit_data *,
+ const struct rpc_call_ops *, int);
+extern bool nfs_server_is_local(const struct nfs_client *clp);
+
+#else /* CONFIG_NFS_LOCALIO */
+static inline void nfs_local_probe(struct nfs_client *clp) {}
+static inline void nfs_local_probe_async(struct nfs_client *clp) {}
+static inline struct nfsd_file *
+nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
+ struct nfs_fh *fh, struct nfs_file_localio *nfl,
+ const fmode_t mode)
+{
+ return NULL;
+}
+static inline int nfs_local_doio(struct nfs_client *clp,
+ struct nfsd_file *localio,
+ struct nfs_pgio_header *hdr,
+ const struct rpc_call_ops *call_ops)
+{
+ return -EINVAL;
+}
+static inline int nfs_local_commit(struct nfsd_file *localio,
+ struct nfs_commit_data *data,
+ const struct rpc_call_ops *call_ops, int how)
+{
+ return -EINVAL;
+}
+static inline bool nfs_server_is_local(const struct nfs_client *clp)
+{
+ return false;
+}
+#endif /* CONFIG_NFS_LOCALIO */
+
/* super.c */
extern const struct super_operations nfs_sops;
bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t);
@@ -449,8 +508,6 @@ int nfs_try_get_tree(struct fs_context *);
int nfs_get_tree_common(struct fs_context *);
void nfs_kill_super(struct super_block *);
-extern struct rpc_stat nfs_rpcstat;
-
extern int __init register_nfs_fs(void);
extern void __exit unregister_nfs_fs(void);
extern bool nfs_sb_active(struct super_block *sb);
@@ -463,11 +520,11 @@ extern const struct netfs_request_ops nfs_netfs_ops;
#endif
/* io.c */
-extern void nfs_start_io_read(struct inode *inode);
+extern __must_check int nfs_start_io_read(struct inode *inode);
extern void nfs_end_io_read(struct inode *inode);
-extern void nfs_start_io_write(struct inode *inode);
+extern __must_check int nfs_start_io_write(struct inode *inode);
extern void nfs_end_io_write(struct inode *inode);
-extern void nfs_start_io_direct(struct inode *inode);
+extern __must_check int nfs_start_io_direct(struct inode *inode);
extern void nfs_end_io_direct(struct inode *inode);
static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi)
@@ -500,7 +557,6 @@ extern int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio,
struct nfs_open_context *ctx,
struct folio *folio);
extern void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio);
-extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
/* super.c */
@@ -523,7 +579,8 @@ extern int nfs_initiate_commit(struct rpc_clnt *clnt,
struct nfs_commit_data *data,
const struct nfs_rpc_ops *nfs_ops,
const struct rpc_call_ops *call_ops,
- int how, int flags);
+ int how, int flags,
+ struct nfsd_file *localio);
extern void nfs_init_commit(struct nfs_commit_data *data,
struct list_head *head,
struct pnfs_layout_segment *lseg,
@@ -712,9 +769,9 @@ unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp)
if ((bsize & (bsize - 1)) || nrbitsp) {
unsigned char nrbits;
- for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--)
+ for (nrbits = 31; nrbits && !(bsize & (1UL << nrbits)); nrbits--)
;
- bsize = 1 << nrbits;
+ bsize = 1UL << nrbits;
if (nrbitsp)
*nrbitsp = nrbits;
}
@@ -780,7 +837,7 @@ static inline void nfs_folio_mark_unstable(struct folio *folio,
struct nfs_commit_info *cinfo)
{
if (folio && !cinfo->dreq) {
- struct inode *inode = folio_file_mapping(folio)->host;
+ struct inode *inode = folio->mapping->host;
long nr = folio_nr_pages(folio);
/* This page is really still in write-back - just that the
@@ -795,31 +852,12 @@ static inline void nfs_folio_mark_unstable(struct folio *folio,
/*
* Determine the number of bytes of data the page contains
*/
-static inline
-unsigned int nfs_page_length(struct page *page)
-{
- loff_t i_size = i_size_read(page_file_mapping(page)->host);
-
- if (i_size > 0) {
- pgoff_t index = page_index(page);
- pgoff_t end_index = (i_size - 1) >> PAGE_SHIFT;
- if (index < end_index)
- return PAGE_SIZE;
- if (index == end_index)
- return ((i_size - 1) & ~PAGE_MASK) + 1;
- }
- return 0;
-}
-
-/*
- * Determine the number of bytes of data the page contains
- */
static inline size_t nfs_folio_length(struct folio *folio)
{
- loff_t i_size = i_size_read(folio_file_mapping(folio)->host);
+ loff_t i_size = i_size_read(folio->mapping->host);
if (i_size > 0) {
- pgoff_t index = folio_index(folio) >> folio_order(folio);
+ pgoff_t index = folio->index >> folio_order(folio);
pgoff_t end_index = (i_size - 1) >> folio_shift(folio);
if (index < end_index)
return folio_size(folio);
diff --git a/fs/nfs/io.c b/fs/nfs/io.c
index b5551ed8f648..3388faf2acb9 100644
--- a/fs/nfs/io.c
+++ b/fs/nfs/io.c
@@ -39,19 +39,28 @@ static void nfs_block_o_direct(struct nfs_inode *nfsi, struct inode *inode)
* Note that buffered writes and truncates both take a write lock on
* inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
*/
-void
+int
nfs_start_io_read(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ int err;
+
/* Be an optimist! */
- down_read(&inode->i_rwsem);
+ err = down_read_killable(&inode->i_rwsem);
+ if (err)
+ return err;
if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) == 0)
- return;
+ return 0;
up_read(&inode->i_rwsem);
+
/* Slow path.... */
- down_write(&inode->i_rwsem);
+ err = down_write_killable(&inode->i_rwsem);
+ if (err)
+ return err;
nfs_block_o_direct(nfsi, inode);
downgrade_write(&inode->i_rwsem);
+
+ return 0;
}
/**
@@ -74,11 +83,15 @@ nfs_end_io_read(struct inode *inode)
* Declare that a buffered read operation is about to start, and ensure
* that we block all direct I/O.
*/
-void
+int
nfs_start_io_write(struct inode *inode)
{
- down_write(&inode->i_rwsem);
- nfs_block_o_direct(NFS_I(inode), inode);
+ int err;
+
+ err = down_write_killable(&inode->i_rwsem);
+ if (!err)
+ nfs_block_o_direct(NFS_I(inode), inode);
+ return err;
}
/**
@@ -119,19 +132,28 @@ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode)
* Note that buffered writes and truncates both take a write lock on
* inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
*/
-void
+int
nfs_start_io_direct(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ int err;
+
/* Be an optimist! */
- down_read(&inode->i_rwsem);
+ err = down_read_killable(&inode->i_rwsem);
+ if (err)
+ return err;
if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) != 0)
- return;
+ return 0;
up_read(&inode->i_rwsem);
+
/* Slow path.... */
- down_write(&inode->i_rwsem);
+ err = down_write_killable(&inode->i_rwsem);
+ if (err)
+ return err;
nfs_block_buffered(nfsi, inode);
downgrade_write(&inode->i_rwsem);
+
+ return 0;
}
/**
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
index 5aa776b5a3e7..49862c95b224 100644
--- a/fs/nfs/iostat.h
+++ b/fs/nfs/iostat.h
@@ -46,10 +46,11 @@ static inline void nfs_add_stats(const struct inode *inode,
nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
}
-static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void)
-{
- return alloc_percpu(struct nfs_iostats);
-}
+/*
+ * This specialized allocator has to be a macro for its allocations to be
+ * accounted separately (to have a separate alloc_tag).
+ */
+#define nfs_alloc_iostats() alloc_percpu(struct nfs_iostats)
static inline void nfs_free_iostats(struct nfs_iostats __percpu *stats)
{
diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
new file mode 100644
index 000000000000..5c21caeae075
--- /dev/null
+++ b/fs/nfs/localio.c
@@ -0,0 +1,867 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * NFS client support for local clients to bypass network stack
+ *
+ * Copyright (C) 2014 Weston Andros Adamson <dros@primarydata.com>
+ * Copyright (C) 2019 Trond Myklebust <trond.myklebust@hammerspace.com>
+ * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com>
+ * Copyright (C) 2024 NeilBrown <neilb@suse.de>
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/vfs.h>
+#include <linux/file.h>
+#include <linux/inet.h>
+#include <linux/sunrpc/addr.h>
+#include <linux/inetdevice.h>
+#include <net/addrconf.h>
+#include <linux/nfs_common.h>
+#include <linux/nfslocalio.h>
+#include <linux/bvec.h>
+
+#include <linux/nfs.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_xdr.h>
+
+#include "internal.h"
+#include "pnfs.h"
+#include "nfstrace.h"
+
+#define NFSDBG_FACILITY NFSDBG_VFS
+
+struct nfs_local_kiocb {
+ struct kiocb kiocb;
+ struct bio_vec *bvec;
+ struct nfs_pgio_header *hdr;
+ struct work_struct work;
+ void (*aio_complete_work)(struct work_struct *);
+ struct nfsd_file *localio;
+};
+
+struct nfs_local_fsync_ctx {
+ struct nfsd_file *localio;
+ struct nfs_commit_data *data;
+ struct work_struct work;
+ struct completion *done;
+};
+
+static bool localio_enabled __read_mostly = true;
+module_param(localio_enabled, bool, 0644);
+
+static bool localio_O_DIRECT_semantics __read_mostly = false;
+module_param(localio_O_DIRECT_semantics, bool, 0644);
+MODULE_PARM_DESC(localio_O_DIRECT_semantics,
+ "LOCALIO will use O_DIRECT semantics to filesystem.");
+
+static inline bool nfs_client_is_local(const struct nfs_client *clp)
+{
+ return !!rcu_access_pointer(clp->cl_uuid.net);
+}
+
+bool nfs_server_is_local(const struct nfs_client *clp)
+{
+ return nfs_client_is_local(clp) && localio_enabled;
+}
+EXPORT_SYMBOL_GPL(nfs_server_is_local);
+
+/*
+ * UUID_IS_LOCAL XDR functions
+ */
+
+static void localio_xdr_enc_uuidargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const void *data)
+{
+ const u8 *uuid = data;
+
+ encode_opaque_fixed(xdr, uuid, UUID_SIZE);
+}
+
+static int localio_xdr_dec_uuidres(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ void *result)
+{
+ /* void return */
+ return 0;
+}
+
+static const struct rpc_procinfo nfs_localio_procedures[] = {
+ [LOCALIOPROC_UUID_IS_LOCAL] = {
+ .p_proc = LOCALIOPROC_UUID_IS_LOCAL,
+ .p_encode = localio_xdr_enc_uuidargs,
+ .p_decode = localio_xdr_dec_uuidres,
+ .p_arglen = XDR_QUADLEN(UUID_SIZE),
+ .p_replen = 0,
+ .p_statidx = LOCALIOPROC_UUID_IS_LOCAL,
+ .p_name = "UUID_IS_LOCAL",
+ },
+};
+
+static unsigned int nfs_localio_counts[ARRAY_SIZE(nfs_localio_procedures)];
+static const struct rpc_version nfslocalio_version1 = {
+ .number = 1,
+ .nrprocs = ARRAY_SIZE(nfs_localio_procedures),
+ .procs = nfs_localio_procedures,
+ .counts = nfs_localio_counts,
+};
+
+static const struct rpc_version *nfslocalio_version[] = {
+ [1] = &nfslocalio_version1,
+};
+
+extern const struct rpc_program nfslocalio_program;
+static struct rpc_stat nfslocalio_rpcstat = { &nfslocalio_program };
+
+const struct rpc_program nfslocalio_program = {
+ .name = "nfslocalio",
+ .number = NFS_LOCALIO_PROGRAM,
+ .nrvers = ARRAY_SIZE(nfslocalio_version),
+ .version = nfslocalio_version,
+ .stats = &nfslocalio_rpcstat,
+};
+
+/*
+ * nfs_init_localioclient - Initialise an NFS localio client connection
+ */
+static struct rpc_clnt *nfs_init_localioclient(struct nfs_client *clp)
+{
+ struct rpc_clnt *rpcclient_localio;
+
+ rpcclient_localio = rpc_bind_new_program(clp->cl_rpcclient,
+ &nfslocalio_program, 1);
+
+ dprintk_rcu("%s: server (%s) %s NFS LOCALIO.\n",
+ __func__, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
+ (IS_ERR(rpcclient_localio) ? "does not support" : "supports"));
+
+ return rpcclient_localio;
+}
+
+static bool nfs_server_uuid_is_local(struct nfs_client *clp)
+{
+ u8 uuid[UUID_SIZE];
+ struct rpc_message msg = {
+ .rpc_argp = &uuid,
+ };
+ struct rpc_clnt *rpcclient_localio;
+ int status;
+
+ rpcclient_localio = nfs_init_localioclient(clp);
+ if (IS_ERR(rpcclient_localio))
+ return false;
+
+ export_uuid(uuid, &clp->cl_uuid.uuid);
+
+ msg.rpc_proc = &nfs_localio_procedures[LOCALIOPROC_UUID_IS_LOCAL];
+ status = rpc_call_sync(rpcclient_localio, &msg, 0);
+ dprintk("%s: NFS reply UUID_IS_LOCAL: status=%d\n",
+ __func__, status);
+ rpc_shutdown_client(rpcclient_localio);
+
+ /* Server is only local if it initialized required struct members */
+ if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom)
+ return false;
+
+ return true;
+}
+
+/*
+ * nfs_local_probe - probe local i/o support for an nfs_server and nfs_client
+ * - called after alloc_client and init_client (so cl_rpcclient exists)
+ * - this function is idempotent, it can be called for old or new clients
+ */
+void nfs_local_probe(struct nfs_client *clp)
+{
+ /* Disallow localio if disabled via sysfs or AUTH_SYS isn't used */
+ if (!localio_enabled ||
+ clp->cl_rpcclient->cl_auth->au_flavor != RPC_AUTH_UNIX) {
+ nfs_localio_disable_client(clp);
+ return;
+ }
+
+ if (nfs_client_is_local(clp)) {
+ /* If already enabled, disable and re-enable */
+ nfs_localio_disable_client(clp);
+ }
+
+ if (!nfs_uuid_begin(&clp->cl_uuid))
+ return;
+ if (nfs_server_uuid_is_local(clp))
+ nfs_localio_enable_client(clp);
+ nfs_uuid_end(&clp->cl_uuid);
+}
+EXPORT_SYMBOL_GPL(nfs_local_probe);
+
+void nfs_local_probe_async_work(struct work_struct *work)
+{
+ struct nfs_client *clp =
+ container_of(work, struct nfs_client, cl_local_probe_work);
+
+ nfs_local_probe(clp);
+}
+
+void nfs_local_probe_async(struct nfs_client *clp)
+{
+ queue_work(nfsiod_workqueue, &clp->cl_local_probe_work);
+}
+EXPORT_SYMBOL_GPL(nfs_local_probe_async);
+
+static inline struct nfsd_file *nfs_local_file_get(struct nfsd_file *nf)
+{
+ return nfs_to->nfsd_file_get(nf);
+}
+
+static inline void nfs_local_file_put(struct nfsd_file *nf)
+{
+ nfs_to->nfsd_file_put(nf);
+}
+
+/*
+ * __nfs_local_open_fh - open a local filehandle in terms of nfsd_file.
+ *
+ * Returns a pointer to a struct nfsd_file or ERR_PTR.
+ * Caller must release returned nfsd_file with nfs_to_nfsd_file_put_local().
+ */
+static struct nfsd_file *
+__nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
+ struct nfs_fh *fh, struct nfs_file_localio *nfl,
+ const fmode_t mode)
+{
+ struct nfsd_file *localio;
+
+ localio = nfs_open_local_fh(&clp->cl_uuid, clp->cl_rpcclient,
+ cred, fh, nfl, mode);
+ if (IS_ERR(localio)) {
+ int status = PTR_ERR(localio);
+ trace_nfs_local_open_fh(fh, mode, status);
+ switch (status) {
+ case -ENOMEM:
+ case -ENXIO:
+ case -ENOENT:
+ /* Revalidate localio, will disable if unsupported */
+ nfs_local_probe(clp);
+ }
+ }
+ return localio;
+}
+
+/*
+ * nfs_local_open_fh - open a local filehandle in terms of nfsd_file.
+ * First checking if the open nfsd_file is already cached, otherwise
+ * must __nfs_local_open_fh and insert the nfsd_file in nfs_file_localio.
+ *
+ * Returns a pointer to a struct nfsd_file or NULL.
+ */
+struct nfsd_file *
+nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
+ struct nfs_fh *fh, struct nfs_file_localio *nfl,
+ const fmode_t mode)
+{
+ struct nfsd_file *nf, *new, __rcu **pnf;
+
+ if (!nfs_server_is_local(clp))
+ return NULL;
+ if (mode & ~(FMODE_READ | FMODE_WRITE))
+ return NULL;
+
+ if (mode & FMODE_WRITE)
+ pnf = &nfl->rw_file;
+ else
+ pnf = &nfl->ro_file;
+
+ new = NULL;
+ rcu_read_lock();
+ nf = rcu_dereference(*pnf);
+ if (!nf) {
+ rcu_read_unlock();
+ new = __nfs_local_open_fh(clp, cred, fh, nfl, mode);
+ if (IS_ERR(new))
+ return NULL;
+ /* try to swap in the pointer */
+ spin_lock(&clp->cl_uuid.lock);
+ nf = rcu_dereference_protected(*pnf, 1);
+ if (!nf) {
+ nf = new;
+ new = NULL;
+ rcu_assign_pointer(*pnf, nf);
+ }
+ spin_unlock(&clp->cl_uuid.lock);
+ rcu_read_lock();
+ }
+ nf = nfs_local_file_get(nf);
+ rcu_read_unlock();
+ if (new)
+ nfs_to_nfsd_file_put_local(new);
+ return nf;
+}
+EXPORT_SYMBOL_GPL(nfs_local_open_fh);
+
+static struct bio_vec *
+nfs_bvec_alloc_and_import_pagevec(struct page **pagevec,
+ unsigned int npages, gfp_t flags)
+{
+ struct bio_vec *bvec, *p;
+
+ bvec = kmalloc_array(npages, sizeof(*bvec), flags);
+ if (bvec != NULL) {
+ for (p = bvec; npages > 0; p++, pagevec++, npages--) {
+ p->bv_page = *pagevec;
+ p->bv_len = PAGE_SIZE;
+ p->bv_offset = 0;
+ }
+ }
+ return bvec;
+}
+
+static void
+nfs_local_iocb_free(struct nfs_local_kiocb *iocb)
+{
+ kfree(iocb->bvec);
+ kfree(iocb);
+}
+
+static struct nfs_local_kiocb *
+nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
+ struct file *file, gfp_t flags)
+{
+ struct nfs_local_kiocb *iocb;
+
+ iocb = kmalloc(sizeof(*iocb), flags);
+ if (iocb == NULL)
+ return NULL;
+ iocb->bvec = nfs_bvec_alloc_and_import_pagevec(hdr->page_array.pagevec,
+ hdr->page_array.npages, flags);
+ if (iocb->bvec == NULL) {
+ kfree(iocb);
+ return NULL;
+ }
+
+ if (localio_O_DIRECT_semantics &&
+ test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) {
+ iocb->kiocb.ki_filp = file;
+ iocb->kiocb.ki_flags = IOCB_DIRECT;
+ } else
+ init_sync_kiocb(&iocb->kiocb, file);
+
+ iocb->kiocb.ki_pos = hdr->args.offset;
+ iocb->hdr = hdr;
+ iocb->kiocb.ki_flags &= ~IOCB_APPEND;
+ iocb->aio_complete_work = NULL;
+
+ return iocb;
+}
+
+static void
+nfs_local_iter_init(struct iov_iter *i, struct nfs_local_kiocb *iocb, int dir)
+{
+ struct nfs_pgio_header *hdr = iocb->hdr;
+
+ iov_iter_bvec(i, dir, iocb->bvec, hdr->page_array.npages,
+ hdr->args.count + hdr->args.pgbase);
+ if (hdr->args.pgbase != 0)
+ iov_iter_advance(i, hdr->args.pgbase);
+}
+
+static void
+nfs_local_hdr_release(struct nfs_pgio_header *hdr,
+ const struct rpc_call_ops *call_ops)
+{
+ call_ops->rpc_call_done(&hdr->task, hdr);
+ call_ops->rpc_release(hdr);
+}
+
+static void
+nfs_local_pgio_init(struct nfs_pgio_header *hdr,
+ const struct rpc_call_ops *call_ops)
+{
+ hdr->task.tk_ops = call_ops;
+ if (!hdr->task.tk_start)
+ hdr->task.tk_start = ktime_get();
+}
+
+static void
+nfs_local_pgio_done(struct nfs_pgio_header *hdr, long status)
+{
+ if (status >= 0) {
+ hdr->res.count = status;
+ hdr->res.op_status = NFS4_OK;
+ hdr->task.tk_status = 0;
+ } else {
+ hdr->res.op_status = nfs_localio_errno_to_nfs4_stat(status);
+ hdr->task.tk_status = status;
+ }
+}
+
+static void
+nfs_local_pgio_release(struct nfs_local_kiocb *iocb)
+{
+ struct nfs_pgio_header *hdr = iocb->hdr;
+
+ nfs_local_file_put(iocb->localio);
+ nfs_local_iocb_free(iocb);
+ nfs_local_hdr_release(hdr, hdr->task.tk_ops);
+}
+
+/*
+ * Complete the I/O from iocb->kiocb.ki_complete()
+ *
+ * Note that this function can be called from a bottom half context,
+ * hence we need to queue the rpc_call_done() etc to a workqueue
+ */
+static inline void nfs_local_pgio_aio_complete(struct nfs_local_kiocb *iocb)
+{
+ INIT_WORK(&iocb->work, iocb->aio_complete_work);
+ queue_work(nfsiod_workqueue, &iocb->work);
+}
+
+static void
+nfs_local_read_done(struct nfs_local_kiocb *iocb, long status)
+{
+ struct nfs_pgio_header *hdr = iocb->hdr;
+ struct file *filp = iocb->kiocb.ki_filp;
+
+ nfs_local_pgio_done(hdr, status);
+
+ /*
+ * Must clear replen otherwise NFSv3 data corruption will occur
+ * if/when switching from LOCALIO back to using normal RPC.
+ */
+ hdr->res.replen = 0;
+
+ if (hdr->res.count != hdr->args.count ||
+ hdr->args.offset + hdr->res.count >= i_size_read(file_inode(filp)))
+ hdr->res.eof = true;
+
+ dprintk("%s: read %ld bytes eof %d.\n", __func__,
+ status > 0 ? status : 0, hdr->res.eof);
+}
+
+static void nfs_local_read_aio_complete_work(struct work_struct *work)
+{
+ struct nfs_local_kiocb *iocb =
+ container_of(work, struct nfs_local_kiocb, work);
+
+ nfs_local_pgio_release(iocb);
+}
+
+static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret)
+{
+ struct nfs_local_kiocb *iocb =
+ container_of(kiocb, struct nfs_local_kiocb, kiocb);
+
+ nfs_local_read_done(iocb, ret);
+ nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */
+}
+
+static void nfs_local_call_read(struct work_struct *work)
+{
+ struct nfs_local_kiocb *iocb =
+ container_of(work, struct nfs_local_kiocb, work);
+ struct file *filp = iocb->kiocb.ki_filp;
+ const struct cred *save_cred;
+ struct iov_iter iter;
+ ssize_t status;
+
+ save_cred = override_creds(filp->f_cred);
+
+ nfs_local_iter_init(&iter, iocb, READ);
+
+ status = filp->f_op->read_iter(&iocb->kiocb, &iter);
+ if (status != -EIOCBQUEUED) {
+ nfs_local_read_done(iocb, status);
+ nfs_local_pgio_release(iocb);
+ }
+
+ revert_creds(save_cred);
+}
+
+static int
+nfs_do_local_read(struct nfs_pgio_header *hdr,
+ struct nfsd_file *localio,
+ const struct rpc_call_ops *call_ops)
+{
+ struct nfs_local_kiocb *iocb;
+ struct file *file = nfs_to->nfsd_file_file(localio);
+
+ /* Don't support filesystems without read_iter */
+ if (!file->f_op->read_iter)
+ return -EAGAIN;
+
+ dprintk("%s: vfs_read count=%u pos=%llu\n",
+ __func__, hdr->args.count, hdr->args.offset);
+
+ iocb = nfs_local_iocb_alloc(hdr, file, GFP_KERNEL);
+ if (iocb == NULL)
+ return -ENOMEM;
+ iocb->localio = localio;
+
+ nfs_local_pgio_init(hdr, call_ops);
+ hdr->res.eof = false;
+
+ if (iocb->kiocb.ki_flags & IOCB_DIRECT) {
+ iocb->kiocb.ki_complete = nfs_local_read_aio_complete;
+ iocb->aio_complete_work = nfs_local_read_aio_complete_work;
+ }
+
+ INIT_WORK(&iocb->work, nfs_local_call_read);
+ queue_work(nfslocaliod_workqueue, &iocb->work);
+
+ return 0;
+}
+
+static void
+nfs_copy_boot_verifier(struct nfs_write_verifier *verifier, struct inode *inode)
+{
+ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ u32 *verf = (u32 *)verifier->data;
+ int seq = 0;
+
+ do {
+ read_seqbegin_or_lock(&clp->cl_boot_lock, &seq);
+ verf[0] = (u32)clp->cl_nfssvc_boot.tv_sec;
+ verf[1] = (u32)clp->cl_nfssvc_boot.tv_nsec;
+ } while (need_seqretry(&clp->cl_boot_lock, seq));
+ done_seqretry(&clp->cl_boot_lock, seq);
+}
+
+static void
+nfs_reset_boot_verifier(struct inode *inode)
+{
+ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+
+ write_seqlock(&clp->cl_boot_lock);
+ ktime_get_real_ts64(&clp->cl_nfssvc_boot);
+ write_sequnlock(&clp->cl_boot_lock);
+}
+
+static void
+nfs_set_local_verifier(struct inode *inode,
+ struct nfs_writeverf *verf,
+ enum nfs3_stable_how how)
+{
+ nfs_copy_boot_verifier(&verf->verifier, inode);
+ verf->committed = how;
+}
+
+/* Factored out from fs/nfsd/vfs.h:fh_getattr() */
+static int __vfs_getattr(struct path *p, struct kstat *stat, int version)
+{
+ u32 request_mask = STATX_BASIC_STATS;
+
+ if (version == 4)
+ request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE);
+ return vfs_getattr(p, stat, request_mask, AT_STATX_SYNC_AS_STAT);
+}
+
+/* Copied from fs/nfsd/nfsfh.c:nfsd4_change_attribute() */
+static u64 __nfsd4_change_attribute(const struct kstat *stat,
+ const struct inode *inode)
+{
+ u64 chattr;
+
+ if (stat->result_mask & STATX_CHANGE_COOKIE) {
+ chattr = stat->change_cookie;
+ if (S_ISREG(inode->i_mode) &&
+ !(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) {
+ chattr += (u64)stat->ctime.tv_sec << 30;
+ chattr += stat->ctime.tv_nsec;
+ }
+ } else {
+ chattr = time_to_chattr(&stat->ctime);
+ }
+ return chattr;
+}
+
+static void nfs_local_vfs_getattr(struct nfs_local_kiocb *iocb)
+{
+ struct kstat stat;
+ struct file *filp = iocb->kiocb.ki_filp;
+ struct nfs_pgio_header *hdr = iocb->hdr;
+ struct nfs_fattr *fattr = hdr->res.fattr;
+ int version = NFS_PROTO(hdr->inode)->version;
+
+ if (unlikely(!fattr) || __vfs_getattr(&filp->f_path, &stat, version))
+ return;
+
+ fattr->valid = (NFS_ATTR_FATTR_FILEID |
+ NFS_ATTR_FATTR_CHANGE |
+ NFS_ATTR_FATTR_SIZE |
+ NFS_ATTR_FATTR_ATIME |
+ NFS_ATTR_FATTR_MTIME |
+ NFS_ATTR_FATTR_CTIME |
+ NFS_ATTR_FATTR_SPACE_USED);
+
+ fattr->fileid = stat.ino;
+ fattr->size = stat.size;
+ fattr->atime = stat.atime;
+ fattr->mtime = stat.mtime;
+ fattr->ctime = stat.ctime;
+ if (version == 4) {
+ fattr->change_attr =
+ __nfsd4_change_attribute(&stat, file_inode(filp));
+ } else
+ fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
+ fattr->du.nfs3.used = stat.blocks << 9;
+}
+
+static void
+nfs_local_write_done(struct nfs_local_kiocb *iocb, long status)
+{
+ struct nfs_pgio_header *hdr = iocb->hdr;
+ struct inode *inode = hdr->inode;
+
+ dprintk("%s: wrote %ld bytes.\n", __func__, status > 0 ? status : 0);
+
+ /* Handle short writes as if they are ENOSPC */
+ if (status > 0 && status < hdr->args.count) {
+ hdr->mds_offset += status;
+ hdr->args.offset += status;
+ hdr->args.pgbase += status;
+ hdr->args.count -= status;
+ nfs_set_pgio_error(hdr, -ENOSPC, hdr->args.offset);
+ status = -ENOSPC;
+ }
+ if (status < 0)
+ nfs_reset_boot_verifier(inode);
+
+ nfs_local_pgio_done(hdr, status);
+}
+
+static void nfs_local_write_aio_complete_work(struct work_struct *work)
+{
+ struct nfs_local_kiocb *iocb =
+ container_of(work, struct nfs_local_kiocb, work);
+
+ nfs_local_vfs_getattr(iocb);
+ nfs_local_pgio_release(iocb);
+}
+
+static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret)
+{
+ struct nfs_local_kiocb *iocb =
+ container_of(kiocb, struct nfs_local_kiocb, kiocb);
+
+ nfs_local_write_done(iocb, ret);
+ nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */
+}
+
+static void nfs_local_call_write(struct work_struct *work)
+{
+ struct nfs_local_kiocb *iocb =
+ container_of(work, struct nfs_local_kiocb, work);
+ struct file *filp = iocb->kiocb.ki_filp;
+ unsigned long old_flags = current->flags;
+ const struct cred *save_cred;
+ struct iov_iter iter;
+ ssize_t status;
+
+ current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
+ save_cred = override_creds(filp->f_cred);
+
+ nfs_local_iter_init(&iter, iocb, WRITE);
+
+ file_start_write(filp);
+ status = filp->f_op->write_iter(&iocb->kiocb, &iter);
+ file_end_write(filp);
+ if (status != -EIOCBQUEUED) {
+ nfs_local_write_done(iocb, status);
+ nfs_local_vfs_getattr(iocb);
+ nfs_local_pgio_release(iocb);
+ }
+
+ revert_creds(save_cred);
+ current->flags = old_flags;
+}
+
+static int
+nfs_do_local_write(struct nfs_pgio_header *hdr,
+ struct nfsd_file *localio,
+ const struct rpc_call_ops *call_ops)
+{
+ struct nfs_local_kiocb *iocb;
+ struct file *file = nfs_to->nfsd_file_file(localio);
+
+ /* Don't support filesystems without write_iter */
+ if (!file->f_op->write_iter)
+ return -EAGAIN;
+
+ dprintk("%s: vfs_write count=%u pos=%llu %s\n",
+ __func__, hdr->args.count, hdr->args.offset,
+ (hdr->args.stable == NFS_UNSTABLE) ? "unstable" : "stable");
+
+ iocb = nfs_local_iocb_alloc(hdr, file, GFP_NOIO);
+ if (iocb == NULL)
+ return -ENOMEM;
+ iocb->localio = localio;
+
+ switch (hdr->args.stable) {
+ default:
+ break;
+ case NFS_DATA_SYNC:
+ iocb->kiocb.ki_flags |= IOCB_DSYNC;
+ break;
+ case NFS_FILE_SYNC:
+ iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC;
+ }
+
+ nfs_local_pgio_init(hdr, call_ops);
+
+ nfs_set_local_verifier(hdr->inode, hdr->res.verf, hdr->args.stable);
+
+ if (iocb->kiocb.ki_flags & IOCB_DIRECT) {
+ iocb->kiocb.ki_complete = nfs_local_write_aio_complete;
+ iocb->aio_complete_work = nfs_local_write_aio_complete_work;
+ }
+
+ INIT_WORK(&iocb->work, nfs_local_call_write);
+ queue_work(nfslocaliod_workqueue, &iocb->work);
+
+ return 0;
+}
+
+int nfs_local_doio(struct nfs_client *clp, struct nfsd_file *localio,
+ struct nfs_pgio_header *hdr,
+ const struct rpc_call_ops *call_ops)
+{
+ int status = 0;
+
+ if (!hdr->args.count)
+ return 0;
+
+ switch (hdr->rw_mode) {
+ case FMODE_READ:
+ status = nfs_do_local_read(hdr, localio, call_ops);
+ break;
+ case FMODE_WRITE:
+ status = nfs_do_local_write(hdr, localio, call_ops);
+ break;
+ default:
+ dprintk("%s: invalid mode: %d\n", __func__,
+ hdr->rw_mode);
+ status = -EINVAL;
+ }
+
+ if (status != 0) {
+ if (status == -EAGAIN)
+ nfs_localio_disable_client(clp);
+ nfs_local_file_put(localio);
+ hdr->task.tk_status = status;
+ nfs_local_hdr_release(hdr, call_ops);
+ }
+ return status;
+}
+
+static void
+nfs_local_init_commit(struct nfs_commit_data *data,
+ const struct rpc_call_ops *call_ops)
+{
+ data->task.tk_ops = call_ops;
+}
+
+static int
+nfs_local_run_commit(struct file *filp, struct nfs_commit_data *data)
+{
+ loff_t start = data->args.offset;
+ loff_t end = LLONG_MAX;
+
+ if (data->args.count > 0) {
+ end = start + data->args.count - 1;
+ if (end < start)
+ end = LLONG_MAX;
+ }
+
+ dprintk("%s: commit %llu - %llu\n", __func__, start, end);
+ return vfs_fsync_range(filp, start, end, 0);
+}
+
+static void
+nfs_local_commit_done(struct nfs_commit_data *data, int status)
+{
+ if (status >= 0) {
+ nfs_set_local_verifier(data->inode,
+ data->res.verf,
+ NFS_FILE_SYNC);
+ data->res.op_status = NFS4_OK;
+ data->task.tk_status = 0;
+ } else {
+ nfs_reset_boot_verifier(data->inode);
+ data->res.op_status = nfs_localio_errno_to_nfs4_stat(status);
+ data->task.tk_status = status;
+ }
+}
+
+static void
+nfs_local_release_commit_data(struct nfsd_file *localio,
+ struct nfs_commit_data *data,
+ const struct rpc_call_ops *call_ops)
+{
+ nfs_local_file_put(localio);
+ call_ops->rpc_call_done(&data->task, data);
+ call_ops->rpc_release(data);
+}
+
+static void
+nfs_local_fsync_ctx_free(struct nfs_local_fsync_ctx *ctx)
+{
+ nfs_local_release_commit_data(ctx->localio, ctx->data,
+ ctx->data->task.tk_ops);
+ kfree(ctx);
+}
+
+static void
+nfs_local_fsync_work(struct work_struct *work)
+{
+ struct nfs_local_fsync_ctx *ctx;
+ int status;
+
+ ctx = container_of(work, struct nfs_local_fsync_ctx, work);
+
+ status = nfs_local_run_commit(nfs_to->nfsd_file_file(ctx->localio),
+ ctx->data);
+ nfs_local_commit_done(ctx->data, status);
+ if (ctx->done != NULL)
+ complete(ctx->done);
+ nfs_local_fsync_ctx_free(ctx);
+}
+
+static struct nfs_local_fsync_ctx *
+nfs_local_fsync_ctx_alloc(struct nfs_commit_data *data,
+ struct nfsd_file *localio, gfp_t flags)
+{
+ struct nfs_local_fsync_ctx *ctx = kmalloc(sizeof(*ctx), flags);
+
+ if (ctx != NULL) {
+ ctx->localio = localio;
+ ctx->data = data;
+ INIT_WORK(&ctx->work, nfs_local_fsync_work);
+ ctx->done = NULL;
+ }
+ return ctx;
+}
+
+int nfs_local_commit(struct nfsd_file *localio,
+ struct nfs_commit_data *data,
+ const struct rpc_call_ops *call_ops, int how)
+{
+ struct nfs_local_fsync_ctx *ctx;
+
+ ctx = nfs_local_fsync_ctx_alloc(data, localio, GFP_KERNEL);
+ if (!ctx) {
+ nfs_local_commit_done(data, -ENOMEM);
+ nfs_local_release_commit_data(localio, data, call_ops);
+ return -ENOMEM;
+ }
+
+ nfs_local_init_commit(data, call_ops);
+
+ if (how & FLUSH_SYNC) {
+ DECLARE_COMPLETION_ONSTACK(done);
+ ctx->done = &done;
+ queue_work(nfsiod_workqueue, &ctx->work);
+ wait_for_completion(&done);
+ } else
+ queue_work(nfsiod_workqueue, &ctx->work);
+
+ return 0;
+}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 68e76b626371..57c9dd700b58 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -128,11 +128,6 @@ struct mountres {
rpc_authflavor_t *auth_flavors;
};
-struct mnt_fhstatus {
- u32 status;
- struct nfs_fh *fh;
-};
-
/**
* nfs_mount - Obtain an NFS file handle for the given host and path
* @info: pointer to mount request arguments
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index e7494cdd957e..973aed9cc5fe 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -182,7 +182,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
ctx->version = client->rpc_ops->version;
ctx->minorversion = client->cl_minorversion;
ctx->nfs_mod = client->cl_nfs_mod;
- __module_get(ctx->nfs_mod->owner);
+ get_nfs_version(ctx->nfs_mod);
ret = client->rpc_ops->submount(fc, server);
if (ret < 0) {
@@ -308,7 +308,7 @@ int nfs_submount(struct fs_context *fc, struct nfs_server *server)
int err;
/* Look it up again to get its attributes */
- err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry,
+ err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry, &dentry->d_name,
ctx->mntfh, ctx->clone_data.fattr);
dput(parent);
if (err != 0)
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
index c8374f74dce1..a68b21603ea9 100644
--- a/fs/nfs/netns.h
+++ b/fs/nfs/netns.h
@@ -9,6 +9,7 @@
#include <linux/nfs4.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <linux/sunrpc/stats.h>
struct bl_dev_msg {
int32_t status;
@@ -34,6 +35,7 @@ struct nfs_net {
struct nfs_netns_client *nfs_client;
spinlock_t nfs_client_lock;
ktime_t boot_time;
+ struct rpc_stat rpcstats;
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *proc_nfsfs;
#endif
diff --git a/fs/nfs/nfs.h b/fs/nfs/nfs.h
index 0d3ce0460e35..8a5f51be013a 100644
--- a/fs/nfs/nfs.h
+++ b/fs/nfs/nfs.h
@@ -19,10 +19,10 @@ struct nfs_subversion {
const struct nfs_rpc_ops *rpc_ops; /* NFS operations */
const struct super_operations *sops; /* NFS Super operations */
const struct xattr_handler * const *xattr; /* NFS xattr handlers */
- struct list_head list; /* List of NFS versions */
};
-struct nfs_subversion *get_nfs_version(unsigned int);
+struct nfs_subversion *find_nfs_version(unsigned int);
+int get_nfs_version(struct nfs_subversion *);
void put_nfs_version(struct nfs_subversion *);
void register_nfs_version(struct nfs_subversion *);
void unregister_nfs_version(struct nfs_subversion *);
diff --git a/fs/nfs/nfs2super.c b/fs/nfs/nfs2super.c
index 467f21ee6a35..b1badc70bd71 100644
--- a/fs/nfs/nfs2super.c
+++ b/fs/nfs/nfs2super.c
@@ -26,6 +26,7 @@ static void __exit exit_nfs_v2(void)
unregister_nfs_version(&nfs_v2);
}
+MODULE_DESCRIPTION("NFSv2 client support");
MODULE_LICENSE("GPL");
module_init(init_nfs_v2);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index c19093814296..6e75c6c2d234 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -22,14 +22,12 @@
#include <linux/nfs.h>
#include <linux/nfs2.h>
#include <linux/nfs_fs.h>
+#include <linux/nfs_common.h>
#include "nfstrace.h"
#include "internal.h"
#define NFSDBG_FACILITY NFSDBG_XDR
-/* Mapping from NFS error code to "errno" error code. */
-#define errno_NFSERR_IO EIO
-
/*
* Declare the space requirements for NFS arguments and replies as
* number of 32bit-words
@@ -64,8 +62,6 @@
#define NFS_readdirres_sz (1+NFS_pagepad_sz)
#define NFS_statfsres_sz (1+NFS_info_sz)
-static int nfs_stat_to_errno(enum nfs_stat);
-
/*
* Encode/decode NFSv2 basic data types
*
@@ -1054,70 +1050,6 @@ out_default:
return nfs_stat_to_errno(status);
}
-
-/*
- * We need to translate between nfs status return values and
- * the local errno values which may not be the same.
- */
-static const struct {
- int stat;
- int errno;
-} nfs_errtbl[] = {
- { NFS_OK, 0 },
- { NFSERR_PERM, -EPERM },
- { NFSERR_NOENT, -ENOENT },
- { NFSERR_IO, -errno_NFSERR_IO},
- { NFSERR_NXIO, -ENXIO },
-/* { NFSERR_EAGAIN, -EAGAIN }, */
- { NFSERR_ACCES, -EACCES },
- { NFSERR_EXIST, -EEXIST },
- { NFSERR_XDEV, -EXDEV },
- { NFSERR_NODEV, -ENODEV },
- { NFSERR_NOTDIR, -ENOTDIR },
- { NFSERR_ISDIR, -EISDIR },
- { NFSERR_INVAL, -EINVAL },
- { NFSERR_FBIG, -EFBIG },
- { NFSERR_NOSPC, -ENOSPC },
- { NFSERR_ROFS, -EROFS },
- { NFSERR_MLINK, -EMLINK },
- { NFSERR_NAMETOOLONG, -ENAMETOOLONG },
- { NFSERR_NOTEMPTY, -ENOTEMPTY },
- { NFSERR_DQUOT, -EDQUOT },
- { NFSERR_STALE, -ESTALE },
- { NFSERR_REMOTE, -EREMOTE },
-#ifdef EWFLUSH
- { NFSERR_WFLUSH, -EWFLUSH },
-#endif
- { NFSERR_BADHANDLE, -EBADHANDLE },
- { NFSERR_NOT_SYNC, -ENOTSYNC },
- { NFSERR_BAD_COOKIE, -EBADCOOKIE },
- { NFSERR_NOTSUPP, -ENOTSUPP },
- { NFSERR_TOOSMALL, -ETOOSMALL },
- { NFSERR_SERVERFAULT, -EREMOTEIO },
- { NFSERR_BADTYPE, -EBADTYPE },
- { NFSERR_JUKEBOX, -EJUKEBOX },
- { -1, -EIO }
-};
-
-/**
- * nfs_stat_to_errno - convert an NFS status code to a local errno
- * @status: NFS status code to convert
- *
- * Returns a local errno value, or -EIO if the NFS status code is
- * not recognized. This function is used jointly by NFSv2 and NFSv3.
- */
-static int nfs_stat_to_errno(enum nfs_stat status)
-{
- int i;
-
- for (i = 0; nfs_errtbl[i].stat != -1; i++) {
- if (nfs_errtbl[i].stat == (int)status)
- return nfs_errtbl[i].errno;
- }
- dprintk("NFS: Unrecognized nfs status value: %u\n", status);
- return nfs_errtbl[i].errno;
-}
-
#define PROC(proc, argtype, restype, timer) \
[NFSPROC_##proc] = { \
.p_proc = NFSPROC_##proc, \
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
index 674c012868b1..b0c8a39c2bbd 100644
--- a/fs/nfs/nfs3client.c
+++ b/fs/nfs/nfs3client.c
@@ -111,6 +111,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
cl_init.hostname = buf;
switch (ds_proto) {
+ case XPRT_TRANSPORT_RDMA:
case XPRT_TRANSPORT_TCP:
case XPRT_TRANSPORT_TCP_TLS:
if (mds_clp->cl_nconnect > 1)
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 2de66e4e8280..0c3bc98cd999 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -192,7 +192,7 @@ __nfs3_proc_lookup(struct inode *dir, const char *name, size_t len,
}
static int
-nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
+nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
unsigned short task_flags = 0;
@@ -202,8 +202,7 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
task_flags |= RPC_TASK_TIMEOUT;
dprintk("NFS call lookup %pd2\n", dentry);
- return __nfs3_proc_lookup(dir, dentry->d_name.name,
- dentry->d_name.len, fhandle, fattr,
+ return __nfs3_proc_lookup(dir, name->name, name->len, fhandle, fattr,
task_flags);
}
@@ -844,6 +843,41 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return status;
}
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+
+static unsigned nfs3_localio_probe_throttle __read_mostly = 0;
+module_param(nfs3_localio_probe_throttle, uint, 0644);
+MODULE_PARM_DESC(nfs3_localio_probe_throttle,
+ "Probe for NFSv3 LOCALIO every N IO requests. Must be power-of-2, defaults to 0 (probing disabled).");
+
+static void nfs3_localio_probe(struct nfs_server *server)
+{
+ struct nfs_client *clp = server->nfs_client;
+
+ /* Throttled to reduce nfs_local_probe_async() frequency */
+ if (!nfs3_localio_probe_throttle || nfs_server_is_local(clp))
+ return;
+
+ /*
+ * Try (re)enabling LOCALIO if isn't enabled -- admin deems
+ * it worthwhile to periodically check if LOCALIO possible by
+ * setting the 'nfs3_localio_probe_throttle' module parameter.
+ *
+ * This is useful if LOCALIO was previously enabled, but was
+ * disabled due to server restart, and IO has successfully
+ * completed in terms of normal RPC.
+ */
+ if ((clp->cl_uuid.nfs3_localio_probe_count++ &
+ (nfs3_localio_probe_throttle - 1)) == 0) {
+ if (!nfs_server_is_local(clp))
+ nfs_local_probe_async(clp);
+ }
+}
+
+#else
+static void nfs3_localio_probe(struct nfs_server *server) {}
+#endif
+
static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
struct inode *inode = hdr->inode;
@@ -855,8 +889,11 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN;
- if (task->tk_status >= 0 && !server->read_hdrsize)
- cmpxchg(&server->read_hdrsize, 0, hdr->res.replen);
+ if (task->tk_status >= 0) {
+ if (!server->read_hdrsize)
+ cmpxchg(&server->read_hdrsize, 0, hdr->res.replen);
+ nfs3_localio_probe(server);
+ }
nfs_invalidate_atime(inode);
nfs_refresh_inode(inode, &hdr->fattr);
@@ -886,8 +923,10 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN;
- if (task->tk_status >= 0)
+ if (task->tk_status >= 0) {
nfs_writeback_update_inode(hdr);
+ nfs3_localio_probe(NFS_SERVER(inode));
+ }
return 0;
}
@@ -963,7 +1002,7 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
struct nfs_open_context *ctx = nfs_file_open_context(filp);
int status;
- if (fl->fl_flags & FL_CLOSE) {
+ if (fl->c.flc_flags & FL_CLOSE) {
l_ctx = nfs_get_lock_context(ctx);
if (IS_ERR(l_ctx))
l_ctx = NULL;
@@ -979,13 +1018,21 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
return status;
}
-static int nfs3_have_delegation(struct inode *inode, fmode_t flags)
+static int nfs3_have_delegation(struct inode *inode, fmode_t type, int flags)
+{
+ return 0;
+}
+
+static int nfs3_return_delegation(struct inode *inode)
{
+ if (S_ISREG(inode->i_mode))
+ nfs_wb_all(inode);
return 0;
}
static const struct inode_operations nfs3_dir_inode_operations = {
.create = nfs_create,
+ .atomic_open = nfs_atomic_open_v23,
.lookup = nfs_lookup,
.link = nfs_link,
.unlink = nfs_unlink,
@@ -1061,6 +1108,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.clear_acl_cache = forget_all_cached_acls,
.close_context = nfs_close_context,
.have_delegation = nfs3_have_delegation,
+ .return_delegation = nfs3_return_delegation,
.alloc_client = nfs_alloc_client,
.init_client = nfs_init_client,
.free_client = nfs_free_client,
diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c
index 8a9be9e47f76..20a80478449e 100644
--- a/fs/nfs/nfs3super.c
+++ b/fs/nfs/nfs3super.c
@@ -27,6 +27,7 @@ static void __exit exit_nfs_v3(void)
unregister_nfs_version(&nfs_v3);
}
+MODULE_DESCRIPTION("NFSv3 client support");
MODULE_LICENSE("GPL");
module_init(init_nfs_v3);
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 60f032be805a..4ae01c10b7e2 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -21,14 +21,13 @@
#include <linux/nfs3.h>
#include <linux/nfs_fs.h>
#include <linux/nfsacl.h>
+#include <linux/nfs_common.h>
+
#include "nfstrace.h"
#include "internal.h"
#define NFSDBG_FACILITY NFSDBG_XDR
-/* Mapping from NFS error code to "errno" error code. */
-#define errno_NFSERR_IO EIO
-
/*
* Declare the space requirements for NFS arguments and replies as
* number of 32bit-words
@@ -91,8 +90,6 @@
NFS3_pagepad_sz)
#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
-static int nfs3_stat_to_errno(enum nfs_stat);
-
/*
* Map file type to S_IFMT bits
*/
@@ -1406,7 +1403,7 @@ static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1445,7 +1442,7 @@ static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1495,7 +1492,7 @@ out_default:
error = decode_post_op_attr(xdr, result->dir_attr, userns);
if (unlikely(error))
goto out;
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1537,7 +1534,7 @@ static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1578,7 +1575,7 @@ static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1658,7 +1655,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1728,7 +1725,7 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1795,7 +1792,7 @@ out_default:
error = decode_wcc_data(xdr, result->dir_attr, userns);
if (unlikely(error))
goto out;
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1835,7 +1832,7 @@ static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1881,7 +1878,7 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1926,7 +1923,7 @@ static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/**
@@ -2101,7 +2098,7 @@ out_default:
error = decode_post_op_attr(xdr, result->dir_attr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -2167,7 +2164,7 @@ static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -2243,7 +2240,7 @@ static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -2304,7 +2301,7 @@ static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -2350,7 +2347,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
#ifdef CONFIG_NFS_V3_ACL
@@ -2416,7 +2413,7 @@ static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
@@ -2435,76 +2432,11 @@ static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
#endif /* CONFIG_NFS_V3_ACL */
-
-/*
- * We need to translate between nfs status return values and
- * the local errno values which may not be the same.
- */
-static const struct {
- int stat;
- int errno;
-} nfs_errtbl[] = {
- { NFS_OK, 0 },
- { NFSERR_PERM, -EPERM },
- { NFSERR_NOENT, -ENOENT },
- { NFSERR_IO, -errno_NFSERR_IO},
- { NFSERR_NXIO, -ENXIO },
-/* { NFSERR_EAGAIN, -EAGAIN }, */
- { NFSERR_ACCES, -EACCES },
- { NFSERR_EXIST, -EEXIST },
- { NFSERR_XDEV, -EXDEV },
- { NFSERR_NODEV, -ENODEV },
- { NFSERR_NOTDIR, -ENOTDIR },
- { NFSERR_ISDIR, -EISDIR },
- { NFSERR_INVAL, -EINVAL },
- { NFSERR_FBIG, -EFBIG },
- { NFSERR_NOSPC, -ENOSPC },
- { NFSERR_ROFS, -EROFS },
- { NFSERR_MLINK, -EMLINK },
- { NFSERR_NAMETOOLONG, -ENAMETOOLONG },
- { NFSERR_NOTEMPTY, -ENOTEMPTY },
- { NFSERR_DQUOT, -EDQUOT },
- { NFSERR_STALE, -ESTALE },
- { NFSERR_REMOTE, -EREMOTE },
-#ifdef EWFLUSH
- { NFSERR_WFLUSH, -EWFLUSH },
-#endif
- { NFSERR_BADHANDLE, -EBADHANDLE },
- { NFSERR_NOT_SYNC, -ENOTSYNC },
- { NFSERR_BAD_COOKIE, -EBADCOOKIE },
- { NFSERR_NOTSUPP, -ENOTSUPP },
- { NFSERR_TOOSMALL, -ETOOSMALL },
- { NFSERR_SERVERFAULT, -EREMOTEIO },
- { NFSERR_BADTYPE, -EBADTYPE },
- { NFSERR_JUKEBOX, -EJUKEBOX },
- { -1, -EIO }
-};
-
-/**
- * nfs3_stat_to_errno - convert an NFS status code to a local errno
- * @status: NFS status code to convert
- *
- * Returns a local errno value, or -EIO if the NFS status code is
- * not recognized. This function is used jointly by NFSv2 and NFSv3.
- */
-static int nfs3_stat_to_errno(enum nfs_stat status)
-{
- int i;
-
- for (i = 0; nfs_errtbl[i].stat != -1; i++) {
- if (nfs_errtbl[i].stat == (int)status)
- return nfs_errtbl[i].errno;
- }
- dprintk("NFS: Unrecognized nfs status value: %u\n", status);
- return nfs_errtbl[i].errno;
-}
-
-
#define PROC(proc, argtype, restype, timer) \
[NFS3PROC_##proc] = { \
.p_proc = NFS3PROC_##proc, \
diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h
index b59876b01a1e..0282d93c8bcc 100644
--- a/fs/nfs/nfs42.h
+++ b/fs/nfs/nfs42.h
@@ -55,11 +55,14 @@ int nfs42_proc_removexattr(struct inode *inode, const char *name);
* They would be 7 bytes long in the eventual buffer ("user.x\0"), and
* 8 bytes long XDR-encoded.
*
- * Include the trailing eof word as well.
+ * Include the trailing eof word as well and make the result a multiple
+ * of 4 bytes.
*/
static inline u32 nfs42_listxattr_xdrsize(u32 buflen)
{
- return ((buflen / (XATTR_USER_PREFIX_LEN + 2)) * 8) + 4;
+ u32 size = 8 * buflen / (XATTR_USER_PREFIX_LEN + 2) + 4;
+
+ return (size + 3) & ~3;
}
#endif /* CONFIG_NFS_V4_2 */
#endif /* __LINUX_FS_NFS_NFS4_2_H */
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 28704f924612..1924c4a2077b 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -218,7 +218,7 @@ static int handle_async_copy(struct nfs42_copy_res *res,
if (dst_server != src_server) {
spin_lock(&src_server->nfs_client->cl_lock);
- list_add_tail(&copy->src_copies, &src_server->ss_copies);
+ list_add_tail(&copy->src_copies, &src_server->ss_src_copies);
spin_unlock(&src_server->nfs_client->cl_lock);
}
@@ -498,15 +498,15 @@ out_put_src_lock:
return err;
}
-struct nfs42_offloadcancel_data {
+struct nfs42_offload_data {
struct nfs_server *seq_server;
struct nfs42_offload_status_args args;
struct nfs42_offload_status_res res;
};
-static void nfs42_offload_cancel_prepare(struct rpc_task *task, void *calldata)
+static void nfs42_offload_prepare(struct rpc_task *task, void *calldata)
{
- struct nfs42_offloadcancel_data *data = calldata;
+ struct nfs42_offload_data *data = calldata;
nfs4_setup_sequence(data->seq_server->nfs_client,
&data->args.osa_seq_args,
@@ -515,7 +515,7 @@ static void nfs42_offload_cancel_prepare(struct rpc_task *task, void *calldata)
static void nfs42_offload_cancel_done(struct rpc_task *task, void *calldata)
{
- struct nfs42_offloadcancel_data *data = calldata;
+ struct nfs42_offload_data *data = calldata;
trace_nfs4_offload_cancel(&data->args, task->tk_status);
nfs41_sequence_done(task, &data->res.osr_seq_res);
@@ -525,22 +525,22 @@ static void nfs42_offload_cancel_done(struct rpc_task *task, void *calldata)
rpc_restart_call_prepare(task);
}
-static void nfs42_free_offloadcancel_data(void *data)
+static void nfs42_offload_release(void *data)
{
kfree(data);
}
static const struct rpc_call_ops nfs42_offload_cancel_ops = {
- .rpc_call_prepare = nfs42_offload_cancel_prepare,
+ .rpc_call_prepare = nfs42_offload_prepare,
.rpc_call_done = nfs42_offload_cancel_done,
- .rpc_release = nfs42_free_offloadcancel_data,
+ .rpc_release = nfs42_offload_release,
};
static int nfs42_do_offload_cancel_async(struct file *dst,
nfs4_stateid *stateid)
{
struct nfs_server *dst_server = NFS_SERVER(file_inode(dst));
- struct nfs42_offloadcancel_data *data = NULL;
+ struct nfs42_offload_data *data = NULL;
struct nfs_open_context *ctx = nfs_file_open_context(dst);
struct rpc_task *task;
struct rpc_message msg = {
@@ -552,14 +552,14 @@ static int nfs42_do_offload_cancel_async(struct file *dst,
.rpc_message = &msg,
.callback_ops = &nfs42_offload_cancel_ops,
.workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE,
};
int status;
if (!(dst_server->caps & NFS_CAP_OFFLOAD_CANCEL))
return -EOPNOTSUPP;
- data = kzalloc(sizeof(struct nfs42_offloadcancel_data), GFP_KERNEL);
+ data = kzalloc(sizeof(struct nfs42_offload_data), GFP_KERNEL);
if (data == NULL)
return -ENOMEM;
@@ -861,7 +861,7 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server,
.rpc_message = &msg,
.callback_ops = &nfs42_layoutstat_ops,
.callback_data = data,
- .flags = RPC_TASK_ASYNC,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE,
};
struct rpc_task *task;
@@ -1016,7 +1016,7 @@ int nfs42_proc_layouterror(struct pnfs_layout_segment *lseg,
struct rpc_task_setup task_setup = {
.rpc_message = &msg,
.callback_ops = &nfs42_layouterror_ops,
- .flags = RPC_TASK_ASYNC,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE,
};
unsigned int i;
diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c
index 49aaf28a6950..37d79400e5f4 100644
--- a/fs/nfs/nfs42xattr.c
+++ b/fs/nfs/nfs42xattr.c
@@ -802,7 +802,7 @@ static struct shrinker *nfs4_xattr_large_entry_shrinker;
static enum lru_status
cache_lru_isolate(struct list_head *item,
- struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
+ struct list_lru_one *lru, void *arg)
{
struct list_head *dispose = arg;
struct inode *inode;
@@ -867,7 +867,7 @@ nfs4_xattr_cache_count(struct shrinker *shrink, struct shrink_control *sc)
static enum lru_status
entry_lru_isolate(struct list_head *item,
- struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
+ struct list_lru_one *lru, void *arg)
{
struct list_head *dispose = arg;
struct nfs4_xattr_bucket *bucket;
@@ -1017,7 +1017,7 @@ int __init nfs4_xattr_cache_init(void)
nfs4_xattr_cache_cachep = kmem_cache_create("nfs4_xattr_cache_cache",
sizeof(struct nfs4_xattr_cache), 0,
- (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
+ (SLAB_RECLAIM_ACCOUNT),
nfs4_xattr_cache_init_once);
if (nfs4_xattr_cache_cachep == NULL)
return -ENOMEM;
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 9e3ae53e2205..5072d7ea72e9 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -144,9 +144,11 @@
decode_putfh_maxsz + \
decode_offload_cancel_maxsz)
#define NFS4_enc_copy_notify_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_copy_notify_maxsz)
#define NFS4_dec_copy_notify_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_copy_notify_maxsz)
#define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \
@@ -549,7 +551,7 @@ static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
}
/*
- * Encode OFFLOAD_CANEL request
+ * Encode OFFLOAD_CANCEL request
*/
static void nfs4_xdr_enc_offload_cancel(struct rpc_rqst *req,
struct xdr_stream *xdr,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 581698f1b7b2..7d383d29a995 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -67,7 +67,8 @@ struct nfs4_minor_version_ops {
void (*free_lock_state)(struct nfs_server *,
struct nfs4_lock_state *);
int (*test_and_free_expired)(struct nfs_server *,
- nfs4_stateid *, const struct cred *);
+ const nfs4_stateid *,
+ const struct cred *);
struct nfs_seqid *
(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t);
void (*session_trunk)(struct rpc_clnt *clnt,
@@ -82,7 +83,7 @@ struct nfs4_minor_version_ops {
#define NFS_SEQID_CONFIRMED 1
struct nfs_seqid_counter {
ktime_t create_time;
- int owner_id;
+ u64 owner_id;
int flags;
u32 counter;
spinlock_t lock; /* Protects the list */
@@ -120,7 +121,6 @@ struct nfs4_state_owner {
unsigned long so_flags;
struct list_head so_states;
struct nfs_seqid_counter so_seqid;
- seqcount_spinlock_t so_reclaim_seqcount;
struct mutex so_delegreturn_mutex;
};
@@ -330,7 +330,7 @@ extern int update_open_stateid(struct nfs4_state *state,
const nfs4_stateid *deleg_stateid,
fmode_t fmode);
extern int nfs4_proc_setlease(struct file *file, int arg,
- struct file_lock **lease, void **priv);
+ struct file_lease **lease, void **priv);
extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
struct nfs_fsinfo *fsinfo);
extern void nfs4_update_changeattr(struct inode *dir,
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 11e3a285594c..83378f69b35e 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -231,9 +231,8 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
__set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags);
__set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
__set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags);
-
- if (test_bit(NFS_CS_DS, &cl_init->init_flags))
- __set_bit(NFS_CS_DS, &clp->cl_flags);
+ if (test_bit(NFS_CS_PNFS, &cl_init->init_flags))
+ __set_bit(NFS_CS_PNFS, &clp->cl_flags);
/*
* Set up the connection to the server before we add add to the
* global list.
@@ -924,6 +923,7 @@ static int nfs4_set_client(struct nfs_server *server,
else
cl_init.max_connect = max_connect;
switch (proto) {
+ case XPRT_TRANSPORT_RDMA:
case XPRT_TRANSPORT_TCP:
case XPRT_TRANSPORT_TCP_TLS:
cl_init.nconnect = nconnect;
@@ -1000,6 +1000,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
cl_init.hostname = buf;
switch (ds_proto) {
+ case XPRT_TRANSPORT_RDMA:
case XPRT_TRANSPORT_TCP:
case XPRT_TRANSPORT_TCP_TLS:
if (mds_clp->cl_nconnect > 1) {
@@ -1011,7 +1012,6 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
- __set_bit(NFS_CS_DS, &cl_init.init_flags);
__set_bit(NFS_CS_PNFS, &cl_init.init_flags);
cl_init.max_connect = NFS_MAX_TRANSPORTS;
/*
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index e238abc78a13..1cd9652f3c28 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -439,7 +439,7 @@ void nfs42_ssc_unregister_ops(void)
}
#endif /* CONFIG_NFS_V4_2 */
-static int nfs4_setlease(struct file *file, int arg, struct file_lock **lease,
+static int nfs4_setlease(struct file *file, int arg, struct file_lease **lease,
void **priv)
{
return nfs4_proc_setlease(file, arg, lease, priv);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 23819a756508..6e95db6c17e9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -103,10 +103,10 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
const struct cred *cred,
struct nfs4_slot *slot,
bool is_privileged);
-static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
- const struct cred *);
+static int nfs41_test_stateid(struct nfs_server *, const nfs4_stateid *,
+ const struct cred *);
static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
- const struct cred *, bool);
+ const struct cred *, bool);
#endif
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
@@ -114,6 +114,7 @@ static inline struct nfs4_label *
nfs4_label_init_security(struct inode *dir, struct dentry *dentry,
struct iattr *sattr, struct nfs4_label *label)
{
+ struct lsm_context shim;
int err;
if (label == NULL)
@@ -128,18 +129,26 @@ nfs4_label_init_security(struct inode *dir, struct dentry *dentry,
label->label = NULL;
err = security_dentry_init_security(dentry, sattr->ia_mode,
- &dentry->d_name, NULL,
- (void **)&label->label, &label->len);
- if (err == 0)
- return label;
+ &dentry->d_name, NULL, &shim);
+ if (err)
+ return NULL;
- return NULL;
+ label->lsmid = shim.id;
+ label->label = shim.context;
+ label->len = shim.len;
+ return label;
}
static inline void
nfs4_label_release_security(struct nfs4_label *label)
{
- if (label)
- security_release_secctx(label->label, label->len);
+ struct lsm_context shim;
+
+ if (label) {
+ shim.context = label->label;
+ shim.len = label->len;
+ shim.id = label->lsmid;
+ security_release_secctx(&shim);
+ }
}
static inline u32 *nfs4_bitmask(struct nfs_server *server, struct nfs4_label *label)
{
@@ -293,7 +302,7 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src,
unsigned long cache_validity;
memcpy(dst, src, NFS4_BITMASK_SZ*sizeof(*dst));
- if (!inode || !nfs4_have_delegation(inode, FMODE_READ))
+ if (!inode || !nfs_have_read_or_write_delegation(inode))
return;
cache_validity = READ_ONCE(NFS_I(inode)->cache_validity) | flags;
@@ -310,6 +319,18 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src,
dst[1] &= ~FATTR4_WORD1_MODE;
if (!(cache_validity & NFS_INO_INVALID_OTHER))
dst[1] &= ~(FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP);
+
+ if (nfs_have_delegated_mtime(inode)) {
+ if (!(cache_validity & NFS_INO_INVALID_ATIME))
+ dst[1] &= ~FATTR4_WORD1_TIME_ACCESS;
+ if (!(cache_validity & NFS_INO_INVALID_MTIME))
+ dst[1] &= ~FATTR4_WORD1_TIME_MODIFY;
+ if (!(cache_validity & NFS_INO_INVALID_CTIME))
+ dst[1] &= ~FATTR4_WORD1_TIME_METADATA;
+ } else if (nfs_have_delegated_atime(inode)) {
+ if (!(cache_validity & NFS_INO_INVALID_ATIME))
+ dst[1] &= ~FATTR4_WORD1_TIME_ACCESS;
+ }
}
static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry,
@@ -1245,7 +1266,8 @@ nfs4_update_changeattr_locked(struct inode *inode,
struct nfs_inode *nfsi = NFS_I(inode);
u64 change_attr = inode_peek_iversion_raw(inode);
- cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;
+ if (!nfs_have_delegated_mtime(inode))
+ cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;
if (S_ISDIR(inode->i_mode))
cache_validity |= NFS_INO_INVALID_DATA;
@@ -1264,7 +1286,7 @@ nfs4_update_changeattr_locked(struct inode *inode,
if (S_ISDIR(inode->i_mode))
nfs_force_lookup_revalidate(inode);
- if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+ if (!nfs_have_delegated_attributes(inode))
cache_validity |=
NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL |
NFS_INO_INVALID_SIZE | NFS_INO_INVALID_OTHER |
@@ -1320,8 +1342,7 @@ static fmode_t _nfs4_ctx_to_openmode(const struct nfs_open_context *ctx)
}
static u32
-nfs4_map_atomic_open_share(struct nfs_server *server,
- fmode_t fmode, int openflags)
+nfs4_fmode_to_share_access(fmode_t fmode)
{
u32 res = 0;
@@ -1335,11 +1356,27 @@ nfs4_map_atomic_open_share(struct nfs_server *server,
case FMODE_READ|FMODE_WRITE:
res = NFS4_SHARE_ACCESS_BOTH;
}
+ return res;
+}
+
+static u32
+nfs4_map_atomic_open_share(struct nfs_server *server,
+ fmode_t fmode, int openflags)
+{
+ u32 res = nfs4_fmode_to_share_access(fmode);
+
if (!(server->caps & NFS_CAP_ATOMIC_OPEN_V1))
goto out;
/* Want no delegation if we're using O_DIRECT */
- if (openflags & O_DIRECT)
+ if (openflags & O_DIRECT) {
res |= NFS4_SHARE_WANT_NO_DELEG;
+ goto out;
+ }
+ /* res |= NFS4_SHARE_WANT_NO_PREFERENCE; */
+ if (server->caps & NFS_CAP_DELEGTIME)
+ res |= NFS4_SHARE_WANT_DELEG_TIMESTAMPS;
+ if (server->caps & NFS_CAP_OPEN_XOR)
+ res |= NFS4_SHARE_WANT_OPEN_XOR_DELEGATION;
out:
return res;
}
@@ -1954,44 +1991,41 @@ out_return_state:
}
static void
-nfs4_opendata_check_deleg(struct nfs4_opendata *data, struct nfs4_state *state)
-{
- struct nfs_client *clp = NFS_SERVER(state->inode)->nfs_client;
- struct nfs_delegation *delegation;
- int delegation_flags = 0;
-
- rcu_read_lock();
- delegation = rcu_dereference(NFS_I(state->inode)->delegation);
- if (delegation)
- delegation_flags = delegation->flags;
- rcu_read_unlock();
- switch (data->o_arg.claim) {
- default:
+nfs4_process_delegation(struct inode *inode, const struct cred *cred,
+ enum open_claim_type4 claim,
+ const struct nfs4_open_delegation *delegation)
+{
+ switch (delegation->open_delegation_type) {
+ case NFS4_OPEN_DELEGATE_READ:
+ case NFS4_OPEN_DELEGATE_WRITE:
+ case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
+ case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
break;
+ default:
+ return;
+ }
+ switch (claim) {
case NFS4_OPEN_CLAIM_DELEGATE_CUR:
case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
pr_err_ratelimited("NFS: Broken NFSv4 server %s is "
"returning a delegation for "
"OPEN(CLAIM_DELEGATE_CUR)\n",
- clp->cl_hostname);
- return;
+ NFS_SERVER(inode)->nfs_client->cl_hostname);
+ break;
+ case NFS4_OPEN_CLAIM_PREVIOUS:
+ nfs_inode_reclaim_delegation(inode, cred, delegation->type,
+ &delegation->stateid,
+ delegation->pagemod_limit,
+ delegation->open_delegation_type);
+ break;
+ default:
+ nfs_inode_set_delegation(inode, cred, delegation->type,
+ &delegation->stateid,
+ delegation->pagemod_limit,
+ delegation->open_delegation_type);
}
- if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
- nfs_inode_set_delegation(state->inode,
- data->owner->so_cred,
- data->o_res.delegation_type,
- &data->o_res.delegation,
- data->o_res.pagemod_limit);
- else
- nfs_inode_reclaim_delegation(state->inode,
- data->owner->so_cred,
- data->o_res.delegation_type,
- &data->o_res.delegation,
- data->o_res.pagemod_limit);
-
- if (data->o_res.do_recall)
- nfs_async_inode_return_delegation(state->inode,
- &data->o_res.delegation);
+ if (delegation->do_recall)
+ nfs_async_inode_return_delegation(inode, &delegation->stateid);
}
/*
@@ -2015,11 +2049,16 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
if (ret)
return ERR_PTR(ret);
- if (data->o_res.delegation_type != 0)
- nfs4_opendata_check_deleg(data, state);
+ nfs4_process_delegation(state->inode,
+ data->owner->so_cred,
+ data->o_arg.claim,
+ &data->o_res.delegation);
- if (!update_open_stateid(state, &data->o_res.stateid,
- NULL, data->o_arg.fmode))
+ if (!(data->o_res.rflags & NFS4_OPEN_RESULT_NO_OPEN_STATEID)) {
+ if (!update_open_stateid(state, &data->o_res.stateid,
+ NULL, data->o_arg.fmode))
+ return ERR_PTR(-EAGAIN);
+ } else if (!update_open_stateid(state, NULL, NULL, data->o_arg.fmode))
return ERR_PTR(-EAGAIN);
refcount_inc(&state->count);
@@ -2083,10 +2122,18 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
if (IS_ERR(state))
goto out;
- if (data->o_res.delegation_type != 0)
- nfs4_opendata_check_deleg(data, state);
- if (!update_open_stateid(state, &data->o_res.stateid,
- NULL, data->o_arg.fmode)) {
+ nfs4_process_delegation(state->inode,
+ data->owner->so_cred,
+ data->o_arg.claim,
+ &data->o_res.delegation);
+
+ if (!(data->o_res.rflags & NFS4_OPEN_RESULT_NO_OPEN_STATEID)) {
+ if (!update_open_stateid(state, &data->o_res.stateid,
+ NULL, data->o_arg.fmode)) {
+ nfs4_put_open_state(state);
+ state = ERR_PTR(-EAGAIN);
+ }
+ } else if (!update_open_stateid(state, NULL, NULL, data->o_arg.fmode)) {
nfs4_put_open_state(state);
state = ERR_PTR(-EAGAIN);
}
@@ -2222,7 +2269,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
{
struct nfs_delegation *delegation;
struct nfs4_opendata *opendata;
- fmode_t delegation_type = 0;
+ u32 delegation_type = NFS4_OPEN_DELEGATE_NONE;
int status;
opendata = nfs4_open_recoverdata_alloc(ctx, state,
@@ -2231,8 +2278,20 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
return PTR_ERR(opendata);
rcu_read_lock();
delegation = rcu_dereference(NFS_I(state->inode)->delegation);
- if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0)
- delegation_type = delegation->type;
+ if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0) {
+ switch(delegation->type) {
+ case FMODE_READ:
+ delegation_type = NFS4_OPEN_DELEGATE_READ;
+ if (test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags))
+ delegation_type = NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG;
+ break;
+ case FMODE_WRITE:
+ case FMODE_READ|FMODE_WRITE:
+ delegation_type = NFS4_OPEN_DELEGATE_WRITE;
+ if (test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags))
+ delegation_type = NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG;
+ }
+ }
rcu_read_unlock();
opendata->o_arg.u.delegation_type = delegation_type;
status = nfs4_open_recover(opendata, state);
@@ -2553,12 +2612,14 @@ static void nfs4_open_release(void *calldata)
struct nfs4_opendata *data = calldata;
struct nfs4_state *state = NULL;
+ /* In case of error, no cleanup! */
+ if (data->rpc_status != 0 || !data->rpc_done) {
+ nfs_release_seqid(data->o_arg.seqid);
+ goto out_free;
+ }
/* If this request hasn't been cancelled, do nothing */
if (!data->cancelled)
goto out_free;
- /* In case of error, no cleanup! */
- if (data->rpc_status != 0 || !data->rpc_done)
- goto out_free;
/* In case we need an open_confirm, no cleanup! */
if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)
goto out_free;
@@ -2825,16 +2886,16 @@ static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
}
static int nfs40_test_and_free_expired_stateid(struct nfs_server *server,
- nfs4_stateid *stateid,
- const struct cred *cred)
+ const nfs4_stateid *stateid,
+ const struct cred *cred)
{
return -NFS4ERR_BAD_STATEID;
}
#if defined(CONFIG_NFS_V4_1)
static int nfs41_test_and_free_expired_stateid(struct nfs_server *server,
- nfs4_stateid *stateid,
- const struct cred *cred)
+ const nfs4_stateid *stateid,
+ const struct cred *cred)
{
int status;
@@ -3069,10 +3130,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
fmode_t acc_mode = _nfs4_ctx_to_accessmode(ctx);
struct inode *dir = d_inode(opendata->dir);
unsigned long dir_verifier;
- unsigned int seq;
int ret;
- seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
dir_verifier = nfs_save_change_attribute(dir);
ret = _nfs4_proc_open(opendata, ctx);
@@ -3113,7 +3172,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
case NFS4_OPEN_CLAIM_DELEGATE_PREV:
if (!opendata->rpc_done)
break;
- if (opendata->o_res.delegation_type != 0)
+ if (opendata->o_res.delegation.type != 0)
dir_verifier = nfs_save_change_attribute(dir);
nfs_set_verifier(dentry, dir_verifier);
}
@@ -3125,11 +3184,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
if (ret != 0)
goto out;
- if (d_inode(dentry) == state->inode) {
+ if (d_inode(dentry) == state->inode)
nfs_inode_attach_open_context(ctx);
- if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
- nfs4_schedule_stateid_recovery(server, state);
- }
out:
if (!opendata->cancelled) {
@@ -3399,13 +3455,18 @@ static int nfs4_do_setattr(struct inode *inode, const struct cred *cred,
.inode = inode,
.stateid = &arg.stateid,
};
- unsigned long adjust_flags = NFS_INO_INVALID_CHANGE;
+ unsigned long adjust_flags = NFS_INO_INVALID_CHANGE |
+ NFS_INO_INVALID_CTIME;
int err;
if (sattr->ia_valid & (ATTR_MODE | ATTR_KILL_SUID | ATTR_KILL_SGID))
adjust_flags |= NFS_INO_INVALID_MODE;
if (sattr->ia_valid & (ATTR_UID | ATTR_GID))
adjust_flags |= NFS_INO_INVALID_OTHER;
+ if (sattr->ia_valid & ATTR_ATIME)
+ adjust_flags |= NFS_INO_INVALID_ATIME;
+ if (sattr->ia_valid & ATTR_MTIME)
+ adjust_flags |= NFS_INO_INVALID_MTIME;
do {
nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, fattr->label),
@@ -3705,7 +3766,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) {
/* Close-to-open cache consistency revalidation */
- if (!nfs4_have_delegation(inode, FMODE_READ)) {
+ if (!nfs4_have_delegation(inode, FMODE_READ, 0)) {
nfs4_bitmask_set(calldata->arg.bitmask_store,
server->cache_consistency_bitmask,
inode, 0);
@@ -3715,8 +3776,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
}
calldata->arg.share_access =
- nfs4_map_atomic_open_share(NFS_SERVER(inode),
- calldata->arg.fmode, 0);
+ nfs4_fmode_to_share_access(calldata->arg.fmode);
if (calldata->res.fattr == NULL)
calldata->arg.bitmask = NULL;
@@ -3847,8 +3907,11 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx,
static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
{
+ struct dentry *dentry = ctx->dentry;
if (ctx->state == NULL)
return;
+ if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
+ nfs4_inode_set_return_delegation_on_close(d_inode(dentry));
if (is_sync)
nfs4_close_sync(ctx->state, _nfs4_ctx_to_openmode(ctx));
else
@@ -3857,11 +3920,26 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
#define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL)
#define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL)
-#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_XATTR_SUPPORT - 1UL)
+#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_OPEN_ARGUMENTS - 1UL)
+
+#define FATTR4_WORD2_NFS42_TIME_DELEG_MASK \
+ (FATTR4_WORD2_TIME_DELEG_MODIFY|FATTR4_WORD2_TIME_DELEG_ACCESS)
+static bool nfs4_server_delegtime_capable(struct nfs4_server_caps_res *res)
+{
+ u32 share_access_want = res->open_caps.oa_share_access_want[0];
+ u32 attr_bitmask = res->attr_bitmask[2];
+
+ return (share_access_want & NFS4_SHARE_WANT_DELEG_TIMESTAMPS) &&
+ ((attr_bitmask & FATTR4_WORD2_NFS42_TIME_DELEG_MASK) ==
+ FATTR4_WORD2_NFS42_TIME_DELEG_MASK);
+}
static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
{
- u32 bitmask[3] = {}, minorversion = server->nfs_client->cl_minorversion;
+ u32 minorversion = server->nfs_client->cl_minorversion;
+ u32 bitmask[3] = {
+ [0] = FATTR4_WORD0_SUPPORTED_ATTRS,
+ };
struct nfs4_server_caps_arg args = {
.fhandle = fhandle,
.bitmask = bitmask,
@@ -3883,10 +3961,19 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
FATTR4_WORD0_CASE_INSENSITIVE |
FATTR4_WORD0_CASE_PRESERVING;
if (minorversion)
- bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT;
+ bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT |
+ FATTR4_WORD2_OPEN_ARGUMENTS;
status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
if (status == 0) {
+ bitmask[0] = (FATTR4_WORD0_SUPPORTED_ATTRS |
+ FATTR4_WORD0_FH_EXPIRE_TYPE |
+ FATTR4_WORD0_LINK_SUPPORT |
+ FATTR4_WORD0_SYMLINK_SUPPORT |
+ FATTR4_WORD0_ACLSUPPORT |
+ FATTR4_WORD0_CASE_INSENSITIVE |
+ FATTR4_WORD0_CASE_PRESERVING) &
+ res.attr_bitmask[0];
/* Sanity check the server answers */
switch (minorversion) {
case 0:
@@ -3895,9 +3982,14 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
break;
case 1:
res.attr_bitmask[2] &= FATTR4_WORD2_NFS41_MASK;
+ bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT &
+ res.attr_bitmask[2];
break;
case 2:
res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK;
+ bitmask[2] = (FATTR4_WORD2_SUPPATTR_EXCLCREAT |
+ FATTR4_WORD2_OPEN_ARGUMENTS) &
+ res.attr_bitmask[2];
}
memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS |
@@ -3944,6 +4036,12 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
sizeof(server->attr_bitmask));
server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+ if (res.open_caps.oa_share_access_want[0] &
+ NFS4_SHARE_WANT_OPEN_XOR_DELEGATION)
+ server->caps |= NFS_CAP_OPEN_XOR;
+ if (nfs4_server_delegtime_capable(&res))
+ server->caps |= NFS_CAP_DELEGTIME;
+
memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask));
server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
@@ -4028,6 +4126,23 @@ static void test_fs_location_for_trunking(struct nfs4_fs_location *location,
}
}
+static bool _is_same_nfs4_pathname(struct nfs4_pathname *path1,
+ struct nfs4_pathname *path2)
+{
+ int i;
+
+ if (path1->ncomponents != path2->ncomponents)
+ return false;
+ for (i = 0; i < path1->ncomponents; i++) {
+ if (path1->components[i].len != path2->components[i].len)
+ return false;
+ if (memcmp(path1->components[i].data, path2->components[i].data,
+ path1->components[i].len))
+ return false;
+ }
+ return true;
+}
+
static int _nfs4_discover_trunking(struct nfs_server *server,
struct nfs_fh *fhandle)
{
@@ -4061,9 +4176,13 @@ static int _nfs4_discover_trunking(struct nfs_server *server,
if (status)
goto out_free_3;
- for (i = 0; i < locations->nlocations; i++)
+ for (i = 0; i < locations->nlocations; i++) {
+ if (!_is_same_nfs4_pathname(&locations->fs_path,
+ &locations->locations[i].rootpath))
+ continue;
test_fs_location_for_trunking(&locations->locations[i], clp,
server);
+ }
out_free_3:
kfree(locations->fattr);
out_free_2:
@@ -4429,15 +4548,15 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
}
static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
- struct dentry *dentry, struct nfs_fh *fhandle,
- struct nfs_fattr *fattr)
+ struct dentry *dentry, const struct qstr *name,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
struct nfs_server *server = NFS_SERVER(dir);
int status;
struct nfs4_lookup_arg args = {
.bitmask = server->attr_bitmask,
.dir_fh = NFS_FH(dir),
- .name = &dentry->d_name,
+ .name = name,
};
struct nfs4_lookup_res res = {
.server = server,
@@ -4479,17 +4598,16 @@ static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr)
}
static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
- struct dentry *dentry, struct nfs_fh *fhandle,
- struct nfs_fattr *fattr)
+ struct dentry *dentry, const struct qstr *name,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
struct nfs4_exception exception = {
.interruptible = true,
};
struct rpc_clnt *client = *clnt;
- const struct qstr *name = &dentry->d_name;
int err;
do {
- err = _nfs4_proc_lookup(client, dir, dentry, fhandle, fattr);
+ err = _nfs4_proc_lookup(client, dir, dentry, name, fhandle, fattr);
trace_nfs4_lookup(dir, name, err);
switch (err) {
case -NFS4ERR_BADNAME:
@@ -4524,13 +4642,13 @@ out:
return err;
}
-static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry,
+static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
int status;
struct rpc_clnt *client = NFS_CLIENT(dir);
- status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr);
+ status = nfs4_proc_lookup_common(&client, dir, dentry, name, fhandle, fattr);
if (client != NFS_CLIENT(dir)) {
rpc_shutdown_client(client);
nfs_fixup_secinfo_attributes(fattr);
@@ -4545,7 +4663,8 @@ nfs4_proc_lookup_mountpoint(struct inode *dir, struct dentry *dentry,
struct rpc_clnt *client = NFS_CLIENT(dir);
int status;
- status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr);
+ status = nfs4_proc_lookup_common(&client, dir, dentry, &dentry->d_name,
+ fhandle, fattr);
if (status < 0)
return ERR_PTR(status);
return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client;
@@ -4622,7 +4741,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
};
int status = 0;
- if (!nfs4_have_delegation(inode, FMODE_READ)) {
+ if (!nfs4_have_delegation(inode, FMODE_READ, 0)) {
res.fattr = nfs_alloc_fattr();
if (res.fattr == NULL)
return -ENOMEM;
@@ -4940,8 +5059,9 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct
goto out;
nfs4_inode_make_writeable(inode);
- nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.fattr->label), inode,
- NFS_INO_INVALID_CHANGE);
+ nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.fattr->label),
+ inode,
+ NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME);
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
if (!status) {
nfs4_update_changeattr(dir, &res.cinfo, res.fattr->time_start,
@@ -5461,7 +5581,7 @@ static bool nfs4_read_plus_not_supported(struct rpc_task *task,
struct rpc_message *msg = &task->tk_msg;
if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS] &&
- server->caps & NFS_CAP_READ_PLUS && task->tk_status == -ENOTSUPP) {
+ task->tk_status == -ENOTSUPP) {
server->caps &= ~NFS_CAP_READ_PLUS;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
rpc_restart_call_prepare(task);
@@ -5591,7 +5711,7 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr)
/* Otherwise, request attributes if and only if we don't hold
* a delegation
*/
- return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
+ return nfs4_have_delegation(hdr->inode, FMODE_READ, 0) == 0;
}
void nfs4_bitmask_set(__u32 bitmask[], const __u32 src[],
@@ -6153,7 +6273,7 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf,
size_t buflen)
{
struct nfs_server *server = NFS_SERVER(inode);
- struct nfs4_label label = {0, 0, buflen, buf};
+ struct nfs4_label label = {0, 0, 0, buflen, buf};
u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL };
struct nfs_fattr fattr = {
@@ -6258,7 +6378,7 @@ static int nfs4_do_set_security_label(struct inode *inode,
static int
nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen)
{
- struct nfs4_label ilabel = {0, 0, buflen, (char *)buf };
+ struct nfs4_label ilabel = {0, 0, 0, buflen, (char *)buf };
struct nfs_fattr *fattr;
int status;
@@ -6273,6 +6393,7 @@ nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen)
if (status == 0)
nfs_setsecurity(inode, fattr);
+ nfs_free_fattr(fattr);
return status;
}
#endif /* CONFIG_NFS_V4_SECURITY_LABEL */
@@ -6558,6 +6679,7 @@ struct nfs4_delegreturndata {
u32 roc_barrier;
bool roc;
} lr;
+ struct nfs4_delegattr sattr;
struct nfs_fattr fattr;
int rpc_status;
struct inode *inode;
@@ -6582,6 +6704,30 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
&data->res.lr_ret) == -EAGAIN)
goto out_restart;
+ if (data->args.sattr_args && task->tk_status != 0) {
+ switch(data->res.sattr_ret) {
+ case 0:
+ data->args.sattr_args = NULL;
+ data->res.sattr_res = false;
+ break;
+ case -NFS4ERR_ADMIN_REVOKED:
+ case -NFS4ERR_DELEG_REVOKED:
+ case -NFS4ERR_EXPIRED:
+ case -NFS4ERR_BAD_STATEID:
+ /* Let the main handler below do stateid recovery */
+ break;
+ case -NFS4ERR_OLD_STATEID:
+ if (nfs4_refresh_delegation_stateid(&data->stateid,
+ data->inode))
+ goto out_restart;
+ fallthrough;
+ default:
+ data->args.sattr_args = NULL;
+ data->res.sattr_res = false;
+ goto out_restart;
+ }
+ }
+
switch (task->tk_status) {
case 0:
renew_lease(data->res.server, data->timestamp);
@@ -6675,7 +6821,10 @@ static const struct rpc_call_ops nfs4_delegreturn_ops = {
.rpc_release = nfs4_delegreturn_release,
};
-static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync)
+static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
+ const nfs4_stateid *stateid,
+ struct nfs_delegation *delegation,
+ int issync)
{
struct nfs4_delegreturndata *data;
struct nfs_server *server = NFS_SERVER(inode);
@@ -6727,12 +6876,27 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
}
}
+ if (delegation &&
+ test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags)) {
+ if (delegation->type & FMODE_READ) {
+ data->sattr.atime = inode_get_atime(inode);
+ data->sattr.atime_set = true;
+ }
+ if (delegation->type & FMODE_WRITE) {
+ data->sattr.mtime = inode_get_mtime(inode);
+ data->sattr.mtime_set = true;
+ }
+ data->args.sattr_args = &data->sattr;
+ data->res.sattr_res = true;
+ }
+
if (!data->inode)
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1,
1);
else
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1,
0);
+
task_setup_data.callback_data = data;
msg.rpc_argp = &data->args;
msg.rpc_resp = &data->res;
@@ -6750,13 +6914,16 @@ out:
return status;
}
-int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync)
+int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
+ const nfs4_stateid *stateid,
+ struct nfs_delegation *delegation, int issync)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_exception exception = { };
int err;
do {
- err = _nfs4_proc_delegreturn(inode, cred, stateid, issync);
+ err = _nfs4_proc_delegreturn(inode, cred, stateid,
+ delegation, issync);
trace_nfs4_delegreturn(inode, stateid, err);
switch (err) {
case -NFS4ERR_STALE_STATEID:
@@ -6800,7 +6967,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
switch (status) {
case 0:
- request->fl_type = F_UNLCK;
+ request->c.flc_type = F_UNLCK;
break;
case -NFS4ERR_DENIED:
status = 0;
@@ -7018,8 +7185,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
/* Ensure this is an unlock - when canceling a lock, the
* canceled lock is passed in, and it won't be an unlock.
*/
- fl->fl_type = F_UNLCK;
- if (fl->fl_flags & FL_CLOSE)
+ fl->c.flc_type = F_UNLCK;
+ if (fl->c.flc_flags & FL_CLOSE)
set_bit(NFS_CONTEXT_UNLOCK, &ctx->flags);
data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
@@ -7045,11 +7212,11 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
struct rpc_task *task;
struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t);
int status = 0;
- unsigned char fl_flags = request->fl_flags;
+ unsigned char saved_flags = request->c.flc_flags;
status = nfs4_set_lock_state(state, request);
/* Unlock _before_ we do the RPC call */
- request->fl_flags |= FL_EXISTS;
+ request->c.flc_flags |= FL_EXISTS;
/* Exclude nfs_delegation_claim_locks() */
mutex_lock(&sp->so_delegreturn_mutex);
/* Exclude nfs4_reclaim_open_stateid() - note nesting! */
@@ -7073,14 +7240,16 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
status = -ENOMEM;
if (IS_ERR(seqid))
goto out;
- task = nfs4_do_unlck(request, nfs_file_open_context(request->fl_file), lsp, seqid);
+ task = nfs4_do_unlck(request,
+ nfs_file_open_context(request->c.flc_file),
+ lsp, seqid);
status = PTR_ERR(task);
if (IS_ERR(task))
goto out;
status = rpc_wait_for_completion_task(task);
rpc_put_task(task);
out:
- request->fl_flags = fl_flags;
+ request->c.flc_flags = saved_flags;
trace_nfs4_unlock(request, state, F_SETLK, status);
return status;
}
@@ -7191,7 +7360,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
data->timestamp);
if (data->arg.new_lock && !data->cancelled) {
- data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
+ data->fl.c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
goto out_restart;
}
@@ -7292,7 +7461,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
if (nfs_server_capable(state->inode, NFS_CAP_MOVEABLE))
task_setup_data.flags |= RPC_TASK_MOVEABLE;
- data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file),
+ data = nfs4_alloc_lockdata(fl,
+ nfs_file_open_context(fl->c.flc_file),
fl->fl_u.nfs4_fl.owner, GFP_KERNEL);
if (data == NULL)
return -ENOMEM;
@@ -7398,10 +7568,10 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
{
struct nfs_inode *nfsi = NFS_I(state->inode);
struct nfs4_state_owner *sp = state->owner;
- unsigned char fl_flags = request->fl_flags;
+ unsigned char flags = request->c.flc_flags;
int status;
- request->fl_flags |= FL_ACCESS;
+ request->c.flc_flags |= FL_ACCESS;
status = locks_lock_inode_wait(state->inode, request);
if (status < 0)
goto out;
@@ -7410,7 +7580,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
/* Yes: cache locks! */
/* ...but avoid races with delegation recall... */
- request->fl_flags = fl_flags & ~FL_SLEEP;
+ request->c.flc_flags = flags & ~FL_SLEEP;
status = locks_lock_inode_wait(state->inode, request);
up_read(&nfsi->rwsem);
mutex_unlock(&sp->so_delegreturn_mutex);
@@ -7420,7 +7590,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
mutex_unlock(&sp->so_delegreturn_mutex);
status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
out:
- request->fl_flags = fl_flags;
+ request->c.flc_flags = flags;
return status;
}
@@ -7562,7 +7732,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
if (!(IS_SETLK(cmd) || IS_SETLKW(cmd)))
return -EINVAL;
- if (request->fl_type == F_UNLCK) {
+ if (lock_is_unlock(request)) {
if (state != NULL)
return nfs4_proc_unlck(state, cmd, request);
return 0;
@@ -7571,7 +7741,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
if (state == NULL)
return -ENOLCK;
- if ((request->fl_flags & FL_POSIX) &&
+ if ((request->c.flc_flags & FL_POSIX) &&
!test_bit(NFS_STATE_POSIX_LOCKS, &state->flags))
return -ENOLCK;
@@ -7579,7 +7749,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
* Don't rely on the VFS having checked the file open mode,
* since it won't do this for flock() locks.
*/
- switch (request->fl_type) {
+ switch (request->c.flc_type) {
case F_RDLCK:
if (!(filp->f_mode & FMODE_READ))
return -EBADF;
@@ -7601,7 +7771,7 @@ static int nfs4_delete_lease(struct file *file, void **priv)
return generic_setlease(file, F_UNLCK, NULL, priv);
}
-static int nfs4_add_lease(struct file *file, int arg, struct file_lock **lease,
+static int nfs4_add_lease(struct file *file, int arg, struct file_lease **lease,
void **priv)
{
struct inode *inode = file_inode(file);
@@ -7609,17 +7779,17 @@ static int nfs4_add_lease(struct file *file, int arg, struct file_lock **lease,
int ret;
/* No delegation, no lease */
- if (!nfs4_have_delegation(inode, type))
+ if (!nfs4_have_delegation(inode, type, 0))
return -EAGAIN;
ret = generic_setlease(file, arg, lease, priv);
- if (ret || nfs4_have_delegation(inode, type))
+ if (ret || nfs4_have_delegation(inode, type, 0))
return ret;
/* We raced with a delegation return */
nfs4_delete_lease(file, priv);
return -EAGAIN;
}
-int nfs4_proc_setlease(struct file *file, int arg, struct file_lock **lease,
+int nfs4_proc_setlease(struct file *file, int arg, struct file_lease **lease,
void **priv)
{
switch (arg) {
@@ -8820,7 +8990,7 @@ nfs4_run_exchange_id(struct nfs_client *clp, const struct cred *cred,
#ifdef CONFIG_NFS_V4_1_MIGRATION
calldata->args.flags |= EXCHGID4_FLAG_SUPP_MOVED_MIGR;
#endif
- if (test_bit(NFS_CS_DS, &clp->cl_flags))
+ if (test_bit(NFS_CS_PNFS, &clp->cl_flags))
calldata->args.flags |= EXCHGID4_FLAG_USE_PNFS_DS;
msg.rpc_argp = &calldata->args;
msg.rpc_resp = &calldata->res;
@@ -8970,10 +9140,12 @@ try_again:
return;
status = task->tk_status;
- if (status == 0)
+ if (status == 0) {
status = nfs4_detect_session_trunking(adata->clp,
task->tk_msg.rpc_resp, xprt);
-
+ trace_nfs4_trunked_exchange_id(adata->clp,
+ xprt->address_strings[RPC_DISPLAY_ADDR], status);
+ }
if (status == 0)
rpc_clnt_xprt_switch_add_xprt(clnt, xprt);
else if (status != -NFS4ERR_DELAY && rpc_clnt_xprt_switch_has_addr(clnt,
@@ -9832,6 +10004,11 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
if (!nfs41_sequence_process(task, &lrp->res.seq_res))
return;
+ if (task->tk_rpc_status == -ETIMEDOUT) {
+ lrp->rpc_status = -EAGAIN;
+ lrp->res.lrs_present = 0;
+ return;
+ }
/*
* Was there an RPC level error? Assume the call succeeded,
* and that we need to release the layout
@@ -9851,13 +10028,25 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
fallthrough;
default:
task->tk_status = 0;
+ lrp->res.lrs_present = 0;
fallthrough;
case 0:
break;
+ case -NFS4ERR_BADSESSION:
+ case -NFS4ERR_DEADSESSION:
+ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ nfs4_schedule_session_recovery(server->nfs_client->cl_session,
+ task->tk_status);
+ lrp->res.lrs_present = 0;
+ lrp->rpc_status = -EAGAIN;
+ task->tk_status = 0;
+ break;
case -NFS4ERR_DELAY:
- if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN)
- break;
- goto out_restart;
+ if (nfs4_async_handle_error(task, server, NULL, NULL) ==
+ -EAGAIN)
+ goto out_restart;
+ lrp->res.lrs_present = 0;
+ break;
}
return;
out_restart:
@@ -9871,8 +10060,13 @@ static void nfs4_layoutreturn_release(void *calldata)
struct nfs4_layoutreturn *lrp = calldata;
struct pnfs_layout_hdr *lo = lrp->args.layout;
- pnfs_layoutreturn_free_lsegs(lo, &lrp->args.stateid, &lrp->args.range,
+ if (lrp->rpc_status == 0 || !lrp->inode)
+ pnfs_layoutreturn_free_lsegs(
+ lo, &lrp->args.stateid, &lrp->args.range,
lrp->res.lrs_present ? &lrp->res.stateid : NULL);
+ else
+ pnfs_layoutreturn_retry_later(lo, &lrp->args.stateid,
+ &lrp->args.range);
nfs4_sequence_free_slot(&lrp->res.seq_res);
if (lrp->ld_private.ops && lrp->ld_private.ops->free)
lrp->ld_private.ops->free(&lrp->ld_private);
@@ -9888,7 +10082,7 @@ static const struct rpc_call_ops nfs4_layoutreturn_call_ops = {
.rpc_release = nfs4_layoutreturn_release,
};
-int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
+int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, unsigned int flags)
{
struct rpc_task *task;
struct rpc_message msg = {
@@ -9911,7 +10105,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
&task_setup_data.rpc_client, &msg);
lrp->inode = nfs_igrab_and_active(lrp->args.inode);
- if (!sync) {
+ if (flags & PNFS_FL_LAYOUTRETURN_ASYNC) {
if (!lrp->inode) {
nfs4_layoutreturn_release(lrp);
return -EAGAIN;
@@ -9919,6 +10113,8 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
task_setup_data.flags |= RPC_TASK_ASYNC;
}
if (!lrp->inode)
+ flags |= PNFS_FL_LAYOUTRETURN_PRIVILEGED;
+ if (flags & PNFS_FL_LAYOUTRETURN_PRIVILEGED)
nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1,
1);
else
@@ -9927,7 +10123,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
- if (sync)
+ if (!(flags & PNFS_FL_LAYOUTRETURN_ASYNC))
status = task->tk_status;
trace_nfs4_layoutreturn(lrp->args.inode, &lrp->args.stateid, status);
dprintk("<-- %s status=%d\n", __func__, status);
@@ -10245,12 +10441,12 @@ out:
}
static int _nfs41_test_stateid(struct nfs_server *server,
- nfs4_stateid *stateid,
- const struct cred *cred)
+ const nfs4_stateid *stateid,
+ const struct cred *cred)
{
int status;
struct nfs41_test_stateid_args args = {
- .stateid = stateid,
+ .stateid = *stateid,
};
struct nfs41_test_stateid_res res;
struct rpc_message msg = {
@@ -10306,8 +10502,8 @@ static void nfs4_handle_delay_or_session_error(struct nfs_server *server,
* failed or the state ID is not currently valid.
*/
static int nfs41_test_stateid(struct nfs_server *server,
- nfs4_stateid *stateid,
- const struct cred *cred)
+ const nfs4_stateid *stateid,
+ const struct cred *cred)
{
struct nfs4_exception exception = {
.interruptible = true,
@@ -10615,29 +10811,33 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
{
ssize_t error, error2, error3;
+ size_t left = size;
- error = generic_listxattr(dentry, list, size);
+ error = generic_listxattr(dentry, list, left);
if (error < 0)
return error;
if (list) {
list += error;
- size -= error;
+ left -= error;
}
- error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, size);
+ error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, left);
if (error2 < 0)
return error2;
if (list) {
list += error2;
- size -= error2;
+ left -= error2;
}
- error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, size);
+ error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, left);
if (error3 < 0)
return error3;
- return error + error2 + error3;
+ error += error2 + error3;
+ if (size && error > size)
+ return -ERANGE;
+ return error;
}
static void nfs4_enable_swap(struct inode *inode)
@@ -10733,6 +10933,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.close_context = nfs4_close_context,
.open_context = nfs4_atomic_open,
.have_delegation = nfs4_have_delegation,
+ .return_delegation = nfs4_inode_return_delegation,
.alloc_client = nfs4_alloc_client,
.init_client = nfs4_init_client,
.free_client = nfs4_free_client,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 9a5d911a7edc..542cdf71229f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -501,11 +501,7 @@ nfs4_alloc_state_owner(struct nfs_server *server,
sp = kzalloc(sizeof(*sp), gfp_flags);
if (!sp)
return NULL;
- sp->so_seqid.owner_id = ida_alloc(&server->openowner_id, gfp_flags);
- if (sp->so_seqid.owner_id < 0) {
- kfree(sp);
- return NULL;
- }
+ sp->so_seqid.owner_id = atomic64_inc_return(&server->owner_ctr);
sp->so_server = server;
sp->so_cred = get_cred(cred);
spin_lock_init(&sp->so_lock);
@@ -513,7 +509,6 @@ nfs4_alloc_state_owner(struct nfs_server *server,
nfs4_init_seqid_counter(&sp->so_seqid);
atomic_set(&sp->so_count, 1);
INIT_LIST_HEAD(&sp->so_lru);
- seqcount_spinlock_init(&sp->so_reclaim_seqcount, &sp->so_lock);
mutex_init(&sp->so_delegreturn_mutex);
return sp;
}
@@ -537,7 +532,6 @@ static void nfs4_free_state_owner(struct nfs4_state_owner *sp)
{
nfs4_destroy_seqid_counter(&sp->so_seqid);
put_cred(sp->so_cred);
- ida_free(&sp->so_server->openowner_id, sp->so_seqid.owner_id);
kfree(sp);
}
@@ -847,15 +841,15 @@ void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode)
*/
static struct nfs4_lock_state *
__nfs4_find_lock_state(struct nfs4_state *state,
- fl_owner_t fl_owner, fl_owner_t fl_owner2)
+ fl_owner_t owner, fl_owner_t owner2)
{
struct nfs4_lock_state *pos, *ret = NULL;
list_for_each_entry(pos, &state->lock_states, ls_locks) {
- if (pos->ls_owner == fl_owner) {
+ if (pos->ls_owner == owner) {
ret = pos;
break;
}
- if (pos->ls_owner == fl_owner2)
+ if (pos->ls_owner == owner2)
ret = pos;
}
if (ret)
@@ -868,7 +862,7 @@ __nfs4_find_lock_state(struct nfs4_state *state,
* exists, return an uninitialized one.
*
*/
-static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
+static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t owner)
{
struct nfs4_lock_state *lsp;
struct nfs_server *server = state->owner->so_server;
@@ -879,20 +873,14 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
nfs4_init_seqid_counter(&lsp->ls_seqid);
refcount_set(&lsp->ls_count, 1);
lsp->ls_state = state;
- lsp->ls_owner = fl_owner;
- lsp->ls_seqid.owner_id = ida_alloc(&server->lockowner_id, GFP_KERNEL_ACCOUNT);
- if (lsp->ls_seqid.owner_id < 0)
- goto out_free;
+ lsp->ls_owner = owner;
+ lsp->ls_seqid.owner_id = atomic64_inc_return(&server->owner_ctr);
INIT_LIST_HEAD(&lsp->ls_locks);
return lsp;
-out_free:
- kfree(lsp);
- return NULL;
}
void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
{
- ida_free(&server->lockowner_id, lsp->ls_seqid.owner_id);
nfs4_destroy_seqid_counter(&lsp->ls_seqid);
kfree(lsp);
}
@@ -980,7 +968,7 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
if (fl->fl_ops != NULL)
return 0;
- lsp = nfs4_get_lock_state(state, fl->fl_owner);
+ lsp = nfs4_get_lock_state(state, fl->c.flc_owner);
if (lsp == NULL)
return -ENOMEM;
fl->fl_u.nfs4_fl.owner = lsp;
@@ -993,7 +981,7 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
const struct nfs_lock_context *l_ctx)
{
struct nfs4_lock_state *lsp;
- fl_owner_t fl_owner, fl_flock_owner;
+ fl_owner_t owner, fl_flock_owner;
int ret = -ENOENT;
if (l_ctx == NULL)
@@ -1002,11 +990,11 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
goto out;
- fl_owner = l_ctx->lockowner;
+ owner = l_ctx->lockowner;
fl_flock_owner = l_ctx->open_context->flock_owner;
spin_lock(&state->state_lock);
- lsp = __nfs4_find_lock_state(state, fl_owner, fl_flock_owner);
+ lsp = __nfs4_find_lock_state(state, owner, fl_flock_owner);
if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags))
ret = -EIO;
else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) {
@@ -1095,14 +1083,12 @@ void nfs_release_seqid(struct nfs_seqid *seqid)
return;
sequence = seqid->sequence;
spin_lock(&sequence->lock);
- list_del_init(&seqid->list);
- if (!list_empty(&sequence->list)) {
- struct nfs_seqid *next;
-
- next = list_first_entry(&sequence->list,
- struct nfs_seqid, list);
+ if (list_is_first(&seqid->list, &sequence->list) &&
+ !list_is_singular(&sequence->list)) {
+ struct nfs_seqid *next = list_next_entry(seqid, list);
rpc_wake_up_queued_task(&sequence->wait, next->task);
}
+ list_del_init(&seqid->list);
spin_unlock(&sequence->lock);
}
@@ -1529,8 +1515,8 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
down_write(&nfsi->rwsem);
spin_lock(&flctx->flc_lock);
restart:
- list_for_each_entry(fl, list, fl_list) {
- if (nfs_file_open_context(fl->fl_file)->state != state)
+ for_each_file_lock(fl, list) {
+ if (nfs_file_open_context(fl->c.flc_file)->state != state)
continue;
spin_unlock(&flctx->flc_lock);
status = ops->recover_lock(state, fl);
@@ -1597,7 +1583,7 @@ static void nfs42_complete_copies(struct nfs4_state_owner *sp, struct nfs4_state
complete(&copy->completion);
}
}
- list_for_each_entry(copy, &sp->so_server->ss_copies, src_copies) {
+ list_for_each_entry(copy, &sp->so_server->ss_src_copies, src_copies) {
if ((test_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags) &&
!nfs4_stateid_match_other(&state->stateid,
&copy->parent_src_state->stateid)))
@@ -1667,7 +1653,6 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp,
* server that doesn't support a grace period.
*/
spin_lock(&sp->so_lock);
- raw_write_seqcount_begin(&sp->so_reclaim_seqcount);
restart:
list_for_each_entry(state, &sp->so_states, open_states) {
if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
@@ -1735,7 +1720,6 @@ restart:
spin_lock(&sp->so_lock);
goto restart;
}
- raw_write_seqcount_end(&sp->so_reclaim_seqcount);
spin_unlock(&sp->so_lock);
#ifdef CONFIG_NFS_V4_2
if (found_ssc_copy_state)
@@ -1745,7 +1729,6 @@ restart:
out_err:
nfs4_put_open_state(state);
spin_lock(&sp->so_lock);
- raw_write_seqcount_end(&sp->so_reclaim_seqcount);
spin_unlock(&sp->so_lock);
return status;
}
@@ -1867,6 +1850,7 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
if (!nfs4_state_clear_reclaim_reboot(clp))
return;
+ pnfs_destroy_all_layouts(clp);
ops = clp->cl_mvops->reboot_recovery_ops;
cred = nfs4_get_clid_cred(clp);
err = nfs4_reclaim_complete(clp, ops, cred);
@@ -1928,9 +1912,12 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov
struct nfs_server *server;
struct rb_node *pos;
LIST_HEAD(freeme);
- int status = 0;
int lost_locks = 0;
+ int status;
+ status = nfs4_begin_drain_session(clp);
+ if (status < 0)
+ return status;
restart:
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
@@ -1957,6 +1944,7 @@ restart:
set_bit(ops->owner_flag_bit, &sp->so_flags);
nfs4_put_state_owner(sp);
status = nfs4_recovery_handle_error(clp, status);
+ nfs4_free_state_owners(&freeme);
return (status != 0) ? status : -EAGAIN;
}
@@ -1967,6 +1955,7 @@ restart:
}
rcu_read_unlock();
nfs4_free_state_owners(&freeme);
+ nfs_local_probe_async(clp);
if (lost_locks)
pr_warn("NFS: %s: lost %d locks\n",
clp->cl_hostname, lost_locks);
@@ -2023,6 +2012,12 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
nfs_mark_client_ready(clp, -EPERM);
clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
return -EPERM;
+ case -ETIMEDOUT:
+ if (clp->cl_cons_state == NFS_CS_SESSION_INITING) {
+ nfs_mark_client_ready(clp, -EIO);
+ return -EIO;
+ }
+ fallthrough;
case -EACCES:
case -NFS4ERR_DELAY:
case -EAGAIN:
@@ -2069,7 +2064,6 @@ static int nfs4_establish_lease(struct nfs_client *clp)
put_cred(cred);
if (status != 0)
return status;
- pnfs_destroy_all_layouts(clp);
return 0;
}
@@ -2117,6 +2111,7 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
{
struct nfs_client *clp = server->nfs_client;
struct nfs4_fs_locations *locations = NULL;
+ struct nfs_fattr *fattr;
struct inode *inode;
struct page *page;
int status, result;
@@ -2126,19 +2121,16 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
(unsigned long long)server->fsid.minor,
clp->cl_hostname);
- result = 0;
page = alloc_page(GFP_KERNEL);
locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
- if (page == NULL || locations == NULL) {
- dprintk("<-- %s: no memory\n", __func__);
- goto out;
- }
- locations->fattr = nfs_alloc_fattr();
- if (locations->fattr == NULL) {
+ fattr = nfs_alloc_fattr();
+ if (page == NULL || locations == NULL || fattr == NULL) {
dprintk("<-- %s: no memory\n", __func__);
+ result = 0;
goto out;
}
+ locations->fattr = fattr;
inode = d_inode(server->super->s_root);
result = nfs4_proc_get_locations(server, NFS_FH(inode), locations,
page, cred);
@@ -2683,6 +2675,8 @@ static void nfs4_state_manager(struct nfs_client *clp)
section = "reclaim reboot";
status = nfs4_do_reclaim(clp,
clp->cl_mvops->reboot_recovery_ops);
+ if (status == 0)
+ status = pnfs_layout_handle_reboot(clp);
if (status == -EAGAIN)
continue;
if (status < 0)
@@ -2694,6 +2688,9 @@ static void nfs4_state_manager(struct nfs_client *clp)
/* Detect expired delegations... */
if (test_and_clear_bit(NFS4CLNT_DELEGATION_EXPIRED, &clp->cl_state)) {
section = "detect expired delegations";
+ status = nfs4_begin_drain_session(clp);
+ if (status < 0)
+ goto out_error;
nfs_reap_expired_delegations(clp);
continue;
}
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index d09bcfd7db89..b29a26923ce0 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -145,6 +145,7 @@ static int do_nfs4_mount(struct nfs_server *server,
const char *export_path)
{
struct nfs_fs_context *root_ctx;
+ struct nfs_fs_context *ctx;
struct fs_context *root_fc;
struct vfsmount *root_mnt;
struct dentry *dentry;
@@ -157,6 +158,12 @@ static int do_nfs4_mount(struct nfs_server *server,
.dirfd = -1,
};
+ struct fs_parameter param_fsc = {
+ .key = "fsc",
+ .type = fs_value_is_string,
+ .dirfd = -1,
+ };
+
if (IS_ERR(server))
return PTR_ERR(server);
@@ -168,9 +175,26 @@ static int do_nfs4_mount(struct nfs_server *server,
kfree(root_fc->source);
root_fc->source = NULL;
+ ctx = nfs_fc2context(fc);
root_ctx = nfs_fc2context(root_fc);
root_ctx->internal = true;
root_ctx->server = server;
+
+ if (ctx->fscache_uniq) {
+ len = strlen(ctx->fscache_uniq);
+ param_fsc.size = len;
+ param_fsc.string = kmemdup_nul(ctx->fscache_uniq, len, GFP_KERNEL);
+ if (param_fsc.string == NULL) {
+ put_fs_context(root_fc);
+ return -ENOMEM;
+ }
+ ret = vfs_parse_fs_param(root_fc, &param_fsc);
+ kfree(param_fsc.string);
+ if (ret < 0) {
+ put_fs_context(root_fc);
+ return ret;
+ }
+ }
/* We leave export_path unset as it's not used to find the root. */
len = strlen(hostname) + 5;
@@ -308,6 +332,7 @@ static void __exit exit_nfs_v4(void)
nfs_dns_resolver_destroy();
}
+MODULE_DESCRIPTION("NFSv4 client support");
MODULE_LICENSE("GPL");
module_init(init_nfs_v4);
diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c
index 886a7c4c60b3..d1a92d8f8ba4 100644
--- a/fs/nfs/nfs4sysctl.c
+++ b/fs/nfs/nfs4sysctl.c
@@ -17,7 +17,7 @@ static const int nfs_set_port_min;
static const int nfs_set_port_max = 65535;
static struct ctl_table_header *nfs4_callback_sysctl_table;
-static struct ctl_table nfs4_cb_sysctls[] = {
+static const struct ctl_table nfs4_cb_sysctls[] = {
{
.procname = "nfs_callback_tcpport",
.data = &nfs_callback_set_tcpport,
diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c
index d9ac556bebcf..389941ccc9c9 100644
--- a/fs/nfs/nfs4trace.c
+++ b/fs/nfs/nfs4trace.c
@@ -2,6 +2,8 @@
/*
* Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com>
*/
+#include <uapi/linux/pr.h>
+#include <linux/blkdev.h>
#include <linux/nfs_fs.h>
#include "nfs4_fs.h"
#include "internal.h"
@@ -28,4 +30,11 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_pagelist);
EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_read_error);
EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_write_error);
EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_commit_error);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_reg);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_reg_err);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_unreg);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_unreg_err);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(fl_getdevinfo);
#endif
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index d27919d7241d..22c973316f0b 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -47,7 +47,7 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event,
TP_fast_assign(
__entry->error = error < 0 ? -error : 0;
- __assign_str(dstaddr, clp->cl_hostname);
+ __assign_str(dstaddr);
),
TP_printk(
@@ -77,6 +77,36 @@ DEFINE_NFS4_CLIENTID_EVENT(nfs4_bind_conn_to_session);
DEFINE_NFS4_CLIENTID_EVENT(nfs4_sequence);
DEFINE_NFS4_CLIENTID_EVENT(nfs4_reclaim_complete);
+TRACE_EVENT(nfs4_trunked_exchange_id,
+ TP_PROTO(
+ const struct nfs_client *clp,
+ const char *addr,
+ int error
+ ),
+
+ TP_ARGS(clp, addr, error),
+
+ TP_STRUCT__entry(
+ __string(main_addr, clp->cl_hostname)
+ __string(trunk_addr, addr)
+ __field(unsigned long, error)
+ ),
+
+ TP_fast_assign(
+ __entry->error = error < 0 ? -error : 0;
+ __assign_str(main_addr);
+ __assign_str(trunk_addr);
+ ),
+
+ TP_printk(
+ "error=%ld (%s) main_addr=%s trunk_addr=%s",
+ -__entry->error,
+ show_nfs4_status(__entry->error),
+ __get_str(main_addr),
+ __get_str(trunk_addr)
+ )
+);
+
TRACE_EVENT(nfs4_sequence_done,
TP_PROTO(
const struct nfs4_session *session,
@@ -335,7 +365,7 @@ TRACE_EVENT(nfs4_state_mgr,
TP_fast_assign(
__entry->state = clp->cl_state;
- __assign_str(hostname, clp->cl_hostname);
+ __assign_str(hostname);
),
TP_printk(
@@ -363,8 +393,8 @@ TRACE_EVENT(nfs4_state_mgr_failed,
TP_fast_assign(
__entry->error = status < 0 ? -status : 0;
__entry->state = clp->cl_state;
- __assign_str(hostname, clp->cl_hostname);
- __assign_str(section, section);
+ __assign_str(hostname);
+ __assign_str(section);
),
TP_printk(
@@ -548,7 +578,7 @@ DECLARE_EVENT_CLASS(nfs4_open_event,
__entry->fhandle = 0;
}
__entry->dir = NFS_FILEID(d_inode(ctx->dentry->d_parent));
- __assign_str(name, ctx->dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -699,7 +729,7 @@ DECLARE_EVENT_CLASS(nfs4_lock_event,
__entry->error = error < 0 ? -error : 0;
__entry->cmd = cmd;
- __entry->type = request->fl_type;
+ __entry->type = request->c.flc_type;
__entry->start = request->fl_start;
__entry->end = request->fl_end;
__entry->dev = inode->i_sb->s_dev;
@@ -771,7 +801,7 @@ TRACE_EVENT(nfs4_set_lock,
__entry->error = error < 0 ? -error : 0;
__entry->cmd = cmd;
- __entry->type = request->fl_type;
+ __entry->type = request->c.flc_type;
__entry->start = request->fl_start;
__entry->end = request->fl_end;
__entry->dev = inode->i_sb->s_dev;
@@ -1042,7 +1072,7 @@ DECLARE_EVENT_CLASS(nfs4_lookup_event,
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__entry->error = -error;
- __assign_str(name, name->name);
+ __assign_str(name);
),
TP_printk(
@@ -1126,8 +1156,8 @@ TRACE_EVENT(nfs4_rename,
__entry->olddir = NFS_FILEID(olddir);
__entry->newdir = NFS_FILEID(newdir);
__entry->error = error < 0 ? -error : 0;
- __assign_str(oldname, oldname->name);
- __assign_str(newname, newname->name);
+ __assign_str(oldname);
+ __assign_str(newname);
),
TP_printk(
@@ -1329,7 +1359,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event,
__entry->fileid = 0;
__entry->dev = 0;
}
- __assign_str(dstaddr, clp ? clp->cl_hostname : "unknown");
+ __assign_str(dstaddr);
),
TP_printk(
@@ -1386,7 +1416,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event,
__entry->fileid = 0;
__entry->dev = 0;
}
- __assign_str(dstaddr, clp ? clp->cl_hostname : "unknown");
+ __assign_str(dstaddr);
__entry->stateid_seq =
be32_to_cpu(stateid->seqid);
__entry->stateid_hash =
@@ -1930,7 +1960,7 @@ DECLARE_EVENT_CLASS(nfs4_deviceid_event,
),
TP_fast_assign(
- __assign_str(dstaddr, clp->cl_hostname);
+ __assign_str(dstaddr);
memcpy(__entry->deviceid, deviceid->data,
NFS4_DEVICEID4_SIZE);
),
@@ -1968,7 +1998,7 @@ DECLARE_EVENT_CLASS(nfs4_deviceid_status,
TP_fast_assign(
__entry->dev = server->s_dev;
__entry->status = status;
- __assign_str(dstaddr, server->nfs_client->cl_hostname);
+ __assign_str(dstaddr);
memcpy(__entry->deviceid, deviceid->data,
NFS4_DEVICEID4_SIZE);
),
@@ -1991,6 +2021,34 @@ DECLARE_EVENT_CLASS(nfs4_deviceid_status,
DEFINE_PNFS_DEVICEID_STATUS(nfs4_getdeviceinfo);
DEFINE_PNFS_DEVICEID_STATUS(nfs4_find_deviceid);
+TRACE_EVENT(fl_getdevinfo,
+ TP_PROTO(
+ const struct nfs_server *server,
+ const struct nfs4_deviceid *deviceid,
+ char *ds_remotestr
+ ),
+ TP_ARGS(server, deviceid, ds_remotestr),
+
+ TP_STRUCT__entry(
+ __string(mds_addr, server->nfs_client->cl_hostname)
+ __array(unsigned char, deviceid, NFS4_DEVICEID4_SIZE)
+ __string(ds_ips, ds_remotestr)
+ ),
+
+ TP_fast_assign(
+ __assign_str(mds_addr);
+ __assign_str(ds_ips);
+ memcpy(__entry->deviceid, deviceid->data,
+ NFS4_DEVICEID4_SIZE);
+ ),
+ TP_printk(
+ "deviceid=%s, mds_addr=%s, ds_ips=%s",
+ __print_hex(__entry->deviceid, NFS4_DEVICEID4_SIZE),
+ __get_str(mds_addr),
+ __get_str(ds_ips)
+ )
+);
+
DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
TP_PROTO(
const struct nfs_pgio_header *hdr
@@ -2025,9 +2083,7 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
be32_to_cpu(hdr->args.stateid.seqid);
__entry->stateid_hash =
nfs_stateid_hash(&hdr->args.stateid);
- __assign_str(dstaddr, hdr->ds_clp ?
- rpc_peeraddr2str(hdr->ds_clp->cl_rpcclient,
- RPC_DISPLAY_ADDR) : "unknown");
+ __assign_str(dstaddr);
),
TP_printk(
@@ -2081,9 +2137,7 @@ TRACE_EVENT(ff_layout_commit_error,
__entry->dev = inode->i_sb->s_dev;
__entry->offset = data->args.offset;
__entry->count = data->args.count;
- __assign_str(dstaddr, data->ds_clp ?
- rpc_peeraddr2str(data->ds_clp->cl_rpcclient,
- RPC_DISPLAY_ADDR) : "unknown");
+ __assign_str(dstaddr);
),
TP_printk(
@@ -2099,6 +2153,94 @@ TRACE_EVENT(ff_layout_commit_error,
)
);
+DECLARE_EVENT_CLASS(pnfs_bl_pr_key_class,
+ TP_PROTO(
+ const struct block_device *bdev,
+ u64 key
+ ),
+ TP_ARGS(bdev, key),
+ TP_STRUCT__entry(
+ __field(u64, key)
+ __field(dev_t, dev)
+ __string(device, bdev->bd_disk->disk_name)
+ ),
+ TP_fast_assign(
+ __entry->key = key;
+ __entry->dev = bdev->bd_dev;
+ __assign_str(device);
+ ),
+ TP_printk("dev=%d,%d (%s) key=0x%016llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __get_str(device), __entry->key
+ )
+);
+
+#define DEFINE_NFS4_BLOCK_PRKEY_EVENT(name) \
+ DEFINE_EVENT(pnfs_bl_pr_key_class, name, \
+ TP_PROTO( \
+ const struct block_device *bdev, \
+ u64 key \
+ ), \
+ TP_ARGS(bdev, key))
+DEFINE_NFS4_BLOCK_PRKEY_EVENT(bl_pr_key_reg);
+DEFINE_NFS4_BLOCK_PRKEY_EVENT(bl_pr_key_unreg);
+
+/*
+ * From uapi/linux/pr.h
+ */
+TRACE_DEFINE_ENUM(PR_STS_SUCCESS);
+TRACE_DEFINE_ENUM(PR_STS_IOERR);
+TRACE_DEFINE_ENUM(PR_STS_RESERVATION_CONFLICT);
+TRACE_DEFINE_ENUM(PR_STS_RETRY_PATH_FAILURE);
+TRACE_DEFINE_ENUM(PR_STS_PATH_FAST_FAILED);
+TRACE_DEFINE_ENUM(PR_STS_PATH_FAILED);
+
+#define show_pr_status(x) \
+ __print_symbolic(x, \
+ { PR_STS_SUCCESS, "SUCCESS" }, \
+ { PR_STS_IOERR, "IOERR" }, \
+ { PR_STS_RESERVATION_CONFLICT, "RESERVATION_CONFLICT" }, \
+ { PR_STS_RETRY_PATH_FAILURE, "RETRY_PATH_FAILURE" }, \
+ { PR_STS_PATH_FAST_FAILED, "PATH_FAST_FAILED" }, \
+ { PR_STS_PATH_FAILED, "PATH_FAILED" })
+
+DECLARE_EVENT_CLASS(pnfs_bl_pr_key_err_class,
+ TP_PROTO(
+ const struct block_device *bdev,
+ u64 key,
+ int status
+ ),
+ TP_ARGS(bdev, key, status),
+ TP_STRUCT__entry(
+ __field(u64, key)
+ __field(dev_t, dev)
+ __field(unsigned long, status)
+ __string(device, bdev->bd_disk->disk_name)
+ ),
+ TP_fast_assign(
+ __entry->key = key;
+ __entry->dev = bdev->bd_dev;
+ __entry->status = status;
+ __assign_str(device);
+ ),
+ TP_printk("dev=%d,%d (%s) key=0x%016llx status=%s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __get_str(device), __entry->key,
+ show_pr_status(__entry->status)
+ )
+);
+
+#define DEFINE_NFS4_BLOCK_PRKEY_ERR_EVENT(name) \
+ DEFINE_EVENT(pnfs_bl_pr_key_err_class, name, \
+ TP_PROTO( \
+ const struct block_device *bdev, \
+ u64 key, \
+ int status \
+ ), \
+ TP_ARGS(bdev, key, status))
+DEFINE_NFS4_BLOCK_PRKEY_ERR_EVENT(bl_pr_key_reg_err);
+DEFINE_NFS4_BLOCK_PRKEY_ERR_EVENT(bl_pr_key_unreg_err);
+
#ifdef CONFIG_NFS_V4_2
TRACE_DEFINE_ENUM(NFS4_CONTENT_DATA);
TRACE_DEFINE_ENUM(NFS4_CONTENT_HOLE);
@@ -2521,7 +2663,7 @@ DECLARE_EVENT_CLASS(nfs4_xattr_event,
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = NFS_FILEID(inode);
__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
- __assign_str(name, name);
+ __assign_str(name);
),
TP_printk(
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 69406e60f391..e8ac3f615f93 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -52,6 +52,7 @@
#include <linux/nfs.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
+#include <linux/nfs_common.h>
#include "nfs4_fs.h"
#include "nfs4trace.h"
@@ -63,11 +64,7 @@
#define NFSDBG_FACILITY NFSDBG_XDR
-/* Mapping from NFS error code to "errno" error code. */
-#define errno_NFSERR_IO EIO
-
struct compound_hdr;
-static int nfs4_stat_to_errno(int);
static void encode_layoutget(struct xdr_stream *xdr,
const struct nfs4_layoutget_args *args,
struct compound_hdr *hdr);
@@ -224,6 +221,11 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
encode_attrs_maxsz)
#define decode_setattr_maxsz (op_decode_hdr_maxsz + \
nfs4_fattr_bitmap_maxsz)
+#define encode_delegattr_maxsz (op_encode_hdr_maxsz + \
+ encode_stateid_maxsz + \
+ nfs4_fattr_bitmap_maxsz + \
+ 2*nfstime4_maxsz)
+#define decode_delegattr_maxsz (decode_setattr_maxsz)
#define encode_read_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + 3)
#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + pagepad_maxsz)
@@ -758,12 +760,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_layoutreturn_maxsz + \
+ encode_delegattr_maxsz + \
encode_delegreturn_maxsz + \
encode_getattr_maxsz)
#define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_layoutreturn_maxsz + \
+ decode_delegattr_maxsz + \
decode_delegreturn_maxsz + \
decode_getattr_maxsz)
#define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \
@@ -968,11 +972,6 @@ static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes)
return p;
}
-static void encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len)
-{
- WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0);
-}
-
static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
{
WARN_ON_ONCE(xdr_stream_encode_opaque(xdr, str, len) < 0);
@@ -1060,9 +1059,10 @@ static void encode_nops(struct compound_hdr *hdr)
*hdr->nops_p = htonl(hdr->nops);
}
-static void encode_nfs4_stateid(struct xdr_stream *xdr, const nfs4_stateid *stateid)
+static void encode_nfs4_stateid(struct xdr_stream *xdr,
+ const nfs4_stateid *stateid)
{
- encode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE);
+ encode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
}
static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf)
@@ -1305,7 +1305,7 @@ static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct
static inline int nfs4_lock_type(struct file_lock *fl, int block)
{
- if (fl->fl_type == F_RDLCK)
+ if (lock_is_read(fl))
return block ? NFS4_READW_LT : NFS4_READ_LT;
return block ? NFS4_WRITEW_LT : NFS4_WRITE_LT;
}
@@ -1416,12 +1416,12 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
*/
encode_nfs4_seqid(xdr, arg->seqid);
encode_share_access(xdr, arg->share_access);
- p = reserve_space(xdr, 36);
+ p = reserve_space(xdr, 40);
p = xdr_encode_hyper(p, arg->clientid);
- *p++ = cpu_to_be32(24);
+ *p++ = cpu_to_be32(28);
p = xdr_encode_opaque_fixed(p, "open id:", 8);
*p++ = cpu_to_be32(arg->server->s_dev);
- *p++ = cpu_to_be32(arg->id.uniquifier);
+ p = xdr_encode_hyper(p, arg->id.uniquifier);
xdr_encode_hyper(p, arg->id.create_time);
}
@@ -1468,20 +1468,18 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a
}
}
-static inline void encode_delegation_type(struct xdr_stream *xdr, fmode_t delegation_type)
+static inline void encode_delegation_type(struct xdr_stream *xdr, u32 delegation_type)
{
__be32 *p;
p = reserve_space(xdr, 4);
switch (delegation_type) {
- case 0:
- *p = cpu_to_be32(NFS4_OPEN_DELEGATE_NONE);
- break;
- case FMODE_READ:
- *p = cpu_to_be32(NFS4_OPEN_DELEGATE_READ);
- break;
- case FMODE_WRITE|FMODE_READ:
- *p = cpu_to_be32(NFS4_OPEN_DELEGATE_WRITE);
+ case NFS4_OPEN_DELEGATE_NONE:
+ case NFS4_OPEN_DELEGATE_READ:
+ case NFS4_OPEN_DELEGATE_WRITE:
+ case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
+ case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+ *p = cpu_to_be32(delegation_type);
break;
default:
BUG();
@@ -1497,7 +1495,7 @@ static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *
encode_string(xdr, name->len, name->name);
}
-static inline void encode_claim_previous(struct xdr_stream *xdr, fmode_t type)
+static inline void encode_claim_previous(struct xdr_stream *xdr, u32 type)
{
__be32 *p;
@@ -1735,6 +1733,33 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs
server->attr_bitmask);
}
+static void encode_delegattr(struct xdr_stream *xdr,
+ const nfs4_stateid *stateid,
+ const struct nfs4_delegattr *attr,
+ struct compound_hdr *hdr)
+{
+ uint32_t bitmap[3] = { 0 };
+ uint32_t len = 0;
+ __be32 *p;
+
+ encode_op_hdr(xdr, OP_SETATTR, encode_delegattr_maxsz, hdr);
+ encode_nfs4_stateid(xdr, stateid);
+ if (attr->atime_set) {
+ bitmap[2] |= FATTR4_WORD2_TIME_DELEG_ACCESS;
+ len += (nfstime4_maxsz << 2);
+ }
+ if (attr->mtime_set) {
+ bitmap[2] |= FATTR4_WORD2_TIME_DELEG_MODIFY;
+ len += (nfstime4_maxsz << 2);
+ }
+ xdr_encode_bitmap4(xdr, bitmap, ARRAY_SIZE(bitmap));
+ xdr_stream_encode_opaque_inline(xdr, (void **)&p, len);
+ if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_ACCESS)
+ p = xdr_encode_nfstime4(p, &attr->atime);
+ if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_MODIFY)
+ p = xdr_encode_nfstime4(p, &attr->mtime);
+}
+
static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr)
{
__be32 *p;
@@ -2105,7 +2130,7 @@ static void encode_test_stateid(struct xdr_stream *xdr,
{
encode_op_hdr(xdr, OP_TEST_STATEID, decode_test_stateid_maxsz, hdr);
encode_uint32(xdr, 1);
- encode_nfs4_stateid(xdr, args->stateid);
+ encode_nfs4_stateid(xdr, &args->stateid);
}
static void encode_free_stateid(struct xdr_stream *xdr,
@@ -2812,6 +2837,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
encode_putfh(xdr, args->fhandle, &hdr);
if (args->lr_args)
encode_layoutreturn(xdr, args->lr_args, &hdr);
+ if (args->sattr_args)
+ encode_delegattr(xdr, args->stateid, args->sattr_args, &hdr);
if (args->bitmask)
encode_getfattr(xdr, args->bitmask, &hdr);
encode_delegreturn(xdr, args->stateid, &hdr);
@@ -3412,7 +3439,7 @@ static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, ui
*res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT;
}
- dprintk("%s: link support=%s\n", __func__, *res == 0 ? "false" : "true");
+ dprintk("%s: link support=%s\n", __func__, str_false_true(*res == 0));
return 0;
}
@@ -3430,7 +3457,7 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap,
*res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT;
}
- dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true");
+ dprintk("%s: symlink support=%s\n", __func__, str_false_true(*res == 0));
return 0;
}
@@ -3572,7 +3599,7 @@ static int decode_attr_case_insensitive(struct xdr_stream *xdr, uint32_t *bitmap
*res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_CASE_INSENSITIVE;
}
- dprintk("%s: case_insensitive=%s\n", __func__, *res == 0 ? "false" : "true");
+ dprintk("%s: case_insensitive=%s\n", __func__, str_false_true(*res == 0));
return 0;
}
@@ -3590,7 +3617,7 @@ static int decode_attr_case_preserving(struct xdr_stream *xdr, uint32_t *bitmap,
*res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_CASE_PRESERVING;
}
- dprintk("%s: case_preserving=%s\n", __func__, *res == 0 ? "false" : "true");
+ dprintk("%s: case_preserving=%s\n", __func__, str_false_true(*res == 0));
return 0;
}
@@ -4298,8 +4325,29 @@ static int decode_attr_xattrsupport(struct xdr_stream *xdr, uint32_t *bitmap,
*res = be32_to_cpup(p);
bitmap[2] &= ~FATTR4_WORD2_XATTR_SUPPORT;
}
- dprintk("%s: XATTR support=%s\n", __func__,
- *res == 0 ? "false" : "true");
+ dprintk("%s: XATTR support=%s\n", __func__, str_false_true(*res == 0));
+ return 0;
+}
+
+static int decode_attr_open_arguments(struct xdr_stream *xdr, uint32_t *bitmap,
+ struct nfs4_open_caps *res)
+{
+ memset(res, 0, sizeof(*res));
+ if (unlikely(bitmap[2] & (FATTR4_WORD2_OPEN_ARGUMENTS - 1U)))
+ return -EIO;
+ if (likely(bitmap[2] & FATTR4_WORD2_OPEN_ARGUMENTS)) {
+ if (decode_bitmap4(xdr, res->oa_share_access, ARRAY_SIZE(res->oa_share_access)) < 0)
+ return -EIO;
+ if (decode_bitmap4(xdr, res->oa_share_deny, ARRAY_SIZE(res->oa_share_deny)) < 0)
+ return -EIO;
+ if (decode_bitmap4(xdr, res->oa_share_access_want, ARRAY_SIZE(res->oa_share_access_want)) < 0)
+ return -EIO;
+ if (decode_bitmap4(xdr, res->oa_open_claim, ARRAY_SIZE(res->oa_open_claim)) < 0)
+ return -EIO;
+ if (decode_bitmap4(xdr, res->oa_createmode, ARRAY_SIZE(res->oa_createmode)) < 0)
+ return -EIO;
+ bitmap[2] &= ~FATTR4_WORD2_OPEN_ARGUMENTS;
+ }
return 0;
}
@@ -4352,14 +4400,6 @@ static int decode_access(struct xdr_stream *xdr, u32 *supported, u32 *access)
return 0;
}
-static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len)
-{
- ssize_t ret = xdr_stream_decode_opaque_fixed(xdr, buf, len);
- if (unlikely(ret < 0))
- return -EIO;
- return 0;
-}
-
static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
{
return decode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE);
@@ -4477,6 +4517,8 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re
if ((status = decode_attr_exclcreat_supported(xdr, bitmap,
res->exclcreat_bitmask)) != 0)
goto xdr_error;
+ if ((status = decode_attr_open_arguments(xdr, bitmap, &res->open_caps)) != 0)
+ goto xdr_error;
status = verify_attr_len(xdr, savep, attrlen);
xdr_error:
dprintk("%s: xdr returned %d!\n", __func__, -status);
@@ -5052,10 +5094,10 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
fl->fl_end = fl->fl_start + (loff_t)length - 1;
if (length == ~(uint64_t)0)
fl->fl_end = OFFSET_MAX;
- fl->fl_type = F_WRLCK;
+ fl->c.flc_type = F_WRLCK;
if (type & 1)
- fl->fl_type = F_RDLCK;
- fl->fl_pid = 0;
+ fl->c.flc_type = F_RDLCK;
+ fl->c.flc_pid = 0;
}
p = xdr_decode_hyper(p, &clientid); /* read 8 bytes */
namelen = be32_to_cpup(p); /* read 4 bytes */ /* have read all 32 bytes now */
@@ -5148,13 +5190,12 @@ static int decode_space_limit(struct xdr_stream *xdr,
}
static int decode_rw_delegation(struct xdr_stream *xdr,
- uint32_t delegation_type,
- struct nfs_openres *res)
+ struct nfs4_open_delegation *res)
{
__be32 *p;
int status;
- status = decode_delegation_stateid(xdr, &res->delegation);
+ status = decode_delegation_stateid(xdr, &res->stateid);
if (unlikely(status))
return status;
p = xdr_inline_decode(xdr, 4);
@@ -5162,52 +5203,57 @@ static int decode_rw_delegation(struct xdr_stream *xdr,
return -EIO;
res->do_recall = be32_to_cpup(p);
- switch (delegation_type) {
+ switch (res->open_delegation_type) {
case NFS4_OPEN_DELEGATE_READ:
- res->delegation_type = FMODE_READ;
+ case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
+ res->type = FMODE_READ;
break;
case NFS4_OPEN_DELEGATE_WRITE:
- res->delegation_type = FMODE_WRITE|FMODE_READ;
+ case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+ res->type = FMODE_WRITE|FMODE_READ;
if (decode_space_limit(xdr, &res->pagemod_limit) < 0)
return -EIO;
}
return decode_ace(xdr, NULL);
}
-static int decode_no_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
+static int decode_no_delegation(struct xdr_stream *xdr,
+ struct nfs4_open_delegation *res)
{
__be32 *p;
- uint32_t why_no_delegation;
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
return -EIO;
- why_no_delegation = be32_to_cpup(p);
- switch (why_no_delegation) {
+ res->why_no_delegation = be32_to_cpup(p);
+ switch (res->why_no_delegation) {
case WND4_CONTENTION:
case WND4_RESOURCE:
- xdr_inline_decode(xdr, 4);
- /* Ignore for now */
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+ res->will_notify = be32_to_cpup(p);
}
return 0;
}
-static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
+static int decode_delegation(struct xdr_stream *xdr,
+ struct nfs4_open_delegation *res)
{
__be32 *p;
- uint32_t delegation_type;
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
return -EIO;
- delegation_type = be32_to_cpup(p);
- res->delegation_type = 0;
- switch (delegation_type) {
+ res->open_delegation_type = be32_to_cpup(p);
+ switch (res->open_delegation_type) {
case NFS4_OPEN_DELEGATE_NONE:
return 0;
case NFS4_OPEN_DELEGATE_READ:
case NFS4_OPEN_DELEGATE_WRITE:
- return decode_rw_delegation(xdr, delegation_type, res);
+ case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
+ case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+ return decode_rw_delegation(xdr, res);
case NFS4_OPEN_DELEGATE_NONE_EXT:
return decode_no_delegation(xdr, res);
}
@@ -5248,7 +5294,7 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
for (; i < NFS4_BITMAP_SIZE; i++)
res->attrset[i] = 0;
- return decode_delegation(xdr, res);
+ return decode_delegation(xdr, &res->delegation);
xdr_error:
dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen);
return -EIO;
@@ -5480,6 +5526,11 @@ static int decode_setattr(struct xdr_stream *xdr)
return -EIO;
}
+static int decode_delegattr(struct xdr_stream *xdr)
+{
+ return decode_setattr(xdr);
+}
+
static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_res *res)
{
__be32 *p;
@@ -7052,6 +7103,12 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
if (status)
goto out;
}
+ if (res->sattr_res) {
+ status = decode_delegattr(xdr);
+ res->sattr_ret = status;
+ if (status)
+ goto out;
+ }
if (res->fattr) {
status = decode_getfattr(xdr, res->fattr, res->server);
if (status != 0)
@@ -7547,72 +7604,6 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
return 0;
}
-/*
- * We need to translate between nfs status return values and
- * the local errno values which may not be the same.
- */
-static struct {
- int stat;
- int errno;
-} nfs_errtbl[] = {
- { NFS4_OK, 0 },
- { NFS4ERR_PERM, -EPERM },
- { NFS4ERR_NOENT, -ENOENT },
- { NFS4ERR_IO, -errno_NFSERR_IO},
- { NFS4ERR_NXIO, -ENXIO },
- { NFS4ERR_ACCESS, -EACCES },
- { NFS4ERR_EXIST, -EEXIST },
- { NFS4ERR_XDEV, -EXDEV },
- { NFS4ERR_NOTDIR, -ENOTDIR },
- { NFS4ERR_ISDIR, -EISDIR },
- { NFS4ERR_INVAL, -EINVAL },
- { NFS4ERR_FBIG, -EFBIG },
- { NFS4ERR_NOSPC, -ENOSPC },
- { NFS4ERR_ROFS, -EROFS },
- { NFS4ERR_MLINK, -EMLINK },
- { NFS4ERR_NAMETOOLONG, -ENAMETOOLONG },
- { NFS4ERR_NOTEMPTY, -ENOTEMPTY },
- { NFS4ERR_DQUOT, -EDQUOT },
- { NFS4ERR_STALE, -ESTALE },
- { NFS4ERR_BADHANDLE, -EBADHANDLE },
- { NFS4ERR_BAD_COOKIE, -EBADCOOKIE },
- { NFS4ERR_NOTSUPP, -ENOTSUPP },
- { NFS4ERR_TOOSMALL, -ETOOSMALL },
- { NFS4ERR_SERVERFAULT, -EREMOTEIO },
- { NFS4ERR_BADTYPE, -EBADTYPE },
- { NFS4ERR_LOCKED, -EAGAIN },
- { NFS4ERR_SYMLINK, -ELOOP },
- { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP },
- { NFS4ERR_DEADLOCK, -EDEADLK },
- { NFS4ERR_NOXATTR, -ENODATA },
- { NFS4ERR_XATTR2BIG, -E2BIG },
- { -1, -EIO }
-};
-
-/*
- * Convert an NFS error code to a local one.
- * This one is used jointly by NFSv2 and NFSv3.
- */
-static int
-nfs4_stat_to_errno(int stat)
-{
- int i;
- for (i = 0; nfs_errtbl[i].stat != -1; i++) {
- if (nfs_errtbl[i].stat == stat)
- return nfs_errtbl[i].errno;
- }
- if (stat <= 10000 || stat > 10100) {
- /* The server is looney tunes. */
- return -EREMOTEIO;
- }
- /* If we cannot translate the error, the recovery routines should
- * handle it.
- * Note: remaining NFSv4 error codes have values > 10000, so should
- * not conflict with native Linux error codes.
- */
- return -stat;
-}
-
#ifdef CONFIG_NFS_V4_2
#include "nfs42xdr.c"
#endif /* CONFIG_NFS_V4_2 */
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 7600100ba26f..432612d22437 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -175,10 +175,10 @@ static int __init root_nfs_cat(char *dest, const char *src,
size_t len = strlen(dest);
if (len && dest[len - 1] != ',')
- if (strlcat(dest, ",", destlen) > destlen)
+ if (strlcat(dest, ",", destlen) >= destlen)
return -1;
- if (strlcat(dest, src, destlen) > destlen)
+ if (strlcat(dest, src, destlen) >= destlen)
return -1;
return 0;
}
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index afedb449b54f..7a058bd8c566 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -409,7 +409,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event,
__entry->dir = NFS_FILEID(dir);
__entry->flags = flags;
__entry->fileid = d_is_negative(dentry) ? 0 : NFS_FILEID(d_inode(dentry));
- __assign_str(name, dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -457,7 +457,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done,
__entry->error = error < 0 ? -error : 0;
__entry->flags = flags;
__entry->fileid = d_is_negative(dentry) ? 0 : NFS_FILEID(d_inode(dentry));
- __assign_str(name, dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -512,7 +512,7 @@ TRACE_EVENT(nfs_atomic_open_enter,
__entry->dir = NFS_FILEID(dir);
__entry->flags = flags;
__entry->fmode = (__force unsigned long)ctx->mode;
- __assign_str(name, ctx->dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -551,7 +551,7 @@ TRACE_EVENT(nfs_atomic_open_exit,
__entry->dir = NFS_FILEID(dir);
__entry->flags = flags;
__entry->fmode = (__force unsigned long)ctx->mode;
- __assign_str(name, ctx->dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -587,7 +587,7 @@ TRACE_EVENT(nfs_create_enter,
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__entry->flags = flags;
- __assign_str(name, dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -623,7 +623,7 @@ TRACE_EVENT(nfs_create_exit,
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__entry->flags = flags;
- __assign_str(name, dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -654,7 +654,7 @@ DECLARE_EVENT_CLASS(nfs_directory_event,
TP_fast_assign(
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
- __assign_str(name, dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -693,7 +693,7 @@ DECLARE_EVENT_CLASS(nfs_directory_event_done,
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__entry->error = error < 0 ? -error : 0;
- __assign_str(name, dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -747,7 +747,7 @@ TRACE_EVENT(nfs_link_enter,
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = NFS_FILEID(inode);
__entry->dir = NFS_FILEID(dir);
- __assign_str(name, dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -783,7 +783,7 @@ TRACE_EVENT(nfs_link_exit,
__entry->fileid = NFS_FILEID(inode);
__entry->dir = NFS_FILEID(dir);
__entry->error = error < 0 ? -error : 0;
- __assign_str(name, dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -819,8 +819,8 @@ DECLARE_EVENT_CLASS(nfs_rename_event,
__entry->dev = old_dir->i_sb->s_dev;
__entry->old_dir = NFS_FILEID(old_dir);
__entry->new_dir = NFS_FILEID(new_dir);
- __assign_str(old_name, old_dentry->d_name.name);
- __assign_str(new_name, new_dentry->d_name.name);
+ __assign_str(old_name);
+ __assign_str(new_name);
),
TP_printk(
@@ -868,8 +868,8 @@ DECLARE_EVENT_CLASS(nfs_rename_event_done,
__entry->error = -error;
__entry->old_dir = NFS_FILEID(old_dir);
__entry->new_dir = NFS_FILEID(new_dir);
- __assign_str(old_name, old_dentry->d_name.name);
- __assign_str(new_name, new_dentry->d_name.name);
+ __assign_str(old_name);
+ __assign_str(new_name);
),
TP_printk(
@@ -939,10 +939,11 @@ TRACE_EVENT(nfs_sillyrename_unlink,
DECLARE_EVENT_CLASS(nfs_folio_event,
TP_PROTO(
const struct inode *inode,
- struct folio *folio
+ loff_t offset,
+ size_t count
),
- TP_ARGS(inode, folio),
+ TP_ARGS(inode, offset, count),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -950,7 +951,7 @@ DECLARE_EVENT_CLASS(nfs_folio_event,
__field(u64, fileid)
__field(u64, version)
__field(loff_t, offset)
- __field(u32, count)
+ __field(size_t, count)
),
TP_fast_assign(
@@ -960,13 +961,13 @@ DECLARE_EVENT_CLASS(nfs_folio_event,
__entry->fileid = nfsi->fileid;
__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
__entry->version = inode_peek_iversion_raw(inode);
- __entry->offset = folio_file_pos(folio);
- __entry->count = nfs_folio_length(folio);
+ __entry->offset = offset,
+ __entry->count = count;
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu "
- "offset=%lld count=%u",
+ "offset=%lld count=%zu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle, __entry->version,
@@ -978,18 +979,20 @@ DECLARE_EVENT_CLASS(nfs_folio_event,
DEFINE_EVENT(nfs_folio_event, name, \
TP_PROTO( \
const struct inode *inode, \
- struct folio *folio \
+ loff_t offset, \
+ size_t count \
), \
- TP_ARGS(inode, folio))
+ TP_ARGS(inode, offset, count))
DECLARE_EVENT_CLASS(nfs_folio_event_done,
TP_PROTO(
const struct inode *inode,
- struct folio *folio,
+ loff_t offset,
+ size_t count,
int ret
),
- TP_ARGS(inode, folio, ret),
+ TP_ARGS(inode, offset, count, ret),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -998,7 +1001,7 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done,
__field(u64, fileid)
__field(u64, version)
__field(loff_t, offset)
- __field(u32, count)
+ __field(size_t, count)
),
TP_fast_assign(
@@ -1008,14 +1011,14 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done,
__entry->fileid = nfsi->fileid;
__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
__entry->version = inode_peek_iversion_raw(inode);
- __entry->offset = folio_file_pos(folio);
- __entry->count = nfs_folio_length(folio);
+ __entry->offset = offset,
+ __entry->count = count,
__entry->ret = ret;
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu "
- "offset=%lld count=%u ret=%d",
+ "offset=%lld count=%zu ret=%d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle, __entry->version,
@@ -1027,10 +1030,11 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done,
DEFINE_EVENT(nfs_folio_event_done, name, \
TP_PROTO( \
const struct inode *inode, \
- struct folio *folio, \
+ loff_t offset, \
+ size_t count, \
int ret \
), \
- TP_ARGS(inode, folio, ret))
+ TP_ARGS(inode, offset, count, ret))
DEFINE_NFS_FOLIO_EVENT(nfs_aop_readpage);
DEFINE_NFS_FOLIO_EVENT_DONE(nfs_aop_readpage_done);
@@ -1636,8 +1640,8 @@ TRACE_EVENT(nfs_mount_assign,
),
TP_fast_assign(
- __assign_str(option, option);
- __assign_str(value, value);
+ __assign_str(option);
+ __assign_str(value);
),
TP_printk("option %s=%s",
@@ -1657,7 +1661,7 @@ TRACE_EVENT(nfs_mount_option,
),
TP_fast_assign(
- __assign_str(option, param->key);
+ __assign_str(option);
),
TP_printk("option %s", __get_str(option))
@@ -1675,12 +1679,41 @@ TRACE_EVENT(nfs_mount_path,
),
TP_fast_assign(
- __assign_str(path, path);
+ __assign_str(path);
),
TP_printk("path='%s'", __get_str(path))
);
+TRACE_EVENT(nfs_local_open_fh,
+ TP_PROTO(
+ const struct nfs_fh *fh,
+ fmode_t fmode,
+ int error
+ ),
+
+ TP_ARGS(fh, fmode, error),
+
+ TP_STRUCT__entry(
+ __field(int, error)
+ __field(u32, fhandle)
+ __field(unsigned int, fmode)
+ ),
+
+ TP_fast_assign(
+ __entry->error = error;
+ __entry->fhandle = nfs_fhandle_hash(fh);
+ __entry->fmode = (__force unsigned int)fmode;
+ ),
+
+ TP_printk(
+ "error=%d fhandle=0x%08x mode=%s",
+ __entry->error,
+ __entry->fhandle,
+ show_fs_fmode_flags(__entry->fmode)
+ )
+);
+
DECLARE_EVENT_CLASS(nfs_xdr_event,
TP_PROTO(
const struct xdr_stream *xdr,
@@ -1710,9 +1743,8 @@ DECLARE_EVENT_CLASS(nfs_xdr_event,
__entry->xid = be32_to_cpu(rqstp->rq_xid);
__entry->version = task->tk_client->cl_vers;
__entry->error = error;
- __assign_str(program,
- task->tk_client->cl_program->name);
- __assign_str(procedure, task->tk_msg.rpc_proc->p_name);
+ __assign_str(program);
+ __assign_str(procedure);
),
TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 6efb5068c116..11968dcb7243 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -188,102 +188,6 @@ nfs_async_iocounter_wait(struct rpc_task *task, struct nfs_lock_context *l_ctx)
EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait);
/*
- * nfs_page_lock_head_request - page lock the head of the page group
- * @req: any member of the page group
- */
-struct nfs_page *
-nfs_page_group_lock_head(struct nfs_page *req)
-{
- struct nfs_page *head = req->wb_head;
-
- while (!nfs_lock_request(head)) {
- int ret = nfs_wait_on_request(head);
- if (ret < 0)
- return ERR_PTR(ret);
- }
- if (head != req)
- kref_get(&head->wb_kref);
- return head;
-}
-
-/*
- * nfs_unroll_locks - unlock all newly locked reqs and wait on @req
- * @head: head request of page group, must be holding head lock
- * @req: request that couldn't lock and needs to wait on the req bit lock
- *
- * This is a helper function for nfs_lock_and_join_requests
- * returns 0 on success, < 0 on error.
- */
-static void
-nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req)
-{
- struct nfs_page *tmp;
-
- /* relinquish all the locks successfully grabbed this run */
- for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) {
- if (!kref_read(&tmp->wb_kref))
- continue;
- nfs_unlock_and_release_request(tmp);
- }
-}
-
-/*
- * nfs_page_group_lock_subreq - try to lock a subrequest
- * @head: head request of page group
- * @subreq: request to lock
- *
- * This is a helper function for nfs_lock_and_join_requests which
- * must be called with the head request and page group both locked.
- * On error, it returns with the page group unlocked.
- */
-static int
-nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq)
-{
- int ret;
-
- if (!kref_get_unless_zero(&subreq->wb_kref))
- return 0;
- while (!nfs_lock_request(subreq)) {
- nfs_page_group_unlock(head);
- ret = nfs_wait_on_request(subreq);
- if (!ret)
- ret = nfs_page_group_lock(head);
- if (ret < 0) {
- nfs_unroll_locks(head, subreq);
- nfs_release_request(subreq);
- return ret;
- }
- }
- return 0;
-}
-
-/*
- * nfs_page_group_lock_subrequests - try to lock the subrequests
- * @head: head request of page group
- *
- * This is a helper function for nfs_lock_and_join_requests which
- * must be called with the head request locked.
- */
-int nfs_page_group_lock_subrequests(struct nfs_page *head)
-{
- struct nfs_page *subreq;
- int ret;
-
- ret = nfs_page_group_lock(head);
- if (ret < 0)
- return ret;
- /* lock each request in the page group */
- for (subreq = head->wb_this_page; subreq != head;
- subreq = subreq->wb_this_page) {
- ret = nfs_page_group_lock_subreq(head, subreq);
- if (ret < 0)
- return ret;
- }
- nfs_page_group_unlock(head);
- return 0;
-}
-
-/*
* nfs_page_set_headlock - set the request PG_HEADLOCK
* @req: request that is to be locked
*
@@ -569,7 +473,7 @@ struct nfs_page *nfs_page_create_from_folio(struct nfs_open_context *ctx,
if (IS_ERR(l_ctx))
return ERR_CAST(l_ctx);
- ret = nfs_page_create(l_ctx, offset, folio_index(folio), offset, count);
+ ret = nfs_page_create(l_ctx, offset, folio->index, offset, count);
if (!IS_ERR(ret)) {
nfs_page_assign_folio(ret, folio);
nfs_page_group_init(ret, NULL);
@@ -694,25 +598,6 @@ void nfs_release_request(struct nfs_page *req)
}
EXPORT_SYMBOL_GPL(nfs_release_request);
-/**
- * nfs_wait_on_request - Wait for a request to complete.
- * @req: request to wait upon.
- *
- * Interruptible by fatal signals only.
- * The user is responsible for holding a count on the request.
- */
-int
-nfs_wait_on_request(struct nfs_page *req)
-{
- if (!test_bit(PG_BUSY, &req->wb_flags))
- return 0;
- set_bit(PG_CONTENDED2, &req->wb_flags);
- smp_mb__after_atomic();
- return wait_on_bit_io(&req->wb_flags, PG_BUSY,
- TASK_UNINTERRUPTIBLE);
-}
-EXPORT_SYMBOL_GPL(nfs_wait_on_request);
-
/*
* nfs_generic_pg_test - determine if requests can be coalesced
* @desc: pointer to descriptor
@@ -846,7 +731,8 @@ static void nfs_pgio_prepare(struct rpc_task *task, void *calldata)
int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
const struct cred *cred, const struct nfs_rpc_ops *rpc_ops,
- const struct rpc_call_ops *call_ops, int how, int flags)
+ const struct rpc_call_ops *call_ops, int how, int flags,
+ struct nfsd_file *localio)
{
struct rpc_task *task;
struct rpc_message msg = {
@@ -876,6 +762,10 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
hdr->args.count,
(unsigned long long)hdr->args.offset);
+ if (localio)
+ return nfs_local_doio(NFS_SERVER(hdr->inode)->nfs_client,
+ localio, hdr, call_ops);
+
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
@@ -1068,6 +958,13 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
ret = nfs_generic_pgio(desc, hdr);
if (ret == 0) {
+ struct nfs_client *clp = NFS_SERVER(hdr->inode)->nfs_client;
+
+ struct nfsd_file *localio =
+ nfs_local_open_fh(clp, hdr->cred, hdr->args.fh,
+ &hdr->args.context->nfl,
+ hdr->args.context->mode);
+
if (NFS_SERVER(hdr->inode)->nfs_client->cl_minorversion)
task_flags = RPC_TASK_MOVEABLE;
ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
@@ -1076,7 +973,8 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
NFS_PROTO(hdr->inode),
desc->pg_rpc_callops,
desc->pg_ioflags,
- RPC_TASK_CRED_NOREF | task_flags);
+ RPC_TASK_CRED_NOREF | task_flags,
+ localio);
}
return ret;
}
@@ -1545,6 +1443,11 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
continue;
} else if (index == prev->wb_index + 1)
continue;
+ /*
+ * We will submit more requests after these. Indicate
+ * this to the underlying layers.
+ */
+ desc->pg_moreio = 1;
nfs_pageio_complete(desc);
break;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 0c0fed1ecd0b..5f582713bf05 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -61,6 +61,7 @@ static void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo,
u32 seq);
static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
struct list_head *tmp_list);
+static int pnfs_layout_return_on_reboot(struct pnfs_layout_hdr *lo);
/* Return the registered pnfs layout driver module matching given id */
static struct pnfs_layoutdriver_type *
@@ -476,6 +477,18 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
return !list_empty(&lo->plh_segs);
}
+static int pnfs_mark_layout_stateid_return(struct pnfs_layout_hdr *lo,
+ struct list_head *lseg_list,
+ enum pnfs_iomode iomode, u32 seq)
+{
+ struct pnfs_layout_range range = {
+ .iomode = iomode,
+ .length = NFS4_MAX_UINT64,
+ };
+
+ return pnfs_mark_matching_lsegs_return(lo, lseg_list, &range, seq);
+}
+
static int
pnfs_iomode_to_fail_bit(u32 iomode)
{
@@ -846,8 +859,6 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
break;
inode = pnfs_grab_inode_layout_hdr(lo);
if (inode != NULL) {
- if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags))
- list_del_rcu(&lo->plh_layouts);
if (pnfs_layout_add_bulk_destroy_list(inode,
layout_list))
continue;
@@ -868,7 +879,7 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
static int
pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
- bool is_bulk_recall)
+ enum pnfs_layout_destroy_mode mode)
{
struct pnfs_layout_hdr *lo;
struct inode *inode;
@@ -886,8 +897,11 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
spin_lock(&inode->i_lock);
list_del_init(&lo->plh_bulk_destroy);
- if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
- if (is_bulk_recall)
+ if (mode == PNFS_LAYOUT_FILE_BULK_RETURN) {
+ pnfs_mark_layout_stateid_return(lo, &lseg_list,
+ IOMODE_ANY, 0);
+ } else if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
+ if (mode == PNFS_LAYOUT_BULK_RETURN)
set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
ret = -EAGAIN;
}
@@ -901,10 +915,8 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
return ret;
}
-int
-pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
- struct nfs_fsid *fsid,
- bool is_recall)
+int pnfs_layout_destroy_byfsid(struct nfs_client *clp, struct nfs_fsid *fsid,
+ enum pnfs_layout_destroy_mode mode)
{
struct nfs_server *server;
LIST_HEAD(layout_list);
@@ -923,33 +935,40 @@ restart:
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
- if (list_empty(&layout_list))
- return 0;
- return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
+ return pnfs_layout_free_bulk_destroy_list(&layout_list, mode);
}
-int
-pnfs_destroy_layouts_byclid(struct nfs_client *clp,
- bool is_recall)
+static void pnfs_layout_build_destroy_list_byclient(struct nfs_client *clp,
+ struct list_head *list)
{
struct nfs_server *server;
- LIST_HEAD(layout_list);
spin_lock(&clp->cl_lock);
rcu_read_lock();
restart:
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
- if (pnfs_layout_bulk_destroy_byserver_locked(clp,
- server,
- &layout_list) != 0)
+ if (pnfs_layout_bulk_destroy_byserver_locked(clp, server,
+ list) != 0)
goto restart;
}
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
+}
- if (list_empty(&layout_list))
- return 0;
- return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
+static int pnfs_layout_do_destroy_byclid(struct nfs_client *clp,
+ struct list_head *list,
+ enum pnfs_layout_destroy_mode mode)
+{
+ pnfs_layout_build_destroy_list_byclient(clp, list);
+ return pnfs_layout_free_bulk_destroy_list(list, mode);
+}
+
+int pnfs_layout_destroy_byclid(struct nfs_client *clp,
+ enum pnfs_layout_destroy_mode mode)
+{
+ LIST_HEAD(layout_list);
+
+ return pnfs_layout_do_destroy_byclid(clp, &layout_list, mode);
}
/*
@@ -962,7 +981,68 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
nfs4_deviceid_mark_client_invalid(clp);
nfs4_deviceid_purge_client(clp);
- pnfs_destroy_layouts_byclid(clp, false);
+ pnfs_layout_destroy_byclid(clp, PNFS_LAYOUT_INVALIDATE);
+}
+
+static void pnfs_layout_build_recover_list_byclient(struct nfs_client *clp,
+ struct list_head *list)
+{
+ struct nfs_server *server;
+
+ spin_lock(&clp->cl_lock);
+ rcu_read_lock();
+restart:
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ if (!(server->caps & NFS_CAP_REBOOT_LAYOUTRETURN))
+ continue;
+ if (pnfs_layout_bulk_destroy_byserver_locked(clp, server,
+ list) != 0)
+ goto restart;
+ }
+ rcu_read_unlock();
+ spin_unlock(&clp->cl_lock);
+}
+
+static int pnfs_layout_bulk_list_reboot(struct list_head *list)
+{
+ struct pnfs_layout_hdr *lo;
+ struct nfs_server *server;
+ int ret;
+
+ list_for_each_entry(lo, list, plh_bulk_destroy) {
+ server = NFS_SERVER(lo->plh_inode);
+ ret = pnfs_layout_return_on_reboot(lo);
+ switch (ret) {
+ case 0:
+ continue;
+ case -NFS4ERR_BAD_STATEID:
+ server->caps &= ~NFS_CAP_REBOOT_LAYOUTRETURN;
+ break;
+ case -NFS4ERR_NO_GRACE:
+ break;
+ default:
+ goto err;
+ }
+ break;
+ }
+ return 0;
+err:
+ return ret;
+}
+
+int pnfs_layout_handle_reboot(struct nfs_client *clp)
+{
+ LIST_HEAD(list);
+ int ret = 0, ret2;
+
+ pnfs_layout_build_recover_list_byclient(clp, &list);
+ if (!list_empty(&list))
+ ret = pnfs_layout_bulk_list_reboot(&list);
+ ret2 = pnfs_layout_do_destroy_byclid(clp, &list,
+ PNFS_LAYOUT_INVALIDATE);
+ if (!ret)
+ ret = ret2;
+ return (ret == 0) ? 0 : -EAGAIN;
}
static void
@@ -1163,6 +1243,38 @@ static void pnfs_clear_layoutcommit(struct inode *inode,
}
}
+static void
+pnfs_layoutreturn_retry_later_locked(struct pnfs_layout_hdr *lo,
+ const nfs4_stateid *arg_stateid,
+ const struct pnfs_layout_range *range)
+{
+ const struct pnfs_layout_segment *lseg;
+ u32 seq = be32_to_cpu(arg_stateid->seqid);
+
+ if (pnfs_layout_is_valid(lo) &&
+ nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) {
+ list_for_each_entry(lseg, &lo->plh_return_segs, pls_list) {
+ if (pnfs_seqid_is_newer(lseg->pls_seq, seq) ||
+ !pnfs_should_free_range(&lseg->pls_range, range))
+ continue;
+ pnfs_set_plh_return_info(lo, range->iomode, seq);
+ break;
+ }
+ }
+}
+
+void pnfs_layoutreturn_retry_later(struct pnfs_layout_hdr *lo,
+ const nfs4_stateid *arg_stateid,
+ const struct pnfs_layout_range *range)
+{
+ struct inode *inode = lo->plh_inode;
+
+ spin_lock(&inode->i_lock);
+ pnfs_layoutreturn_retry_later_locked(lo, arg_stateid, range);
+ pnfs_clear_layoutreturn_waitbit(lo);
+ spin_unlock(&inode->i_lock);
+}
+
void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
const nfs4_stateid *arg_stateid,
const struct pnfs_layout_range *range,
@@ -1172,10 +1284,9 @@ void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
LIST_HEAD(freeme);
spin_lock(&inode->i_lock);
- if (!pnfs_layout_is_valid(lo) ||
- !nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid))
+ if (!nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid))
goto out_unlock;
- if (stateid) {
+ if (stateid && pnfs_layout_is_valid(lo)) {
u32 seq = be32_to_cpu(arg_stateid->seqid);
pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq);
@@ -1197,7 +1308,7 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
enum pnfs_iomode *iomode)
{
/* Serialise LAYOUTGET/LAYOUTRETURN */
- if (atomic_read(&lo->plh_outstanding) != 0)
+ if (atomic_read(&lo->plh_outstanding) != 0 && lo->plh_return_seq == 0)
return false;
if (test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
return false;
@@ -1239,7 +1350,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo,
const nfs4_stateid *stateid,
const struct cred **pcred,
enum pnfs_iomode iomode,
- bool sync)
+ unsigned int flags)
{
struct inode *ino = lo->plh_inode;
struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
@@ -1266,33 +1377,21 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo,
if (ld->prepare_layoutreturn)
ld->prepare_layoutreturn(&lrp->args);
- status = nfs4_proc_layoutreturn(lrp, sync);
+ status = nfs4_proc_layoutreturn(lrp, flags);
out:
dprintk("<-- %s status: %d\n", __func__, status);
return status;
}
-static bool
-pnfs_layout_segments_returnable(struct pnfs_layout_hdr *lo,
- enum pnfs_iomode iomode,
- u32 seq)
-{
- struct pnfs_layout_range recall_range = {
- .length = NFS4_MAX_UINT64,
- .iomode = iomode,
- };
- return pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
- &recall_range, seq) != -EBUSY;
-}
-
/* Return true if layoutreturn is needed */
static bool
pnfs_layout_need_return(struct pnfs_layout_hdr *lo)
{
if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
return false;
- return pnfs_layout_segments_returnable(lo, lo->plh_return_iomode,
- lo->plh_return_seq);
+ return pnfs_mark_layout_stateid_return(lo, &lo->plh_return_segs,
+ lo->plh_return_iomode,
+ lo->plh_return_seq) != EBUSY;
}
static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
@@ -1312,7 +1411,8 @@ static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
spin_unlock(&inode->i_lock);
if (send) {
/* Send an async layoutreturn so we dont deadlock */
- pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
+ pnfs_send_layoutreturn(lo, &stateid, &cred, iomode,
+ PNFS_FL_LAYOUTRETURN_ASYNC);
}
} else
spin_unlock(&inode->i_lock);
@@ -1379,7 +1479,8 @@ _pnfs_return_layout(struct inode *ino)
send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, NULL);
spin_unlock(&ino->i_lock);
if (send)
- status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY, true);
+ status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY,
+ 0);
out_wait_layoutreturn:
wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, TASK_UNINTERRUPTIBLE);
out_put_layout_hdr:
@@ -1417,6 +1518,24 @@ pnfs_commit_and_return_layout(struct inode *inode)
return ret;
}
+static int pnfs_layout_return_on_reboot(struct pnfs_layout_hdr *lo)
+{
+ struct inode *inode = lo->plh_inode;
+ const struct cred *cred;
+
+ spin_lock(&inode->i_lock);
+ if (!pnfs_layout_is_valid(lo)) {
+ spin_unlock(&inode->i_lock);
+ return 0;
+ }
+ cred = get_cred(lo->plh_lc_cred);
+ pnfs_get_layout_hdr(lo);
+ spin_unlock(&inode->i_lock);
+
+ return pnfs_send_layoutreturn(lo, &zero_stateid, &cred, IOMODE_ANY,
+ PNFS_FL_LAYOUTRETURN_PRIVILEGED);
+}
+
bool pnfs_roc(struct inode *ino,
struct nfs4_layoutreturn_args *args,
struct nfs4_layoutreturn_res *res,
@@ -1520,7 +1639,7 @@ out_noroc:
return true;
}
if (layoutreturn)
- pnfs_send_layoutreturn(lo, &stateid, &lc_cred, iomode, true);
+ pnfs_send_layoutreturn(lo, &stateid, &lc_cred, iomode, 0);
pnfs_put_layout_hdr(lo);
return false;
}
@@ -1570,8 +1689,7 @@ int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp,
}
void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
- struct nfs4_layoutreturn_res *res,
- int ret)
+ struct nfs4_layoutreturn_res *res, int ret)
{
struct pnfs_layout_hdr *lo = args->layout;
struct inode *inode = args->inode;
@@ -1579,11 +1697,13 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
struct nfs4_xdr_opaque_data *ld_private = args->ld_private;
switch (ret) {
+ case -NFS4ERR_BADSESSION:
+ case -NFS4ERR_DEADSESSION:
+ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_NOMATCHING_LAYOUT:
spin_lock(&inode->i_lock);
- if (pnfs_layout_is_valid(lo) &&
- nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid))
- pnfs_set_plh_return_info(lo, args->range.iomode, 0);
+ pnfs_layoutreturn_retry_later_locked(lo, &args->stateid,
+ &args->range);
pnfs_clear_layoutreturn_waitbit(lo);
spin_unlock(&inode->i_lock);
break;
@@ -1999,6 +2119,14 @@ pnfs_update_layout(struct inode *ino,
}
lookup_again:
+ if (!nfs4_valid_open_stateid(ctx->state)) {
+ trace_pnfs_update_layout(ino, pos, count,
+ iomode, lo, lseg,
+ PNFS_UPDATE_LAYOUT_INVALID_OPEN);
+ lseg = ERR_PTR(-EIO);
+ goto out;
+ }
+
lseg = ERR_PTR(nfs4_client_recover_expired_lease(clp));
if (IS_ERR(lseg))
goto out;
@@ -2558,7 +2686,8 @@ pnfs_mark_layout_for_return(struct inode *inode,
return_now = pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode);
spin_unlock(&inode->i_lock);
if (return_now)
- pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
+ pnfs_send_layoutreturn(lo, &stateid, &cred, iomode,
+ PNFS_FL_LAYOUTRETURN_ASYNC);
} else {
spin_unlock(&inode->i_lock);
nfs_commit_inode(inode, 0);
@@ -2674,7 +2803,8 @@ restart:
}
spin_unlock(&inode->i_lock);
rcu_read_unlock();
- pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
+ pnfs_send_layoutreturn(lo, &stateid, &cred, iomode,
+ PNFS_FL_LAYOUTRETURN_ASYNC);
pnfs_put_layout_hdr(lo);
cond_resched();
goto restart;
@@ -2697,38 +2827,28 @@ pnfs_layout_return_unused_byclid(struct nfs_client *clp,
&range);
}
+/* Check if we have we have a valid layout but if there isn't an intersection
+ * between the request and the pgio->pg_lseg, put this pgio->pg_lseg away.
+ */
void
-pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio)
+pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio,
+ struct nfs_page *req)
{
if (pgio->pg_lseg == NULL ||
- test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags))
+ (test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags) &&
+ pnfs_lseg_request_intersecting(pgio->pg_lseg, req)))
return;
pnfs_put_lseg(pgio->pg_lseg);
pgio->pg_lseg = NULL;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout);
-/*
- * Check for any intersection between the request and the pgio->pg_lseg,
- * and if none, put this pgio->pg_lseg away.
- */
-void
-pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
-{
- if (pgio->pg_lseg && !pnfs_lseg_request_intersecting(pgio->pg_lseg, req)) {
- pnfs_put_lseg(pgio->pg_lseg);
- pgio->pg_lseg = NULL;
- }
-}
-EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_range);
-
void
pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
u64 rd_size;
- pnfs_generic_pg_check_layout(pgio);
- pnfs_generic_pg_check_range(pgio, req);
+ pnfs_generic_pg_check_layout(pgio, req);
if (pgio->pg_lseg == NULL) {
if (pgio->pg_dreq == NULL)
rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
@@ -2758,8 +2878,7 @@ void
pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req, u64 wb_size)
{
- pnfs_generic_pg_check_layout(pgio);
- pnfs_generic_pg_check_range(pgio, req);
+ pnfs_generic_pg_check_layout(pgio, req);
if (pgio->pg_lseg == NULL) {
pgio->pg_lseg =
pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req),
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index db57a85500ee..30d2613e912b 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -118,6 +118,12 @@ enum layoutdriver_policy_flags {
PNFS_LAYOUTGET_ON_OPEN = 1 << 3,
};
+enum pnfs_layout_destroy_mode {
+ PNFS_LAYOUT_INVALIDATE = 0,
+ PNFS_LAYOUT_BULK_RETURN,
+ PNFS_LAYOUT_FILE_BULK_RETURN,
+};
+
struct nfs4_deviceid_node;
/* Per-layout driver specific registration structure */
@@ -127,7 +133,6 @@ struct pnfs_layoutdriver_type {
const char *name;
struct module *owner;
unsigned flags;
- unsigned max_deviceinfo_size;
unsigned max_layoutget_response;
int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *);
@@ -193,8 +198,6 @@ struct pnfs_commit_ops {
int max);
void (*recover_commit_reqs) (struct list_head *list,
struct nfs_commit_info *cinfo);
- struct nfs_page * (*search_commit_reqs)(struct nfs_commit_info *cinfo,
- struct folio *folio);
};
struct pnfs_layout_hdr {
@@ -242,6 +245,9 @@ extern const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id);
extern void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld);
/* nfs4proc.c */
+#define PNFS_FL_LAYOUTRETURN_ASYNC (1U << 0)
+#define PNFS_FL_LAYOUTRETURN_PRIVILEGED (1U << 1)
+
extern size_t max_response_pages(struct nfs_server *server);
extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
struct pnfs_device *dev,
@@ -249,7 +255,8 @@ extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
extern struct pnfs_layout_segment *
nfs4_proc_layoutget(struct nfs4_layoutget *lgp,
struct nfs4_exception *exception);
-extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync);
+extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp,
+ unsigned int flags);
/* pnfs.c */
void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
@@ -257,8 +264,7 @@ void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, struct nfs_fsinfo *);
void unset_pnfs_layoutdriver(struct nfs_server *);
-void pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio);
-void pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req);
+void pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio, struct nfs_page *req);
void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
@@ -274,11 +280,10 @@ void pnfs_free_lseg_list(struct list_head *tmp_list);
void pnfs_destroy_layout(struct nfs_inode *);
void pnfs_destroy_layout_final(struct nfs_inode *);
void pnfs_destroy_all_layouts(struct nfs_client *);
-int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
- struct nfs_fsid *fsid,
- bool is_recall);
-int pnfs_destroy_layouts_byclid(struct nfs_client *clp,
- bool is_recall);
+int pnfs_layout_destroy_byfsid(struct nfs_client *clp, struct nfs_fsid *fsid,
+ enum pnfs_layout_destroy_mode mode);
+int pnfs_layout_destroy_byclid(struct nfs_client *clp,
+ enum pnfs_layout_destroy_mode mode);
bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
struct pnfs_layout_range *dst_range,
struct inode *inode);
@@ -324,6 +329,9 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
enum pnfs_iomode iomode,
bool strict_iomode,
gfp_t gfp_flags);
+void pnfs_layoutreturn_retry_later(struct pnfs_layout_hdr *lo,
+ const nfs4_stateid *arg_stateid,
+ const struct pnfs_layout_range *range);
void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
const nfs4_stateid *arg_stateid,
const struct pnfs_layout_range *range,
@@ -345,6 +353,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
struct pnfs_layout_segment *lseg);
void pnfs_layout_return_unused_byclid(struct nfs_client *clp,
enum pnfs_iomode iomode);
+int pnfs_layout_handle_reboot(struct nfs_client *clp);
/* nfs4_deviceid_flags */
enum {
@@ -397,8 +406,6 @@ void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data);
void pnfs_generic_rw_release(void *data);
void pnfs_generic_recover_commit_reqs(struct list_head *dst,
struct nfs_commit_info *cinfo);
-struct nfs_page *pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo,
- struct folio *folio);
int pnfs_generic_commit_pagelist(struct inode *inode,
struct list_head *mds_pages,
int how,
@@ -558,17 +565,6 @@ pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo)
fl_cinfo->ops->recover_commit_reqs(head, cinfo);
}
-static inline struct nfs_page *
-pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
- struct folio *folio)
-{
- struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
-
- if (!fl_cinfo->ops || !fl_cinfo->ops->search_commit_reqs)
- return NULL;
- return fl_cinfo->ops->search_commit_reqs(cinfo, folio);
-}
-
/* Should the pNFS client commit and return the layout upon a setattr */
static inline bool
pnfs_ld_layoutret_on_setattr(struct inode *inode)
@@ -726,6 +722,11 @@ static inline void pnfs_destroy_layout_final(struct nfs_inode *nfsi)
{
}
+static inline int pnfs_layout_handle_reboot(struct nfs_client *clp)
+{
+ return 0;
+}
+
static inline struct pnfs_layout_segment *
pnfs_get_lseg(struct pnfs_layout_segment *lseg)
{
@@ -865,13 +866,6 @@ pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo)
{
}
-static inline struct nfs_page *
-pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
- struct folio *folio)
-{
- return NULL;
-}
-
static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
{
return 0;
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index 178001c90156..bf0f2d67e96c 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -110,9 +110,6 @@ nfs4_get_device_info(struct nfs_server *server,
* GETDEVICEINFO's maxcount
*/
max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
- if (server->pnfs_curr_ld->max_deviceinfo_size &&
- server->pnfs_curr_ld->max_deviceinfo_size < max_resp_sz)
- max_resp_sz = server->pnfs_curr_ld->max_deviceinfo_size;
max_pages = nfs_page_array_len(0, max_resp_sz);
dprintk("%s: server %p max_resp_sz %u max_pages %d\n",
__func__, server, max_resp_sz, max_pages);
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index afd23910f3bf..dbef837e871a 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -351,53 +351,6 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst,
}
EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs);
-static struct nfs_page *
-pnfs_bucket_search_commit_reqs(struct pnfs_commit_bucket *buckets,
- unsigned int nbuckets, struct folio *folio)
-{
- struct nfs_page *req;
- struct pnfs_commit_bucket *b;
- unsigned int i;
-
- /* Linearly search the commit lists for each bucket until a matching
- * request is found */
- for (i = 0, b = buckets; i < nbuckets; i++, b++) {
- list_for_each_entry(req, &b->written, wb_list) {
- if (nfs_page_to_folio(req) == folio)
- return req->wb_head;
- }
- list_for_each_entry(req, &b->committing, wb_list) {
- if (nfs_page_to_folio(req) == folio)
- return req->wb_head;
- }
- }
- return NULL;
-}
-
-/* pnfs_generic_search_commit_reqs - Search lists in @cinfo for the head request
- * for @folio
- * @cinfo - commit info for current inode
- * @folio - page to search for matching head request
- *
- * Return: the head request if one is found, otherwise %NULL.
- */
-struct nfs_page *pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo,
- struct folio *folio)
-{
- struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
- struct pnfs_commit_array *array;
- struct nfs_page *req;
-
- list_for_each_entry(array, &fl_cinfo->commits, cinfo_list) {
- req = pnfs_bucket_search_commit_reqs(array->buckets,
- array->nbuckets, folio);
- if (req)
- return req;
- }
- return NULL;
-}
-EXPORT_SYMBOL_GPL(pnfs_generic_search_commit_reqs);
-
static struct pnfs_layout_segment *
pnfs_bucket_get_committing(struct list_head *head,
struct pnfs_commit_bucket *bucket,
@@ -537,7 +490,7 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
nfs_initiate_commit(NFS_CLIENT(inode), data,
NFS_PROTO(data->inode),
data->mds_ops, how,
- RPC_TASK_CRED_NOREF);
+ RPC_TASK_CRED_NOREF, NULL);
} else {
nfs_init_commit(data, NULL, data->lseg, cinfo);
initiate_commit(data, how);
@@ -919,6 +872,8 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr);
list_for_each_entry(da, &ds->ds_addrs, da_node) {
+ char servername[48];
+
dprintk("%s: DS %s: trying address %s\n",
__func__, ds->ds_remotestr, da->da_remotestr);
@@ -929,6 +884,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
.dstaddr = (struct sockaddr *)&da->da_addr,
.addrlen = da->da_addrlen,
.servername = clp->cl_hostname,
+ .xprtsec = clp->cl_xprtsec,
};
struct nfs4_add_xprt_data xprtdata = {
.clp = clp,
@@ -938,10 +894,45 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
.data = &xprtdata,
};
- if (da->da_transport != clp->cl_proto)
+ if (da->da_transport != clp->cl_proto &&
+ clp->cl_proto != XPRT_TRANSPORT_TCP_TLS)
continue;
+ if (da->da_transport == XPRT_TRANSPORT_TCP &&
+ mds_srv->nfs_client->cl_proto ==
+ XPRT_TRANSPORT_TCP_TLS) {
+ struct sockaddr *addr =
+ (struct sockaddr *)&da->da_addr;
+ struct sockaddr_in *sin =
+ (struct sockaddr_in *)&da->da_addr;
+ struct sockaddr_in6 *sin6 =
+ (struct sockaddr_in6 *)&da->da_addr;
+
+ /* for NFS with TLS we need to supply a correct
+ * servername of the trunked transport, not the
+ * servername of the main transport stored in
+ * clp->cl_hostname. And set the protocol to
+ * indicate to use TLS
+ */
+ servername[0] = '\0';
+ switch(addr->sa_family) {
+ case AF_INET:
+ snprintf(servername, sizeof(servername),
+ "%pI4", &sin->sin_addr.s_addr);
+ break;
+ case AF_INET6:
+ snprintf(servername, sizeof(servername),
+ "%pI6", &sin6->sin6_addr);
+ break;
+ default:
+ /* do not consider this address */
+ continue;
+ }
+ xprt_args.ident = XPRT_TRANSPORT_TCP_TLS;
+ xprt_args.servername = servername;
+ }
if (da->da_addr.ss_family != clp->cl_addr.ss_family)
continue;
+
/**
* Test this address for session trunking and
* add as an alias
@@ -953,6 +944,10 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
if (xprtdata.cred)
put_cred(xprtdata.cred);
} else {
+ if (da->da_transport == XPRT_TRANSPORT_TCP &&
+ mds_srv->nfs_client->cl_proto ==
+ XPRT_TRANSPORT_TCP_TLS)
+ da->da_transport = XPRT_TRANSPORT_TCP_TLS;
clp = nfs4_set_ds_client(mds_srv,
&da->da_addr,
da->da_addrlen,
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index ad3a321ae997..77920a2e3cef 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -153,13 +153,13 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
}
static int
-nfs_proc_lookup(struct inode *dir, struct dentry *dentry,
+nfs_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
struct nfs_diropargs arg = {
.fh = NFS_FH(dir),
- .name = dentry->d_name.name,
- .len = dentry->d_name.len
+ .name = name->name,
+ .len = name->len
};
struct nfs_diropok res = {
.fh = fhandle,
@@ -687,14 +687,22 @@ out_einval:
return -EINVAL;
}
-static int nfs_have_delegation(struct inode *inode, fmode_t flags)
+static int nfs_have_delegation(struct inode *inode, fmode_t type, int flags)
+{
+ return 0;
+}
+
+static int nfs_return_delegation(struct inode *inode)
{
+ if (S_ISREG(inode->i_mode))
+ nfs_wb_all(inode);
return 0;
}
static const struct inode_operations nfs_dir_inode_operations = {
.create = nfs_create,
.lookup = nfs_lookup,
+ .atomic_open = nfs_atomic_open_v23,
.link = nfs_link,
.unlink = nfs_unlink,
.symlink = nfs_symlink,
@@ -756,6 +764,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.lock_check_bounds = nfs_lock_check_bounds,
.close_context = nfs_close_context,
.have_delegation = nfs_have_delegation,
+ .return_delegation = nfs_return_delegation,
.alloc_client = nfs_alloc_client,
.init_client = nfs_init_client,
.free_client = nfs_free_client,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 7dc21a48e3e7..81bd1b9aba17 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -28,6 +28,7 @@
#include "fscache.h"
#include "pnfs.h"
#include "nfstrace.h"
+#include "delegation.h"
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
@@ -47,8 +48,7 @@ static struct nfs_pgio_header *nfs_readhdr_alloc(void)
static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
{
- if (rhdr->res.scratch != NULL)
- kfree(rhdr->res.scratch);
+ kfree(rhdr->res.scratch);
kmem_cache_free(nfs_rdata_cachep, rhdr);
}
@@ -122,8 +122,6 @@ static void nfs_readpage_release(struct nfs_page *req, int error)
{
struct folio *folio = nfs_page_to_folio(req);
- if (nfs_error_is_fatal_on_server(error) && error != -ETIMEDOUT)
- folio_set_error(folio);
if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE))
if (nfs_netfs_folio_unlock(folio))
folio_unlock(folio);
@@ -288,7 +286,7 @@ int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio,
struct nfs_open_context *ctx,
struct folio *folio)
{
- struct inode *inode = folio_file_mapping(folio)->host;
+ struct inode *inode = folio->mapping->host;
struct nfs_server *server = NFS_SERVER(inode);
size_t fsize = folio_size(folio);
unsigned int rsize = server->rsize;
@@ -305,6 +303,8 @@ int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio,
new = nfs_page_create_from_folio(ctx, folio, 0, aligned_len);
if (IS_ERR(new)) {
error = PTR_ERR(new);
+ if (nfs_netfs_folio_unlock(folio))
+ folio_unlock(folio);
goto out;
}
@@ -322,21 +322,57 @@ out:
}
/*
- * Read a page over NFS.
- * We read the page synchronously in the following case:
- * - The error flag is set for this page. This happens only when a
- * previous async read operation failed.
+ * Actually read a folio over the wire.
*/
-int nfs_read_folio(struct file *file, struct folio *folio)
+static int nfs_do_read_folio(struct file *file, struct folio *folio)
{
struct inode *inode = file_inode(file);
struct nfs_pageio_descriptor pgio;
struct nfs_open_context *ctx;
int ret;
- trace_nfs_aop_readpage(inode, folio);
+ ctx = get_nfs_open_context(nfs_file_open_context(file));
+
+ xchg(&ctx->error, 0);
+ nfs_pageio_init_read(&pgio, inode, false,
+ &nfs_async_read_completion_ops);
+
+ ret = nfs_read_add_folio(&pgio, ctx, folio);
+ if (ret)
+ goto out_put;
+
+ nfs_pageio_complete_read(&pgio);
+ nfs_update_delegated_atime(inode);
+ if (pgio.pg_error < 0) {
+ ret = pgio.pg_error;
+ goto out_put;
+ }
+
+ ret = folio_wait_locked_killable(folio);
+ if (!folio_test_uptodate(folio) && !ret)
+ ret = xchg(&ctx->error, 0);
+
+out_put:
+ put_nfs_open_context(ctx);
+ return ret;
+}
+
+/*
+ * Synchronously read a folio.
+ *
+ * This is not heavily used as most users to try an asynchronous
+ * large read through ->readahead first.
+ */
+int nfs_read_folio(struct file *file, struct folio *folio)
+{
+ struct inode *inode = file_inode(file);
+ loff_t pos = folio_pos(folio);
+ size_t len = folio_size(folio);
+ int ret;
+
+ trace_nfs_aop_readpage(inode, pos, len);
nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
- task_io_account_read(folio_size(folio));
+ task_io_account_read(len);
/*
* Try to flush any pending writes to the file..
@@ -356,30 +392,10 @@ int nfs_read_folio(struct file *file, struct folio *folio)
goto out_unlock;
ret = nfs_netfs_read_folio(file, folio);
- if (!ret)
- goto out;
-
- ctx = get_nfs_open_context(nfs_file_open_context(file));
-
- xchg(&ctx->error, 0);
- nfs_pageio_init_read(&pgio, inode, false,
- &nfs_async_read_completion_ops);
-
- ret = nfs_read_add_folio(&pgio, ctx, folio);
if (ret)
- goto out_put;
-
- nfs_pageio_complete_read(&pgio);
- ret = pgio.pg_error < 0 ? pgio.pg_error : 0;
- if (!ret) {
- ret = folio_wait_locked_killable(folio);
- if (!folio_test_uptodate(folio) && !ret)
- ret = xchg(&ctx->error, 0);
- }
-out_put:
- put_nfs_open_context(ctx);
+ ret = nfs_do_read_folio(file, folio);
out:
- trace_nfs_aop_readpage_done(inode, folio, ret);
+ trace_nfs_aop_readpage_done(inode, pos, len, ret);
return ret;
out_unlock:
folio_unlock(folio);
@@ -426,6 +442,7 @@ void nfs_readahead(struct readahead_control *ractl)
}
nfs_pageio_complete_read(&pgio);
+ nfs_update_delegated_atime(inode);
put_nfs_open_context(ctx);
out:
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 075b31c93f87..aeb715b4a690 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -47,6 +47,7 @@
#include <linux/vfs.h>
#include <linux/inet.h>
#include <linux/in6.h>
+#include <linux/sched.h>
#include <linux/slab.h>
#include <net/ipv6.h>
#include <linux/netdevice.h>
@@ -72,6 +73,7 @@
#include "nfs.h"
#include "netns.h"
#include "sysfs.h"
+#include "nfs4idmap.h"
#define NFSDBG_FACILITY NFSDBG_VFS
@@ -228,6 +230,7 @@ static int __nfs_list_for_each_server(struct list_head *head,
ret = fn(server, data);
if (ret)
goto out;
+ cond_resched();
rcu_read_lock();
}
rcu_read_unlock();
@@ -516,8 +519,16 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
else
nfs_show_nfsv4_options(m, nfss, showdefaults);
- if (nfss->options & NFS_OPTION_FSCACHE)
+ if (nfss->options & NFS_OPTION_FSCACHE) {
+#ifdef CONFIG_NFS_FSCACHE
+ if (nfss->fscache_uniq)
+ seq_printf(m, ",fsc=%s", nfss->fscache_uniq);
+ else
+ seq_puts(m, ",fsc");
+#else
seq_puts(m, ",fsc");
+#endif
+ }
if (nfss->options & NFS_OPTION_MIGRATION)
seq_puts(m, ",migration");
@@ -541,6 +552,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
else
seq_puts(m, ",local_lock=posix");
+ if (nfss->flags & NFS_MOUNT_NO_ALIGNWRITE)
+ seq_puts(m, ",noalignwrite");
+
if (nfss->flags & NFS_MOUNT_WRITE_EAGER) {
if (nfss->flags & NFS_MOUNT_WRITE_WAIT)
seq_puts(m, ",write=wait");
@@ -872,7 +886,15 @@ static int nfs_request_mount(struct fs_context *fc,
* Now ask the mount server to map our export path
* to a file handle.
*/
- status = nfs_mount(&request, ctx->timeo, ctx->retrans);
+ if ((request.protocol == XPRT_TRANSPORT_UDP) ==
+ !(ctx->flags & NFS_MOUNT_TCP))
+ /*
+ * NFS protocol and mount protocol are both UDP or neither UDP
+ * so timeouts are compatible. Use NFS timeouts for MOUNT
+ */
+ status = nfs_mount(&request, ctx->timeo, ctx->retrans);
+ else
+ status = nfs_mount(&request, NFS_UNSPEC_TIMEO, NFS_UNSPEC_RETRANS);
if (status != 0) {
dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n",
request.hostname, status);
@@ -893,6 +915,16 @@ static struct nfs_server *nfs_try_mount_request(struct fs_context *fc)
rpc_authflavor_t authlist[NFS_MAX_SECFLAVORS];
unsigned int authlist_len = ARRAY_SIZE(authlist);
+ /* make sure 'nolock'/'lock' override the 'local_lock' mount option */
+ if (ctx->lock_status) {
+ if (ctx->lock_status == NFS_LOCK_NOLOCK) {
+ ctx->flags |= NFS_MOUNT_NONLM;
+ ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL);
+ } else {
+ ctx->flags &= ~NFS_MOUNT_NONLM;
+ ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL);
+ }
+ }
status = nfs_request_mount(fc, ctx->mntfh, authlist, &authlist_len);
if (status)
return ERR_PTR(status);
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 0e27a2e4e68b..1c62a5a9f51d 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -32,16 +32,8 @@ static int nfs_symlink_filler(struct file *file, struct folio *folio)
int error;
error = NFS_PROTO(inode)->readlink(inode, &folio->page, 0, PAGE_SIZE);
- if (error < 0)
- goto error;
- folio_mark_uptodate(folio);
- folio_unlock(folio);
- return 0;
-
-error:
- folio_set_error(folio);
- folio_unlock(folio);
- return -EIO;
+ folio_end_read(folio, error == 0);
+ return error;
}
static const char *nfs_get_link(struct dentry *dentry,
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index e645be1a3381..f579df0e8d67 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -14,7 +14,7 @@
static struct ctl_table_header *nfs_callback_sysctl_table;
-static struct ctl_table nfs_cb_sysctls[] = {
+static const struct ctl_table nfs_cb_sysctls[] = {
{
.procname = "nfs_mountpoint_timeout",
.data = &nfs_mountpoint_expiry_timeout,
diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
index bf378ecd5d9f..7b59a40d40c0 100644
--- a/fs/nfs/sysfs.c
+++ b/fs/nfs/sysfs.c
@@ -280,9 +280,9 @@ void nfs_sysfs_link_rpc_client(struct nfs_server *server,
char name[RPC_CLIENT_NAME_SIZE];
int ret;
- strcpy(name, clnt->cl_program->name);
- strcat(name, uniq ? uniq : "");
- strcat(name, "_client");
+ strscpy(name, clnt->cl_program->name, sizeof(name));
+ strncat(name, uniq ? uniq : "", sizeof(name) - strlen(name) - 1);
+ strncat(name, "_client", sizeof(name) - strlen(name) - 1);
ret = sysfs_create_link_nowarn(&server->kobj,
&clnt->cl_sysfs->kobject, name);
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 0110299643a2..bf77399696a7 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -232,6 +232,8 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode)
dentry->d_fsdata = NULL;
spin_unlock(&dentry->d_lock);
+ NFS_PROTO(inode)->return_delegation(inode);
+
if (NFS_STALE(inode) || !nfs_call_unlink(dentry, inode, data))
nfs_free_unlinkdata(data);
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index bb79d3a886ae..aa3d8bea3ec0 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -63,9 +63,6 @@ static void nfs_clear_request_commit(struct nfs_commit_info *cinfo,
struct nfs_page *req);
static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
struct inode *inode);
-static struct nfs_page *
-nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
- struct folio *folio);
static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
@@ -172,28 +169,23 @@ nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
return 0;
}
-static struct nfs_page *nfs_folio_private_request(struct folio *folio)
-{
- return folio_get_private(folio);
-}
-
/**
- * nfs_folio_find_private_request - find head request associated with a folio
+ * nfs_folio_find_head_request - find head request associated with a folio
* @folio: pointer to folio
*
* must be called while holding the inode lock.
*
* returns matching head request with reference held, or NULL if not found.
*/
-static struct nfs_page *nfs_folio_find_private_request(struct folio *folio)
+static struct nfs_page *nfs_folio_find_head_request(struct folio *folio)
{
- struct address_space *mapping = folio_file_mapping(folio);
+ struct address_space *mapping = folio->mapping;
struct nfs_page *req;
if (!folio_test_private(folio))
return NULL;
spin_lock(&mapping->i_private_lock);
- req = nfs_folio_private_request(folio);
+ req = folio->private;
if (req) {
WARN_ON_ONCE(req->wb_head != req);
kref_get(&req->wb_kref);
@@ -202,86 +194,20 @@ static struct nfs_page *nfs_folio_find_private_request(struct folio *folio)
return req;
}
-static struct nfs_page *nfs_folio_find_swap_request(struct folio *folio)
-{
- struct inode *inode = folio_file_mapping(folio)->host;
- struct nfs_inode *nfsi = NFS_I(inode);
- struct nfs_page *req = NULL;
- if (!folio_test_swapcache(folio))
- return NULL;
- mutex_lock(&nfsi->commit_mutex);
- if (folio_test_swapcache(folio)) {
- req = nfs_page_search_commits_for_head_request_locked(nfsi,
- folio);
- if (req) {
- WARN_ON_ONCE(req->wb_head != req);
- kref_get(&req->wb_kref);
- }
- }
- mutex_unlock(&nfsi->commit_mutex);
- return req;
-}
-
-/**
- * nfs_folio_find_head_request - find head request associated with a folio
- * @folio: pointer to folio
- *
- * returns matching head request with reference held, or NULL if not found.
- */
-static struct nfs_page *nfs_folio_find_head_request(struct folio *folio)
-{
- struct nfs_page *req;
-
- req = nfs_folio_find_private_request(folio);
- if (!req)
- req = nfs_folio_find_swap_request(folio);
- return req;
-}
-
-static struct nfs_page *nfs_folio_find_and_lock_request(struct folio *folio)
-{
- struct inode *inode = folio_file_mapping(folio)->host;
- struct nfs_page *req, *head;
- int ret;
-
- for (;;) {
- req = nfs_folio_find_head_request(folio);
- if (!req)
- return req;
- head = nfs_page_group_lock_head(req);
- if (head != req)
- nfs_release_request(req);
- if (IS_ERR(head))
- return head;
- ret = nfs_cancel_remove_inode(head, inode);
- if (ret < 0) {
- nfs_unlock_and_release_request(head);
- return ERR_PTR(ret);
- }
- /* Ensure that nobody removed the request before we locked it */
- if (head == nfs_folio_private_request(folio))
- break;
- if (folio_test_swapcache(folio))
- break;
- nfs_unlock_and_release_request(head);
- }
- return head;
-}
-
/* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct folio *folio, unsigned int offset,
unsigned int count)
{
- struct inode *inode = folio_file_mapping(folio)->host;
+ struct inode *inode = folio->mapping->host;
loff_t end, i_size;
pgoff_t end_index;
spin_lock(&inode->i_lock);
i_size = i_size_read(inode);
end_index = ((i_size - 1) >> folio_shift(folio)) << folio_order(folio);
- if (i_size > 0 && folio_index(folio) < end_index)
+ if (i_size > 0 && folio->index < end_index)
goto out;
- end = folio_file_pos(folio) + (loff_t)offset + (loff_t)count;
+ end = folio_pos(folio) + (loff_t)offset + (loff_t)count;
if (i_size >= end)
goto out;
trace_nfs_size_grow(inode, end);
@@ -289,6 +215,8 @@ static void nfs_grow_file(struct folio *folio, unsigned int offset,
NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE;
nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
out:
+ /* Atomically update timestamps if they are delegated to us. */
+ nfs_update_delegated_mtime_locked(inode);
spin_unlock(&inode->i_lock);
nfs_fscache_invalidate(inode, 0);
}
@@ -309,9 +237,8 @@ static void nfs_set_pageerror(struct address_space *mapping)
static void nfs_mapping_set_error(struct folio *folio, int error)
{
- struct address_space *mapping = folio_file_mapping(folio);
+ struct address_space *mapping = folio->mapping;
- folio_set_error(folio);
filemap_set_wb_err(mapping, error);
if (mapping->host)
errseq_set(&mapping->host->i_sb->s_wb_err,
@@ -410,7 +337,7 @@ int nfs_congestion_kb;
static void nfs_folio_set_writeback(struct folio *folio)
{
- struct nfs_server *nfss = NFS_SERVER(folio_file_mapping(folio)->host);
+ struct nfs_server *nfss = NFS_SERVER(folio->mapping->host);
folio_start_writeback(folio);
if (atomic_long_inc_return(&nfss->writeback) > NFS_CONGESTION_ON_THRESH)
@@ -419,12 +346,14 @@ static void nfs_folio_set_writeback(struct folio *folio)
static void nfs_folio_end_writeback(struct folio *folio)
{
- struct nfs_server *nfss = NFS_SERVER(folio_file_mapping(folio)->host);
+ struct nfs_server *nfss = NFS_SERVER(folio->mapping->host);
folio_end_writeback(folio);
if (atomic_long_dec_return(&nfss->writeback) <
- NFS_CONGESTION_OFF_THRESH)
+ NFS_CONGESTION_OFF_THRESH) {
nfss->write_congested = 0;
+ wake_up_all(&nfss->write_congestion_wait);
+ }
}
static void nfs_page_end_writeback(struct nfs_page *req)
@@ -548,6 +477,74 @@ void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo,
nfs_destroy_unlinked_subrequests(destroy_list, head, inode);
}
+/**
+ * nfs_wait_on_request - Wait for a request to complete.
+ * @req: request to wait upon.
+ *
+ * Interruptible by fatal signals only.
+ * The user is responsible for holding a count on the request.
+ */
+static int nfs_wait_on_request(struct nfs_page *req)
+{
+ if (!test_bit(PG_BUSY, &req->wb_flags))
+ return 0;
+ set_bit(PG_CONTENDED2, &req->wb_flags);
+ smp_mb__after_atomic();
+ return wait_on_bit_io(&req->wb_flags, PG_BUSY,
+ TASK_UNINTERRUPTIBLE);
+}
+
+/*
+ * nfs_unroll_locks - unlock all newly locked reqs and wait on @req
+ * @head: head request of page group, must be holding head lock
+ * @req: request that couldn't lock and needs to wait on the req bit lock
+ *
+ * This is a helper function for nfs_lock_and_join_requests
+ * returns 0 on success, < 0 on error.
+ */
+static void
+nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req)
+{
+ struct nfs_page *tmp;
+
+ /* relinquish all the locks successfully grabbed this run */
+ for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) {
+ if (!kref_read(&tmp->wb_kref))
+ continue;
+ nfs_unlock_and_release_request(tmp);
+ }
+}
+
+/*
+ * nfs_page_group_lock_subreq - try to lock a subrequest
+ * @head: head request of page group
+ * @subreq: request to lock
+ *
+ * This is a helper function for nfs_lock_and_join_requests which
+ * must be called with the head request and page group both locked.
+ * On error, it returns with the page group unlocked.
+ */
+static int
+nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq)
+{
+ int ret;
+
+ if (!kref_get_unless_zero(&subreq->wb_kref))
+ return 0;
+ while (!nfs_lock_request(subreq)) {
+ nfs_page_group_unlock(head);
+ ret = nfs_wait_on_request(subreq);
+ if (!ret)
+ ret = nfs_page_group_lock(head);
+ if (ret < 0) {
+ nfs_unroll_locks(head, subreq);
+ nfs_release_request(subreq);
+ return ret;
+ }
+ }
+ return 0;
+}
+
/*
* nfs_lock_and_join_requests - join all subreqs to the head req
* @folio: the folio used to lookup the "page group" of nfs_page structures
@@ -565,31 +562,59 @@ void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo,
*/
static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio)
{
- struct inode *inode = folio_file_mapping(folio)->host;
- struct nfs_page *head;
+ struct inode *inode = folio->mapping->host;
+ struct nfs_page *head, *subreq;
struct nfs_commit_info cinfo;
int ret;
- nfs_init_cinfo_from_inode(&cinfo, inode);
/*
* A reference is taken only on the head request which acts as a
* reference to the whole page group - the group will not be destroyed
* until the head reference is released.
*/
- head = nfs_folio_find_and_lock_request(folio);
- if (IS_ERR_OR_NULL(head))
- return head;
+retry:
+ head = nfs_folio_find_head_request(folio);
+ if (!head)
+ return NULL;
- /* lock each request in the page group */
- ret = nfs_page_group_lock_subrequests(head);
- if (ret < 0) {
+ while (!nfs_lock_request(head)) {
+ ret = nfs_wait_on_request(head);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ }
+
+ /* Ensure that nobody removed the request before we locked it */
+ if (head != folio->private) {
nfs_unlock_and_release_request(head);
- return ERR_PTR(ret);
+ goto retry;
}
- nfs_join_page_group(head, &cinfo, inode);
+ ret = nfs_cancel_remove_inode(head, inode);
+ if (ret < 0)
+ goto out_unlock;
+
+ ret = nfs_page_group_lock(head);
+ if (ret < 0)
+ goto out_unlock;
+
+ /* lock each request in the page group */
+ for (subreq = head->wb_this_page;
+ subreq != head;
+ subreq = subreq->wb_this_page) {
+ ret = nfs_page_group_lock_subreq(head, subreq);
+ if (ret < 0)
+ goto out_unlock;
+ }
+
+ nfs_page_group_unlock(head);
+ nfs_init_cinfo_from_inode(&cinfo, inode);
+ nfs_join_page_group(head, &cinfo, inode);
return head;
+
+out_unlock:
+ nfs_unlock_and_release_request(head);
+ return ERR_PTR(ret);
}
static void nfs_write_error(struct nfs_page *req, int error)
@@ -641,7 +666,7 @@ static int nfs_page_async_flush(struct folio *folio,
nfs_redirty_request(req);
pgio->pg_error = 0;
} else
- nfs_add_stats(folio_file_mapping(folio)->host,
+ nfs_add_stats(folio->mapping->host,
NFSIOS_WRITEPAGES, 1);
out:
return ret;
@@ -653,7 +678,7 @@ out_launder:
static int nfs_do_writepage(struct folio *folio, struct writeback_control *wbc,
struct nfs_pageio_descriptor *pgio)
{
- nfs_pageio_cond_complete(pgio, folio_index(folio));
+ nfs_pageio_cond_complete(pgio, folio->index);
return nfs_page_async_flush(folio, wbc, pgio);
}
@@ -664,13 +689,9 @@ static int nfs_writepage_locked(struct folio *folio,
struct writeback_control *wbc)
{
struct nfs_pageio_descriptor pgio;
- struct inode *inode = folio_file_mapping(folio)->host;
+ struct inode *inode = folio->mapping->host;
int err;
- if (wbc->sync_mode == WB_SYNC_NONE &&
- NFS_SERVER(inode)->write_congested)
- return AOP_WRITEPAGE_ACTIVATE;
-
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
nfs_pageio_init_write(&pgio, inode, 0, false,
&nfs_async_write_completion_ops);
@@ -702,12 +723,17 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
struct nfs_pageio_descriptor pgio;
struct nfs_io_completion *ioc = NULL;
unsigned int mntflags = NFS_SERVER(inode)->flags;
+ struct nfs_server *nfss = NFS_SERVER(inode);
int priority = 0;
int err;
- if (wbc->sync_mode == WB_SYNC_NONE &&
- NFS_SERVER(inode)->write_congested)
- return 0;
+ /* Wait with writeback until write congestion eases */
+ if (wbc->sync_mode == WB_SYNC_NONE && nfss->write_congested) {
+ err = wait_event_killable(nfss->write_congestion_wait,
+ nfss->write_congested == 0);
+ if (err)
+ return err;
+ }
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
@@ -746,24 +772,17 @@ out_err:
static void nfs_inode_add_request(struct nfs_page *req)
{
struct folio *folio = nfs_page_to_folio(req);
- struct address_space *mapping = folio_file_mapping(folio);
+ struct address_space *mapping = folio->mapping;
struct nfs_inode *nfsi = NFS_I(mapping->host);
WARN_ON_ONCE(req->wb_this_page != req);
/* Lock the request! */
nfs_lock_request(req);
-
- /*
- * Swap-space should not get truncated. Hence no need to plug the race
- * with invalidate/truncate.
- */
spin_lock(&mapping->i_private_lock);
- if (likely(!folio_test_swapcache(folio))) {
- set_bit(PG_MAPPED, &req->wb_flags);
- folio_set_private(folio);
- folio->private = req;
- }
+ set_bit(PG_MAPPED, &req->wb_flags);
+ folio_set_private(folio);
+ folio->private = req;
spin_unlock(&mapping->i_private_lock);
atomic_long_inc(&nfsi->nrequests);
/* this a head request for a page group - mark it as having an
@@ -783,10 +802,10 @@ static void nfs_inode_remove_request(struct nfs_page *req)
if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
struct folio *folio = nfs_page_to_folio(req->wb_head);
- struct address_space *mapping = folio_file_mapping(folio);
+ struct address_space *mapping = folio->mapping;
spin_lock(&mapping->i_private_lock);
- if (likely(folio && !folio_test_swapcache(folio))) {
+ if (likely(folio)) {
folio->private = NULL;
folio_clear_private(folio);
clear_bit(PG_MAPPED, &req->wb_head->wb_flags);
@@ -807,38 +826,6 @@ static void nfs_mark_request_dirty(struct nfs_page *req)
filemap_dirty_folio(folio_mapping(folio), folio);
}
-/*
- * nfs_page_search_commits_for_head_request_locked
- *
- * Search through commit lists on @inode for the head request for @folio.
- * Must be called while holding the inode (which is cinfo) lock.
- *
- * Returns the head request if found, or NULL if not found.
- */
-static struct nfs_page *
-nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
- struct folio *folio)
-{
- struct nfs_page *freq, *t;
- struct nfs_commit_info cinfo;
- struct inode *inode = &nfsi->vfs_inode;
-
- nfs_init_cinfo_from_inode(&cinfo, inode);
-
- /* search through pnfs commit lists */
- freq = pnfs_search_commit_reqs(inode, &cinfo, folio);
- if (freq)
- return freq->wb_head;
-
- /* Linearly search the commit list for the correct request */
- list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) {
- if (nfs_page_to_folio(freq) == folio)
- return freq->wb_head;
- }
-
- return NULL;
-}
-
/**
* nfs_request_add_commit_list_locked - add request to a commit list
* @req: pointer to a struct nfs_page
@@ -945,7 +932,7 @@ static void nfs_folio_clear_commit(struct folio *folio)
long nr = folio_nr_pages(folio);
node_stat_mod_folio(folio, NR_WRITEBACK, -nr);
- wb_stat_mod(&inode_to_bdi(folio_file_mapping(folio)->host)->wb,
+ wb_stat_mod(&inode_to_bdi(folio->mapping->host)->wb,
WB_WRITEBACK, -nr);
}
}
@@ -1130,7 +1117,7 @@ out_flushme:
*/
nfs_mark_request_dirty(req);
nfs_unlock_and_release_request(req);
- error = nfs_wb_folio(folio_file_mapping(folio)->host, folio);
+ error = nfs_wb_folio(folio->mapping->host, folio);
return (error < 0) ? ERR_PTR(error) : NULL;
}
@@ -1206,7 +1193,7 @@ int nfs_flush_incompatible(struct file *file, struct folio *folio)
nfs_release_request(req);
if (!do_flush)
return 0;
- status = nfs_wb_folio(folio_file_mapping(folio)->host, folio);
+ status = nfs_wb_folio(folio->mapping->host, folio);
} while (status == 0);
return status;
}
@@ -1280,7 +1267,7 @@ out:
*/
static bool nfs_folio_write_uptodate(struct folio *folio, unsigned int pagelen)
{
- struct inode *inode = folio_file_mapping(folio)->host;
+ struct inode *inode = folio->mapping->host;
struct nfs_inode *nfsi = NFS_I(inode);
if (nfs_have_delegated_attributes(inode))
@@ -1301,7 +1288,7 @@ static bool
is_whole_file_wrlock(struct file_lock *fl)
{
return fl->fl_start == 0 && fl->fl_end == OFFSET_MAX &&
- fl->fl_type == F_WRLCK;
+ lock_is_write(fl);
}
/* If we know the page is up to date, and we're not using byte range locks (or
@@ -1319,12 +1306,15 @@ static int nfs_can_extend_write(struct file *file, struct folio *folio,
struct file_lock_context *flctx = locks_inode_context(inode);
struct file_lock *fl;
int ret;
+ unsigned int mntflags = NFS_SERVER(inode)->flags;
+ if (mntflags & NFS_MOUNT_NO_ALIGNWRITE)
+ return 0;
if (file->f_flags & O_DSYNC)
return 0;
if (!nfs_folio_write_uptodate(folio, pagelen))
return 0;
- if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
+ if (nfs_have_write_delegation(inode))
return 1;
if (!flctx || (list_empty_careful(&flctx->flc_flock) &&
list_empty_careful(&flctx->flc_posix)))
@@ -1335,13 +1325,13 @@ static int nfs_can_extend_write(struct file *file, struct folio *folio,
spin_lock(&flctx->flc_lock);
if (!list_empty(&flctx->flc_posix)) {
fl = list_first_entry(&flctx->flc_posix, struct file_lock,
- fl_list);
+ c.flc_list);
if (is_whole_file_wrlock(fl))
ret = 1;
} else if (!list_empty(&flctx->flc_flock)) {
fl = list_first_entry(&flctx->flc_flock, struct file_lock,
- fl_list);
- if (fl->fl_type == F_WRLCK)
+ c.flc_list);
+ if (lock_is_write(fl))
ret = 1;
}
spin_unlock(&flctx->flc_lock);
@@ -1358,7 +1348,7 @@ int nfs_update_folio(struct file *file, struct folio *folio,
unsigned int offset, unsigned int count)
{
struct nfs_open_context *ctx = nfs_file_open_context(file);
- struct address_space *mapping = folio_file_mapping(folio);
+ struct address_space *mapping = folio->mapping;
struct inode *inode = mapping->host;
unsigned int pagelen = nfs_folio_length(folio);
int status = 0;
@@ -1366,14 +1356,18 @@ int nfs_update_folio(struct file *file, struct folio *folio,
nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
dprintk("NFS: nfs_update_folio(%pD2 %d@%lld)\n", file, count,
- (long long)(folio_file_pos(folio) + offset));
+ (long long)(folio_pos(folio) + offset));
if (!count)
goto out;
if (nfs_can_extend_write(file, folio, pagelen)) {
- count = max(count + offset, pagelen);
- offset = 0;
+ unsigned int end = count + offset;
+
+ offset = round_down(offset, PAGE_SIZE);
+ if (end < pagelen)
+ end = min(round_up(end, PAGE_SIZE), pagelen);
+ count = end - offset;
}
status = nfs_writepage_setup(ctx, folio, offset, count);
@@ -1518,6 +1512,13 @@ void nfs_writeback_update_inode(struct nfs_pgio_header *hdr)
struct nfs_fattr *fattr = &hdr->fattr;
struct inode *inode = hdr->inode;
+ if (nfs_have_delegated_mtime(inode)) {
+ spin_lock(&inode->i_lock);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
+ spin_unlock(&inode->i_lock);
+ return;
+ }
+
spin_lock(&inode->i_lock);
nfs_writeback_check_extend(hdr, fattr);
nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
@@ -1650,7 +1651,7 @@ static int wait_on_commit(struct nfs_mds_commit_info *cinfo)
!atomic_read(&cinfo->rpcs_out));
}
-static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
+void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
{
atomic_inc(&cinfo->rpcs_out);
}
@@ -1674,7 +1675,8 @@ EXPORT_SYMBOL_GPL(nfs_commitdata_release);
int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
const struct nfs_rpc_ops *nfs_ops,
const struct rpc_call_ops *call_ops,
- int how, int flags)
+ int how, int flags,
+ struct nfsd_file *localio)
{
struct rpc_task *task;
int priority = flush_task_priority(how);
@@ -1703,6 +1705,9 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
dprintk("NFS: initiated commit call\n");
+ if (localio)
+ return nfs_local_commit(localio, data, call_ops, how);
+
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
@@ -1802,6 +1807,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
struct nfs_commit_info *cinfo)
{
struct nfs_commit_data *data;
+ struct nfsd_file *localio;
unsigned short task_flags = 0;
/* another commit raced with us */
@@ -1818,9 +1824,13 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
nfs_init_commit(data, head, NULL, cinfo);
if (NFS_SERVER(inode)->nfs_client->cl_minorversion)
task_flags = RPC_TASK_MOVEABLE;
+
+ localio = nfs_local_open_fh(NFS_SERVER(inode)->nfs_client, data->cred,
+ data->args.fh, &data->context->nfl,
+ data->context->mode);
return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode),
data->mds_ops, how,
- RPC_TASK_CRED_NOREF | task_flags);
+ RPC_TASK_CRED_NOREF | task_flags, localio);
}
/*
@@ -1841,7 +1851,6 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
struct nfs_page *req;
int status = data->task.tk_status;
struct nfs_commit_info cinfo;
- struct nfs_server *nfss;
struct folio *folio;
while (!list_empty(&data->pages)) {
@@ -1884,9 +1893,6 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
/* Latency breaker */
cond_resched();
}
- nfss = NFS_SERVER(data->inode);
- if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
- nfss->write_congested = 0;
nfs_init_cinfo(&cinfo, data->inode, data->dreq);
nfs_commit_end(cinfo.mds);
@@ -2077,17 +2083,17 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio)
*/
int nfs_wb_folio(struct inode *inode, struct folio *folio)
{
- loff_t range_start = folio_file_pos(folio);
- loff_t range_end = range_start + (loff_t)folio_size(folio) - 1;
+ loff_t range_start = folio_pos(folio);
+ size_t len = folio_size(folio);
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = 0,
.range_start = range_start,
- .range_end = range_end,
+ .range_end = range_start + len - 1,
};
int ret;
- trace_nfs_writeback_folio(inode, folio);
+ trace_nfs_writeback_folio(inode, range_start, len);
for (;;) {
folio_wait_writeback(folio);
@@ -2105,7 +2111,7 @@ int nfs_wb_folio(struct inode *inode, struct folio *folio)
goto out_error;
}
out_error:
- trace_nfs_writeback_folio_done(inode, folio, ret);
+ trace_nfs_writeback_folio_done(inode, range_start, len, ret);
return ret;
}
@@ -2124,10 +2130,10 @@ int nfs_migrate_folio(struct address_space *mapping, struct folio *dst,
if (folio_test_private(src))
return -EBUSY;
- if (folio_test_fscache(src)) {
+ if (folio_test_private_2(src)) { /* [DEPRECATED] */
if (mode == MIGRATE_ASYNC)
return -EBUSY;
- folio_wait_fscache(src);
+ folio_wait_private_2(src);
}
return migrate_folio(mapping, dst, src, mode);