summaryrefslogtreecommitdiff
path: root/fs/nfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs')
-rw-r--r--fs/nfs/Kconfig18
-rw-r--r--fs/nfs/Makefile1
-rw-r--r--fs/nfs/blocklayout/blocklayout.c57
-rw-r--r--fs/nfs/blocklayout/blocklayout.h11
-rw-r--r--fs/nfs/blocklayout/dev.c184
-rw-r--r--fs/nfs/blocklayout/extent_tree.c104
-rw-r--r--fs/nfs/blocklayout/rpc_pipefs.c55
-rw-r--r--fs/nfs/callback.c97
-rw-r--r--fs/nfs/callback.h29
-rw-r--r--fs/nfs/callback_proc.c80
-rw-r--r--fs/nfs/callback_xdr.c66
-rw-r--r--fs/nfs/client.c221
-rw-r--r--fs/nfs/delegation.c328
-rw-r--r--fs/nfs/delegation.h50
-rw-r--r--fs/nfs/dir.c617
-rw-r--r--fs/nfs/direct.c265
-rw-r--r--fs/nfs/dns_resolve.c12
-rw-r--r--fs/nfs/export.c30
-rw-r--r--fs/nfs/file.c272
-rw-r--r--fs/nfs/filelayout/filelayout.c43
-rw-r--r--fs/nfs/filelayout/filelayout.h2
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c18
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c1082
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.h67
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c133
-rw-r--r--fs/nfs/fs_context.c209
-rw-r--r--fs/nfs/fscache.c237
-rw-r--r--fs/nfs/fscache.h143
-rw-r--r--fs/nfs/getroot.c2
-rw-r--r--fs/nfs/inode.c540
-rw-r--r--fs/nfs/internal.h201
-rw-r--r--fs/nfs/io.c55
-rw-r--r--fs/nfs/iostat.h26
-rw-r--r--fs/nfs/localio.c1072
-rw-r--r--fs/nfs/mount_clnt.c73
-rw-r--r--fs/nfs/namespace.c21
-rw-r--r--fs/nfs/netns.h8
-rw-r--r--fs/nfs/nfs.h6
-rw-r--r--fs/nfs/nfs2super.c1
-rw-r--r--fs/nfs/nfs2xdr.c74
-rw-r--r--fs/nfs/nfs3_fs.h3
-rw-r--r--fs/nfs/nfs3acl.c15
-rw-r--r--fs/nfs/nfs3client.c29
-rw-r--r--fs/nfs/nfs3proc.c98
-rw-r--r--fs/nfs/nfs3super.c4
-rw-r--r--fs/nfs/nfs3xdr.c112
-rw-r--r--fs/nfs/nfs42.h9
-rw-r--r--fs/nfs/nfs42proc.c303
-rw-r--r--fs/nfs/nfs42xattr.c142
-rw-r--r--fs/nfs/nfs42xdr.c796
-rw-r--r--fs/nfs/nfs4_fs.h19
-rw-r--r--fs/nfs/nfs4client.c222
-rw-r--r--fs/nfs/nfs4file.c51
-rw-r--r--fs/nfs/nfs4getroot.c14
-rw-r--r--fs/nfs/nfs4idmap.c21
-rw-r--r--fs/nfs/nfs4proc.c895
-rw-r--r--fs/nfs/nfs4renewd.c2
-rw-r--r--fs/nfs/nfs4session.h4
-rw-r--r--fs/nfs/nfs4state.c152
-rw-r--r--fs/nfs/nfs4super.c37
-rw-r--r--fs/nfs/nfs4sysctl.c24
-rw-r--r--fs/nfs/nfs4trace.c11
-rw-r--r--fs/nfs/nfs4trace.h439
-rw-r--r--fs/nfs/nfs4xdr.c318
-rw-r--r--fs/nfs/nfsroot.c6
-rw-r--r--fs/nfs/nfstrace.h460
-rw-r--r--fs/nfs/pagelist.c368
-rw-r--r--fs/nfs/pnfs.c373
-rw-r--r--fs/nfs/pnfs.h65
-rw-r--r--fs/nfs/pnfs_dev.c5
-rw-r--r--fs/nfs/pnfs_nfs.c187
-rw-r--r--fs/nfs/proc.c32
-rw-r--r--fs/nfs/read.c230
-rw-r--r--fs/nfs/super.c128
-rw-r--r--fs/nfs/symlink.c32
-rw-r--r--fs/nfs/sysctl.c23
-rw-r--r--fs/nfs/sysfs.c346
-rw-r--r--fs/nfs/sysfs.h10
-rw-r--r--fs/nfs/unlink.c15
-rw-r--r--fs/nfs/write.c823
80 files changed, 8992 insertions, 4341 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 1ead5bd740c2..07932ce9246c 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -2,8 +2,10 @@
config NFS_FS
tristate "NFS client support"
depends on INET && FILE_LOCKING && MULTIUSER
+ select CRC32
select LOCKD
select SUNRPC
+ select NFS_COMMON
select NFS_ACL_SUPPORT if NFS_V3_ACL
help
Choose Y here if you want to access files residing on other
@@ -33,12 +35,12 @@ config NFS_FS
config NFS_V2
tristate "NFS client support for NFS version 2"
depends on NFS_FS
- default y
+ default n
help
This option enables support for version 2 of the NFS protocol
(RFC 1094) in the kernel's NFS client.
- If unsure, say Y.
+ If unsure, say N.
config NFS_V3
tristate "NFS client support for NFS version 3"
@@ -75,7 +77,6 @@ config NFS_V3_ACL
config NFS_V4
tristate "NFS client support for NFS version 4"
depends on NFS_FS
- select SUNRPC_GSS
select KEYS
help
This option enables support for version 4 of the NFS protocol
@@ -126,7 +127,7 @@ config PNFS_BLOCK
config PNFS_FLEXFILE_LAYOUT
tristate
- depends on NFS_V4_1 && NFS_V3
+ depends on NFS_V4_1
default NFS_V4
config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN
@@ -170,7 +171,9 @@ config ROOT_NFS
config NFS_FSCACHE
bool "Provide NFS client caching support"
- depends on NFS_FS=m && FSCACHE || NFS_FS=y && FSCACHE=y
+ depends on NFS_FS
+ select NETFS_SUPPORT
+ select FSCACHE
help
Say Y here if you want NFS data to be cached locally on disc through
the general filesystem cache manager
@@ -194,7 +197,6 @@ config NFS_USE_KERNEL_DNS
config NFS_DEBUG
bool
depends on NFS_FS && SUNRPC_DEBUG
- select CRC32
default y
config NFS_DISABLE_UDP_SUPPORT
@@ -211,6 +213,4 @@ config NFS_V4_2_READ_PLUS
depends on NFS_V4_2
default y
help
- Choose Y here to enable the use of READ_PLUS over NFS v4.2. READ_PLUS
- attempts to improve read performance by compressing out sparse holes
- in the file contents.
+ Choose Y here to enable use of the NFS v4.2 READ_PLUS operation.
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 5f6db37f461e..9fb2f2cac87e 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -13,6 +13,7 @@ nfs-y := client.o dir.o file.o getroot.o inode.o super.o \
nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
nfs-$(CONFIG_SYSCTL) += sysctl.o
nfs-$(CONFIG_NFS_FSCACHE) += fscache.o
+nfs-$(CONFIG_NFS_LOCALIO) += localio.o
obj-$(CONFIG_NFS_V2) += nfsv2.o
nfsv2-y := nfs2super.o proc.o nfs2xdr.o
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 943aeea1eb16..0e4c67373e4f 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -149,8 +149,8 @@ do_add_page_to_bio(struct bio *bio, int npg, enum req_op op, sector_t isect,
/* limit length to what the device mapping allows */
end = disk_addr + *len;
- if (end >= map->start + map->len)
- *len = map->start + map->len - disk_addr;
+ if (end >= map->disk_offset + map->len)
+ *len = map->disk_offset + map->len - disk_addr;
retry:
if (!bio) {
@@ -564,23 +564,45 @@ bl_find_get_deviceid(struct nfs_server *server,
gfp_t gfp_mask)
{
struct nfs4_deviceid_node *node;
- unsigned long start, end;
+ int err = -ENODEV;
retry:
node = nfs4_find_get_deviceid(server, id, cred, gfp_mask);
if (!node)
return ERR_PTR(-ENODEV);
- if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0)
- return node;
+ /*
+ * Devices that are marked unavailable are left in the cache with a
+ * timeout to avoid sending GETDEVINFO after every LAYOUTGET, or
+ * constantly attempting to register the device. Once marked as
+ * unavailable they must be deleted and never reused.
+ */
+ if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) {
+ unsigned long end = jiffies;
+ unsigned long start = end - PNFS_DEVICE_RETRY_TIMEOUT;
+
+ if (!time_in_range(node->timestamp_unavailable, start, end)) {
+ /* Uncork subsequent GETDEVINFO operations for this device */
+ nfs4_delete_deviceid(node->ld, node->nfs_client, id);
+ goto retry;
+ }
+ goto out_put;
+ }
- end = jiffies;
- start = end - PNFS_DEVICE_RETRY_TIMEOUT;
- if (!time_in_range(node->timestamp_unavailable, start, end)) {
- nfs4_delete_deviceid(node->ld, node->nfs_client, id);
- goto retry;
+ if (!bl_register_dev(container_of(node, struct pnfs_block_dev, node))) {
+ /*
+ * If we cannot register, treat this device as transient:
+ * Make a negative cache entry for the device
+ */
+ nfs4_mark_deviceid_unavailable(node);
+ goto out_put;
}
- return ERR_PTR(-ENODEV);
+
+ return node;
+
+out_put:
+ nfs4_put_deviceid_node(node);
+ return ERR_PTR(err);
}
static int
@@ -654,7 +676,7 @@ bl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr,
struct pnfs_layout_segment *lseg;
struct xdr_buf buf;
struct xdr_stream xdr;
- struct page *scratch;
+ struct folio *scratch;
int status, i;
uint32_t count;
__be32 *p;
@@ -667,13 +689,13 @@ bl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr,
return ERR_PTR(-ENOMEM);
status = -ENOMEM;
- scratch = alloc_page(gfp_mask);
+ scratch = folio_alloc(gfp_mask, 0);
if (!scratch)
goto out;
xdr_init_decode_pages(&xdr, &buf,
lgr->layoutp->pages, lgr->layoutp->len);
- xdr_set_scratch_page(&xdr, scratch);
+ xdr_set_scratch_folio(&xdr, scratch);
status = -EIO;
p = xdr_inline_decode(&xdr, 4);
@@ -722,7 +744,7 @@ process_extents:
}
out_free_scratch:
- __free_page(scratch);
+ folio_put(scratch);
out:
dprintk("%s returns %d\n", __func__, status);
switch (status) {
@@ -893,10 +915,9 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
}
if (pgio->pg_dreq == NULL)
- wb_size = pnfs_num_cont_bytes(pgio->pg_inode,
- req->wb_index);
+ wb_size = pnfs_num_cont_bytes(pgio->pg_inode, req->wb_index);
else
- wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
+ wb_size = nfs_dreq_bytes_left(pgio->pg_dreq, req_offset(req));
pnfs_generic_pg_init_write(pgio, req, wb_size);
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 716bc75e9ed2..6da40ca19570 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -104,20 +104,26 @@ struct pnfs_block_dev {
u64 start;
u64 len;
+ enum pnfs_block_volume_type type;
u32 nr_children;
struct pnfs_block_dev *children;
u64 chunk_size;
- struct block_device *bdev;
+ struct file *bdev_file;
u64 disk_offset;
+ unsigned long flags;
u64 pr_key;
- bool pr_registered;
bool (*map)(struct pnfs_block_dev *dev, u64 offset,
struct pnfs_block_dev_map *map);
};
+/* pnfs_block_dev flag bits */
+enum {
+ PNFS_BDEV_REGISTERED = 0,
+};
+
/* sector_t fields are all in 512-byte sectors */
struct pnfs_block_extent {
union {
@@ -172,6 +178,7 @@ struct bl_msg_hdr {
#define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */
/* dev.c */
+bool bl_register_dev(struct pnfs_block_dev *d);
struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server,
struct pnfs_device *pdev, gfp_t gfp_mask);
void bl_free_deviceid_node(struct nfs4_deviceid_node *d);
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index fea5f8821da5..ab76120705e2 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -10,12 +10,81 @@
#include <linux/pr.h>
#include "blocklayout.h"
+#include "../nfs4trace.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
+static void bl_unregister_scsi(struct pnfs_block_dev *dev)
+{
+ struct block_device *bdev = file_bdev(dev->bdev_file);
+ const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
+ int status;
+
+ status = ops->pr_register(bdev, dev->pr_key, 0, false);
+ if (status)
+ trace_bl_pr_key_unreg_err(bdev, dev->pr_key, status);
+ else
+ trace_bl_pr_key_unreg(bdev, dev->pr_key);
+}
+
+static bool bl_register_scsi(struct pnfs_block_dev *dev)
+{
+ struct block_device *bdev = file_bdev(dev->bdev_file);
+ const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
+ int status;
+
+ if (test_and_set_bit(PNFS_BDEV_REGISTERED, &dev->flags))
+ return true;
+
+ status = ops->pr_register(bdev, 0, dev->pr_key, true);
+ if (status) {
+ trace_bl_pr_key_reg_err(bdev, dev->pr_key, status);
+ return false;
+ }
+ trace_bl_pr_key_reg(bdev, dev->pr_key);
+ return true;
+}
+
+static void bl_unregister_dev(struct pnfs_block_dev *dev)
+{
+ u32 i;
+
+ if (dev->nr_children) {
+ for (i = 0; i < dev->nr_children; i++)
+ bl_unregister_dev(&dev->children[i]);
+ return;
+ }
+
+ if (dev->type == PNFS_BLOCK_VOLUME_SCSI &&
+ test_and_clear_bit(PNFS_BDEV_REGISTERED, &dev->flags))
+ bl_unregister_scsi(dev);
+}
+
+bool bl_register_dev(struct pnfs_block_dev *dev)
+{
+ u32 i;
+
+ if (dev->nr_children) {
+ for (i = 0; i < dev->nr_children; i++) {
+ if (!bl_register_dev(&dev->children[i])) {
+ while (i > 0)
+ bl_unregister_dev(&dev->children[--i]);
+ return false;
+ }
+ }
+ return true;
+ }
+
+ if (dev->type == PNFS_BLOCK_VOLUME_SCSI)
+ return bl_register_scsi(dev);
+ return true;
+}
+
static void
bl_free_device(struct pnfs_block_dev *dev)
{
+ bl_unregister_dev(dev);
+
if (dev->nr_children) {
int i;
@@ -23,19 +92,8 @@ bl_free_device(struct pnfs_block_dev *dev)
bl_free_device(&dev->children[i]);
kfree(dev->children);
} else {
- if (dev->pr_registered) {
- const struct pr_ops *ops =
- dev->bdev->bd_disk->fops->pr_ops;
- int error;
-
- error = ops->pr_register(dev->bdev, dev->pr_key, 0,
- false);
- if (error)
- pr_err("failed to unregister PR key.\n");
- }
-
- if (dev->bdev)
- blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE);
+ if (dev->bdev_file)
+ fput(dev->bdev_file);
}
}
@@ -169,7 +227,7 @@ static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset,
map->start = dev->start;
map->len = dev->len;
map->disk_offset = dev->disk_offset;
- map->bdev = dev->bdev;
+ map->bdev = file_bdev(dev->bdev_file);
return true;
}
@@ -199,10 +257,11 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
struct pnfs_block_dev *child;
u64 chunk;
u32 chunk_idx;
+ u64 disk_chunk;
u64 disk_offset;
chunk = div_u64(offset, dev->chunk_size);
- div_u64_rem(chunk, dev->nr_children, &chunk_idx);
+ disk_chunk = div_u64_rem(chunk, dev->nr_children, &chunk_idx);
if (chunk_idx >= dev->nr_children) {
dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
@@ -215,7 +274,7 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
offset = chunk * dev->chunk_size;
/* disk offset of the stripe */
- disk_offset = div_u64(offset, dev->nr_children);
+ disk_offset = disk_chunk * dev->chunk_size;
child = &dev->children[chunk_idx];
child->map(child, disk_offset, map);
@@ -236,27 +295,26 @@ bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
- struct block_device *bdev;
+ struct file *bdev_file;
dev_t dev;
dev = bl_resolve_deviceid(server, v, gfp_mask);
if (!dev)
return -EIO;
- bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL);
- if (IS_ERR(bdev)) {
+ bdev_file = bdev_file_open_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE,
+ NULL, NULL);
+ if (IS_ERR(bdev_file)) {
printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
- MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
- return PTR_ERR(bdev);
+ MAJOR(dev), MINOR(dev), PTR_ERR(bdev_file));
+ return PTR_ERR(bdev_file);
}
- d->bdev = bdev;
-
-
- d->len = bdev_nr_bytes(d->bdev);
+ d->bdev_file = bdev_file;
+ d->len = bdev_nr_bytes(file_bdev(bdev_file));
d->map = bl_map_simple;
printk(KERN_INFO "pNFS: using block device %s\n",
- d->bdev->bd_disk->disk_name);
+ file_bdev(bdev_file)->bd_disk->disk_name);
return 0;
}
@@ -301,10 +359,10 @@ bl_validate_designator(struct pnfs_block_volume *v)
}
}
-static struct block_device *
+static struct file *
bl_open_path(struct pnfs_block_volume *v, const char *prefix)
{
- struct block_device *bdev;
+ struct file *bdev_file;
const char *devname;
devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/%s%*phN",
@@ -312,14 +370,15 @@ bl_open_path(struct pnfs_block_volume *v, const char *prefix)
if (!devname)
return ERR_PTR(-ENOMEM);
- bdev = blkdev_get_by_path(devname, FMODE_READ | FMODE_WRITE, NULL);
- if (IS_ERR(bdev)) {
- pr_warn("pNFS: failed to open device %s (%ld)\n",
- devname, PTR_ERR(bdev));
+ bdev_file = bdev_file_open_by_path(devname, BLK_OPEN_READ | BLK_OPEN_WRITE,
+ NULL, NULL);
+ if (IS_ERR(bdev_file)) {
+ dprintk("failed to open device %s (%ld)\n",
+ devname, PTR_ERR(bdev_file));
}
kfree(devname);
- return bdev;
+ return bdev_file;
}
static int
@@ -329,6 +388,7 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *v = &volumes[idx];
struct block_device *bdev;
const struct pr_ops *ops;
+ struct file *bdev_file;
int error;
if (!bl_validate_designator(v))
@@ -340,40 +400,38 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
* On other distributions like Debian, the default SCSI by-id path will
* point to the dm-multipath device if one exists.
*/
- bdev = bl_open_path(v, "dm-uuid-mpath-0x");
- if (IS_ERR(bdev))
- bdev = bl_open_path(v, "wwn-0x");
- if (IS_ERR(bdev))
- return PTR_ERR(bdev);
- d->bdev = bdev;
-
- d->len = bdev_nr_bytes(d->bdev);
+ bdev_file = bl_open_path(v, "dm-uuid-mpath-0x");
+ if (IS_ERR(bdev_file))
+ bdev_file = bl_open_path(v, "wwn-0x");
+ if (IS_ERR(bdev_file))
+ bdev_file = bl_open_path(v, "nvme-eui.");
+ if (IS_ERR(bdev_file)) {
+ pr_warn("pNFS: no device found for volume %*phN\n",
+ v->scsi.designator_len, v->scsi.designator);
+ return PTR_ERR(bdev_file);
+ }
+ d->bdev_file = bdev_file;
+ bdev = file_bdev(bdev_file);
+
+ d->len = bdev_nr_bytes(bdev);
d->map = bl_map_simple;
d->pr_key = v->scsi.pr_key;
- pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
- d->bdev->bd_disk->disk_name, d->pr_key);
+ if (d->len == 0)
+ return -ENODEV;
- ops = d->bdev->bd_disk->fops->pr_ops;
+ ops = bdev->bd_disk->fops->pr_ops;
if (!ops) {
pr_err("pNFS: block device %s does not support reservations.",
- d->bdev->bd_disk->disk_name);
+ bdev->bd_disk->disk_name);
error = -EINVAL;
goto out_blkdev_put;
}
- error = ops->pr_register(d->bdev, 0, d->pr_key, true);
- if (error) {
- pr_err("pNFS: failed to register key for block device %s.",
- d->bdev->bd_disk->disk_name);
- goto out_blkdev_put;
- }
-
- d->pr_registered = true;
return 0;
out_blkdev_put:
- blkdev_put(d->bdev, FMODE_READ | FMODE_WRITE);
+ fput(d->bdev_file);
return error;
}
@@ -402,7 +460,7 @@ bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
int ret, i;
d->children = kcalloc(v->concat.volumes_count,
- sizeof(struct pnfs_block_dev), GFP_KERNEL);
+ sizeof(struct pnfs_block_dev), gfp_mask);
if (!d->children)
return -ENOMEM;
@@ -431,7 +489,7 @@ bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
int ret, i;
d->children = kcalloc(v->stripe.volumes_count,
- sizeof(struct pnfs_block_dev), GFP_KERNEL);
+ sizeof(struct pnfs_block_dev), gfp_mask);
if (!d->children)
return -ENOMEM;
@@ -455,7 +513,9 @@ static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
- switch (volumes[idx].type) {
+ d->type = volumes[idx].type;
+
+ switch (d->type) {
case PNFS_BLOCK_VOLUME_SIMPLE:
return bl_parse_simple(server, d, volumes, idx, gfp_mask);
case PNFS_BLOCK_VOLUME_SLICE:
@@ -467,7 +527,7 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
case PNFS_BLOCK_VOLUME_SCSI:
return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
default:
- dprintk("unsupported volume type: %d\n", volumes[idx].type);
+ dprintk("unsupported volume type: %d\n", d->type);
return -EIO;
}
}
@@ -481,16 +541,16 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
struct pnfs_block_dev *top;
struct xdr_stream xdr;
struct xdr_buf buf;
- struct page *scratch;
+ struct folio *scratch;
int nr_volumes, ret, i;
__be32 *p;
- scratch = alloc_page(gfp_mask);
+ scratch = folio_alloc(gfp_mask, 0);
if (!scratch)
goto out;
xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
- xdr_set_scratch_page(&xdr, scratch);
+ xdr_set_scratch_folio(&xdr, scratch);
p = xdr_inline_decode(&xdr, sizeof(__be32));
if (!p)
@@ -522,7 +582,7 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
out_free_volumes:
kfree(volumes);
out_free_scratch:
- __free_page(scratch);
+ folio_put(scratch);
out:
return node;
}
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index 8f7cff7a4293..315949a7e92d 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -6,6 +6,7 @@
#include <linux/vmalloc.h>
#include "blocklayout.h"
+#include "../nfs4trace.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
@@ -520,10 +521,71 @@ static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p)
return xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
}
-static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
+/**
+ * ext_tree_try_encode_commit - try to encode all extents into the buffer
+ * @bl: pointer to the layout
+ * @p: pointer to the output buffer
+ * @buffer_size: size of the output buffer
+ * @count: output pointer to the number of encoded extents
+ * @lastbyte: output pointer to the last written byte
+ *
+ * Return values:
+ * %0: Success, all required extents encoded, outputs are valid
+ * %-ENOSPC: Buffer too small, nothing encoded, outputs are invalid
+ */
+static int
+ext_tree_try_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
size_t buffer_size, size_t *count, __u64 *lastbyte)
{
struct pnfs_block_extent *be;
+
+ spin_lock(&bl->bl_ext_lock);
+ for (be = ext_tree_first(&bl->bl_ext_rw); be; be = ext_tree_next(be)) {
+ if (be->be_state != PNFS_BLOCK_INVALID_DATA ||
+ be->be_tag != EXTENT_WRITTEN)
+ continue;
+
+ (*count)++;
+ if (ext_tree_layoutupdate_size(bl, *count) > buffer_size) {
+ spin_unlock(&bl->bl_ext_lock);
+ return -ENOSPC;
+ }
+ }
+ for (be = ext_tree_first(&bl->bl_ext_rw); be; be = ext_tree_next(be)) {
+ if (be->be_state != PNFS_BLOCK_INVALID_DATA ||
+ be->be_tag != EXTENT_WRITTEN)
+ continue;
+
+ if (bl->bl_scsi_layout)
+ p = encode_scsi_range(be, p);
+ else
+ p = encode_block_extent(be, p);
+ be->be_tag = EXTENT_COMMITTING;
+ }
+ *lastbyte = (bl->bl_lwb != 0) ? bl->bl_lwb - 1 : U64_MAX;
+ bl->bl_lwb = 0;
+ spin_unlock(&bl->bl_ext_lock);
+
+ return 0;
+}
+
+/**
+ * ext_tree_encode_commit - encode as much as possible extents into the buffer
+ * @bl: pointer to the layout
+ * @p: pointer to the output buffer
+ * @buffer_size: size of the output buffer
+ * @count: output pointer to the number of encoded extents
+ * @lastbyte: output pointer to the last written byte
+ *
+ * Return values:
+ * %0: Success, all required extents encoded, outputs are valid
+ * %-ENOSPC: Buffer too small, some extents are encoded, outputs are valid
+ */
+static int
+ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
+ size_t buffer_size, size_t *count, __u64 *lastbyte)
+{
+ struct pnfs_block_extent *be, *be_prev;
int ret = 0;
spin_lock(&bl->bl_ext_lock);
@@ -534,9 +596,9 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
(*count)++;
if (ext_tree_layoutupdate_size(bl, *count) > buffer_size) {
- /* keep counting.. */
+ (*count)--;
ret = -ENOSPC;
- continue;
+ break;
}
if (bl->bl_scsi_layout)
@@ -544,14 +606,30 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
else
p = encode_block_extent(be, p);
be->be_tag = EXTENT_COMMITTING;
+ be_prev = be;
+ }
+ if (!ret) {
+ *lastbyte = (bl->bl_lwb != 0) ? bl->bl_lwb - 1 : U64_MAX;
+ bl->bl_lwb = 0;
+ } else {
+ *lastbyte = be_prev->be_f_offset + be_prev->be_length;
+ *lastbyte <<= SECTOR_SHIFT;
+ *lastbyte -= 1;
}
- *lastbyte = bl->bl_lwb - 1;
- bl->bl_lwb = 0;
spin_unlock(&bl->bl_ext_lock);
return ret;
}
+/**
+ * ext_tree_prepare_commit - encode extents that need to be committed
+ * @arg: layout commit data
+ *
+ * Return values:
+ * %0: Success, all required extents are encoded
+ * %-ENOSPC: Some extents are encoded, but not all, due to RPC size limit
+ * %-ENOMEM: Out of memory, extents not encoded
+ */
int
ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
{
@@ -560,20 +638,18 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
__be32 *start_p;
int ret;
- dprintk("%s enter\n", __func__);
-
arg->layoutupdate_page = alloc_page(GFP_NOFS);
if (!arg->layoutupdate_page)
return -ENOMEM;
start_p = page_address(arg->layoutupdate_page);
arg->layoutupdate_pages = &arg->layoutupdate_page;
-retry:
- ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count, &arg->lastbytewritten);
+ ret = ext_tree_try_encode_commit(bl, start_p + 1, buffer_size,
+ &count, &arg->lastbytewritten);
if (unlikely(ret)) {
ext_tree_free_commitdata(arg, buffer_size);
- buffer_size = ext_tree_layoutupdate_size(bl, count);
+ buffer_size = NFS_SERVER(arg->inode)->wsize;
count = 0;
arg->layoutupdate_pages =
@@ -588,7 +664,8 @@ retry:
return -ENOMEM;
}
- goto retry;
+ ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size,
+ &count, &arg->lastbytewritten);
}
*start_p = cpu_to_be32(count);
@@ -607,8 +684,9 @@ retry:
}
}
- dprintk("%s found %zu ranges\n", __func__, count);
- return 0;
+ trace_bl_ext_tree_prepare_commit(ret, count,
+ arg->lastbytewritten, !!ret);
+ return ret;
}
void
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c
index 6c977288cc28..d526f5ba7887 100644
--- a/fs/nfs/blocklayout/rpc_pipefs.c
+++ b/fs/nfs/blocklayout/rpc_pipefs.c
@@ -75,7 +75,7 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b,
msg->len = sizeof(*bl_msg) + b->simple.len;
msg->data = kzalloc(msg->len, gfp_mask);
if (!msg->data)
- goto out_free_data;
+ goto out_unlock;
bl_msg = msg->data;
bl_msg->type = BL_DEVICE_MOUNT;
@@ -141,24 +141,18 @@ static const struct rpc_pipe_ops bl_upcall_ops = {
.destroy_msg = bl_pipe_destroy_msg,
};
-static struct dentry *nfs4blocklayout_register_sb(struct super_block *sb,
+static int nfs4blocklayout_register_sb(struct super_block *sb,
struct rpc_pipe *pipe)
{
- struct dentry *dir, *dentry;
+ struct dentry *dir;
+ int err;
dir = rpc_d_lookup_sb(sb, NFS_PIPE_DIRNAME);
if (dir == NULL)
- return ERR_PTR(-ENOENT);
- dentry = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe);
+ return -ENOENT;
+ err = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe);
dput(dir);
- return dentry;
-}
-
-static void nfs4blocklayout_unregister_sb(struct super_block *sb,
- struct rpc_pipe *pipe)
-{
- if (pipe->dentry)
- rpc_unlink(pipe->dentry);
+ return err;
}
static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
@@ -167,7 +161,6 @@ static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
struct super_block *sb = ptr;
struct net *net = sb->s_fs_info;
struct nfs_net *nn = net_generic(net, nfs_net_id);
- struct dentry *dentry;
int ret = 0;
if (!try_module_get(THIS_MODULE))
@@ -180,16 +173,10 @@ static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
switch (event) {
case RPC_PIPEFS_MOUNT:
- dentry = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe);
- if (IS_ERR(dentry)) {
- ret = PTR_ERR(dentry);
- break;
- }
- nn->bl_device_pipe->dentry = dentry;
+ ret = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe);
break;
case RPC_PIPEFS_UMOUNT:
- if (nn->bl_device_pipe->dentry)
- nfs4blocklayout_unregister_sb(sb, nn->bl_device_pipe);
+ rpc_unlink(nn->bl_device_pipe);
break;
default:
ret = -ENOTSUPP;
@@ -203,18 +190,17 @@ static struct notifier_block nfs4blocklayout_block = {
.notifier_call = rpc_pipefs_event,
};
-static struct dentry *nfs4blocklayout_register_net(struct net *net,
- struct rpc_pipe *pipe)
+static int nfs4blocklayout_register_net(struct net *net, struct rpc_pipe *pipe)
{
struct super_block *pipefs_sb;
- struct dentry *dentry;
+ int ret;
pipefs_sb = rpc_get_sb_net(net);
if (!pipefs_sb)
- return NULL;
- dentry = nfs4blocklayout_register_sb(pipefs_sb, pipe);
+ return 0;
+ ret = nfs4blocklayout_register_sb(pipefs_sb, pipe);
rpc_put_sb_net(net);
- return dentry;
+ return ret;
}
static void nfs4blocklayout_unregister_net(struct net *net,
@@ -224,7 +210,7 @@ static void nfs4blocklayout_unregister_net(struct net *net,
pipefs_sb = rpc_get_sb_net(net);
if (pipefs_sb) {
- nfs4blocklayout_unregister_sb(pipefs_sb, pipe);
+ rpc_unlink(pipe);
rpc_put_sb_net(net);
}
}
@@ -232,20 +218,17 @@ static void nfs4blocklayout_unregister_net(struct net *net,
static int nfs4blocklayout_net_init(struct net *net)
{
struct nfs_net *nn = net_generic(net, nfs_net_id);
- struct dentry *dentry;
+ int err;
mutex_init(&nn->bl_mutex);
init_waitqueue_head(&nn->bl_wq);
nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0);
if (IS_ERR(nn->bl_device_pipe))
return PTR_ERR(nn->bl_device_pipe);
- dentry = nfs4blocklayout_register_net(net, nn->bl_device_pipe);
- if (IS_ERR(dentry)) {
+ err = nfs4blocklayout_register_net(net, nn->bl_device_pipe);
+ if (unlikely(err))
rpc_destroy_pipe_data(nn->bl_device_pipe);
- return PTR_ERR(dentry);
- }
- nn->bl_device_pipe->dentry = dentry;
- return 0;
+ return err;
}
static void nfs4blocklayout_net_exit(struct net *net)
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 456af7d230cf..c8b837006bb2 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -74,72 +74,20 @@ out_err:
static int
nfs4_callback_svc(void *vrqstp)
{
- int err;
struct svc_rqst *rqstp = vrqstp;
- set_freezable();
-
- while (!kthread_freezable_should_stop(NULL)) {
-
- if (signal_pending(current))
- flush_signals(current);
- /*
- * Listen for a request on the socket
- */
- err = svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT);
- if (err == -EAGAIN || err == -EINTR)
- continue;
- svc_process(rqstp);
- }
-
- svc_exit_thread(rqstp);
- return 0;
-}
-
-#if defined(CONFIG_NFS_V4_1)
-/*
- * The callback service for NFSv4.1 callbacks
- */
-static int
-nfs41_callback_svc(void *vrqstp)
-{
- struct svc_rqst *rqstp = vrqstp;
- struct svc_serv *serv = rqstp->rq_server;
- struct rpc_rqst *req;
- int error;
- DEFINE_WAIT(wq);
+ svc_thread_init_status(rqstp, 0);
set_freezable();
- while (!kthread_freezable_should_stop(NULL)) {
-
- if (signal_pending(current))
- flush_signals(current);
-
- prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE);
- spin_lock_bh(&serv->sv_cb_lock);
- if (!list_empty(&serv->sv_cb_list)) {
- req = list_first_entry(&serv->sv_cb_list,
- struct rpc_rqst, rq_bc_list);
- list_del(&req->rq_bc_list);
- spin_unlock_bh(&serv->sv_cb_lock);
- finish_wait(&serv->sv_cb_waitq, &wq);
- dprintk("Invoking bc_svc_process()\n");
- error = bc_svc_process(serv, req, rqstp);
- dprintk("bc_svc_process() returned w/ error code= %d\n",
- error);
- } else {
- spin_unlock_bh(&serv->sv_cb_lock);
- if (!kthread_should_stop())
- schedule();
- finish_wait(&serv->sv_cb_waitq, &wq);
- }
- }
+ while (!svc_thread_should_stop(rqstp))
+ svc_recv(rqstp);
svc_exit_thread(rqstp);
return 0;
}
+#if defined(CONFIG_NFS_V4_1)
static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
struct svc_serv *serv)
{
@@ -188,7 +136,7 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc
return;
dprintk("NFS: destroy per-net callback data; net=%x\n", net->ns.inum);
- svc_xprt_destroy_all(serv, net);
+ svc_xprt_destroy_all(serv, net, false);
}
static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
@@ -205,7 +153,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
ret = svc_bind(serv, net);
if (ret < 0) {
printk(KERN_WARNING "NFS: bind callback service failed\n");
- goto err_bind;
+ goto err;
}
ret = 0;
@@ -218,13 +166,11 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
if (ret < 0) {
printk(KERN_ERR "NFS: callback service start failed\n");
- goto err_socks;
+ goto err;
}
return 0;
-err_socks:
- svc_rpcb_cleanup(serv, net);
-err_bind:
+err:
nn->cb_users[minorversion]--;
dprintk("NFS: Couldn't create callback socket: err = %d; "
"net = %x\n", ret, net->ns.inum);
@@ -241,7 +187,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
* Check whether we're already up and running.
*/
if (cb_info->serv)
- return svc_get(cb_info->serv);
+ return cb_info->serv;
/*
* Sanity check: if there's no task,
@@ -252,10 +198,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
cb_info->users);
threadfn = nfs4_callback_svc;
-#if defined(CONFIG_NFS_V4_1)
- if (minorversion)
- threadfn = nfs41_callback_svc;
-#else
+#if !defined(CONFIG_NFS_V4_1)
if (minorversion)
return ERR_PTR(-ENOTSUPP);
#endif
@@ -266,10 +209,6 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
return ERR_PTR(-ENOMEM);
}
cb_info->serv = serv;
- /* As there is only one thread we need to over-ride the
- * default maximum of 80 connections
- */
- serv->sv_maxconn = 1024;
dprintk("nfs_callback_create_svc: service created\n");
return serv;
}
@@ -302,9 +241,10 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
cb_info->users++;
err_net:
- if (!cb_info->users)
- cb_info->serv = NULL;
- svc_put(serv);
+ if (!cb_info->users) {
+ svc_set_num_threads(cb_info->serv, NULL, 0);
+ svc_destroy(&cb_info->serv);
+ }
err_create:
mutex_unlock(&nfs_callback_mutex);
return ret;
@@ -328,11 +268,9 @@ void nfs_callback_down(int minorversion, struct net *net)
nfs_callback_down_net(minorversion, serv, net);
cb_info->users--;
if (cb_info->users == 0) {
- svc_get(serv);
svc_set_num_threads(serv, NULL, 0);
- svc_put(serv);
dprintk("nfs_callback_down: service destroyed\n");
- cb_info->serv = NULL;
+ svc_destroy(&cb_info->serv);
}
mutex_unlock(&nfs_callback_mutex);
}
@@ -387,7 +325,7 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
* All other checking done after NFS decoding where the nfs_client can be
* found in nfs4_callback_compound
*/
-static int nfs_callback_authenticate(struct svc_rqst *rqstp)
+static enum svc_auth_status nfs_callback_authenticate(struct svc_rqst *rqstp)
{
rqstp->rq_auth_stat = rpc_autherr_badcred;
@@ -414,15 +352,12 @@ static const struct svc_version *nfs4_callback_version[] = {
[4] = &nfs4_callback_version4,
};
-static struct svc_stat nfs4_callback_stats;
-
static struct svc_program nfs4_callback_program = {
.pg_prog = NFS4_CALLBACK, /* RPC service number */
.pg_nvers = ARRAY_SIZE(nfs4_callback_version), /* Number of entries */
.pg_vers = nfs4_callback_version, /* version table */
.pg_name = "NFSv4 callback", /* service name */
.pg_class = "nfs", /* authentication class */
- .pg_stats = &nfs4_callback_stats,
.pg_authenticate = nfs_callback_authenticate,
.pg_init_request = svc_generic_init_request,
.pg_rpcbind_set = svc_generic_rpcbind_set,
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index ccd4f245cae2..154a6ed1299f 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -19,32 +19,14 @@ enum nfs4_callback_procnum {
CB_COMPOUND = 1,
};
-enum nfs4_callback_opnum {
- OP_CB_GETATTR = 3,
- OP_CB_RECALL = 4,
-/* Callback operations new to NFSv4.1 */
- OP_CB_LAYOUTRECALL = 5,
- OP_CB_NOTIFY = 6,
- OP_CB_PUSH_DELEG = 7,
- OP_CB_RECALL_ANY = 8,
- OP_CB_RECALLABLE_OBJ_AVAIL = 9,
- OP_CB_RECALL_SLOT = 10,
- OP_CB_SEQUENCE = 11,
- OP_CB_WANTS_CANCELLED = 12,
- OP_CB_NOTIFY_LOCK = 13,
- OP_CB_NOTIFY_DEVICEID = 14,
-/* Callback operations new to NFSv4.2 */
- OP_CB_OFFLOAD = 15,
- OP_CB_ILLEGAL = 10044,
-};
-
struct nfs4_slot;
struct cb_process_state {
- __be32 drc_status;
struct nfs_client *clp;
struct nfs4_slot *slot;
- u32 minorversion;
struct net *net;
+ u32 minorversion;
+ __be32 drc_status;
+ unsigned int referring_calls;
};
struct cb_compound_hdr_arg {
@@ -64,14 +46,15 @@ struct cb_compound_hdr_res {
struct cb_getattrargs {
struct nfs_fh fh;
- uint32_t bitmap[2];
+ uint32_t bitmap[3];
};
struct cb_getattrres {
__be32 status;
- uint32_t bitmap[2];
+ uint32_t bitmap[3];
uint64_t size;
uint64_t change_attr;
+ struct timespec64 atime;
struct timespec64 ctime;
struct timespec64 mtime;
};
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index c1eda73254e1..8397c43358bd 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -37,7 +37,7 @@ __be32 nfs4_callback_getattr(void *argp, void *resp,
if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
goto out;
- res->bitmap[0] = res->bitmap[1] = 0;
+ memset(res->bitmap, 0, sizeof(res->bitmap));
res->status = htonl(NFS4ERR_BADHANDLE);
dprintk_rcu("NFS: GETATTR callback request from %s\n",
@@ -59,12 +59,16 @@ __be32 nfs4_callback_getattr(void *argp, void *resp,
res->change_attr = delegation->change_attr;
if (nfs_have_writebacks(inode))
res->change_attr++;
- res->ctime = inode->i_ctime;
- res->mtime = inode->i_mtime;
- res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) &
- args->bitmap[0];
- res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) &
- args->bitmap[1];
+ res->atime = inode_get_atime(inode);
+ res->ctime = inode_get_ctime(inode);
+ res->mtime = inode_get_mtime(inode);
+ res->bitmap[0] = (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE) &
+ args->bitmap[0];
+ res->bitmap[1] = (FATTR4_WORD1_TIME_ACCESS |
+ FATTR4_WORD1_TIME_METADATA |
+ FATTR4_WORD1_TIME_MODIFY) & args->bitmap[1];
+ res->bitmap[2] = (FATTR4_WORD2_TIME_DELEG_ACCESS |
+ FATTR4_WORD2_TIME_DELEG_MODIFY) & args->bitmap[2];
res->status = 0;
out_iput:
rcu_read_unlock();
@@ -207,7 +211,8 @@ static struct inode *nfs_layout_find_inode(struct nfs_client *clp,
* Enforce RFC5661 section 12.5.5.2.1. (Layout Recall and Return Sequencing)
*/
static u32 pnfs_check_callback_stateid(struct pnfs_layout_hdr *lo,
- const nfs4_stateid *new)
+ const nfs4_stateid *new,
+ struct cb_process_state *cps)
{
u32 oldseq, newseq;
@@ -221,28 +226,29 @@ static u32 pnfs_check_callback_stateid(struct pnfs_layout_hdr *lo,
newseq = be32_to_cpu(new->seqid);
/* Are we already in a layout recall situation? */
- if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) &&
- lo->plh_return_seq != 0) {
- if (newseq < lo->plh_return_seq)
- return NFS4ERR_OLD_STATEID;
- if (newseq > lo->plh_return_seq)
- return NFS4ERR_DELAY;
- goto out;
- }
+ if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
+ return NFS4ERR_DELAY;
- /* Check that the stateid matches what we think it should be. */
+ /*
+ * Check that the stateid matches what we think it should be.
+ * Note that if the server sent us a list of referring calls,
+ * and we know that those have completed, then we trust the
+ * stateid argument is correct.
+ */
oldseq = be32_to_cpu(lo->plh_stateid.seqid);
- if (newseq > oldseq + 1)
+ if (newseq > oldseq + 1 && !cps->referring_calls)
return NFS4ERR_DELAY;
+
/* Crazy server! */
if (newseq <= oldseq)
return NFS4ERR_OLD_STATEID;
-out:
+
return NFS_OK;
}
static u32 initiate_file_draining(struct nfs_client *clp,
- struct cb_layoutrecallargs *args)
+ struct cb_layoutrecallargs *args,
+ struct cb_process_state *cps)
{
struct inode *ino;
struct pnfs_layout_hdr *lo;
@@ -266,7 +272,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
goto out;
}
pnfs_get_layout_hdr(lo);
- rv = pnfs_check_callback_stateid(lo, &args->cbl_stateid);
+ rv = pnfs_check_callback_stateid(lo, &args->cbl_stateid, cps);
if (rv != NFS_OK)
goto unlock;
@@ -317,19 +323,21 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
int stat;
if (args->cbl_recall_type == RETURN_FSID)
- stat = pnfs_destroy_layouts_byfsid(clp, &args->cbl_fsid, true);
+ stat = pnfs_layout_destroy_byfsid(clp, &args->cbl_fsid,
+ PNFS_LAYOUT_BULK_RETURN);
else
- stat = pnfs_destroy_layouts_byclid(clp, true);
+ stat = pnfs_layout_destroy_byclid(clp, PNFS_LAYOUT_BULK_RETURN);
if (stat != 0)
return NFS4ERR_DELAY;
return NFS4ERR_NOMATCHING_LAYOUT;
}
static u32 do_callback_layoutrecall(struct nfs_client *clp,
- struct cb_layoutrecallargs *args)
+ struct cb_layoutrecallargs *args,
+ struct cb_process_state *cps)
{
if (args->cbl_recall_type == RETURN_FILE)
- return initiate_file_draining(clp, args);
+ return initiate_file_draining(clp, args, cps);
return initiate_bulk_draining(clp, args);
}
@@ -340,11 +348,12 @@ __be32 nfs4_callback_layoutrecall(void *argp, void *resp,
u32 res = NFS4ERR_OP_NOT_IN_SESSION;
if (cps->clp)
- res = do_callback_layoutrecall(cps->clp, args);
+ res = do_callback_layoutrecall(cps->clp, args, cps);
return cpu_to_be32(res);
}
-static void pnfs_recall_all_layouts(struct nfs_client *clp)
+static void pnfs_recall_all_layouts(struct nfs_client *clp,
+ struct cb_process_state *cps)
{
struct cb_layoutrecallargs args;
@@ -352,7 +361,7 @@ static void pnfs_recall_all_layouts(struct nfs_client *clp)
memset(&args, 0, sizeof(args));
args.cbl_recall_type = RETURN_ALL;
/* FIXME we ignore errors, what should we do? */
- do_callback_layoutrecall(clp, &args);
+ do_callback_layoutrecall(clp, &args, cps);
}
__be32 nfs4_callback_devicenotify(void *argp, void *resp,
@@ -450,6 +459,7 @@ static int referring_call_exists(struct nfs_client *clp,
__acquires(lock)
{
int status = 0;
+ int found = 0;
int i, j;
struct nfs4_session *session;
struct nfs4_slot_table *tbl;
@@ -478,11 +488,12 @@ static int referring_call_exists(struct nfs_client *clp,
spin_lock(lock);
if (status)
goto out;
+ found++;
}
}
out:
- return status;
+ return status < 0 ? status : found;
}
__be32 nfs4_callback_sequence(void *argp, void *resp,
@@ -493,6 +504,7 @@ __be32 nfs4_callback_sequence(void *argp, void *resp,
struct nfs4_slot_table *tbl;
struct nfs4_slot *slot;
struct nfs_client *clp;
+ int ret;
int i;
__be32 status = htonl(NFS4ERR_BADSESSION);
@@ -552,11 +564,13 @@ __be32 nfs4_callback_sequence(void *argp, void *resp,
* related callback was received before the response to the original
* call.
*/
- if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists,
- &tbl->slot_tbl_lock) < 0) {
+ ret = referring_call_exists(clp, args->csa_nrclists, args->csa_rclists,
+ &tbl->slot_tbl_lock);
+ if (ret < 0) {
status = htonl(NFS4ERR_DELAY);
goto out_unlock;
}
+ cps->referring_calls = ret;
/*
* RFC5661 20.9.3
@@ -617,7 +631,7 @@ __be32 nfs4_callback_recallany(void *argp, void *resp,
nfs_expire_unused_delegation_types(cps->clp, flags);
if (args->craa_type_mask & BIT(RCA4_TYPE_MASK_FILE_LAYOUT))
- pnfs_recall_all_layouts(cps->clp);
+ pnfs_recall_all_layouts(cps->clp, cps);
if (args->craa_type_mask & BIT(PNFS_FF_RCA4_TYPE_MASK_READ)) {
set_bit(NFS4CLNT_RECALL_ANY_LAYOUT_READ, &cps->clp->cl_state);
@@ -704,7 +718,7 @@ __be32 nfs4_callback_offload(void *data, void *dummy,
copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_KERNEL);
if (!copy)
- return htonl(NFS4ERR_SERVERFAULT);
+ return cpu_to_be32(NFS4ERR_DELAY);
spin_lock(&cps->clp->cl_lock);
rcu_read_lock();
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index d0cccddb7d08..4254ba3ee7c5 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -25,8 +25,9 @@
#define CB_OP_GETATTR_BITMAP_MAXSZ (4 * 4) // bitmap length, 3 bitmaps
#define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \
CB_OP_GETATTR_BITMAP_MAXSZ + \
- /* change, size, ctime, mtime */\
- (2 + 2 + 3 + 3) * 4)
+ /* change, size, atime, ctime,
+ * mtime, deleg_atime, deleg_mtime */\
+ (2 + 2 + 3 + 3 + 3 + 3 + 3) * 4)
#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
#if defined(CONFIG_NFS_V4_1)
@@ -117,7 +118,9 @@ static __be32 decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
if (likely(attrlen > 0))
bitmap[0] = ntohl(*p++);
if (attrlen > 1)
- bitmap[1] = ntohl(*p);
+ bitmap[1] = ntohl(*p++);
+ if (attrlen > 2)
+ bitmap[2] = ntohl(*p);
return 0;
}
@@ -372,6 +375,8 @@ static __be32 decode_rc_list(struct xdr_stream *xdr,
rc_list->rcl_nrefcalls = ntohl(*p++);
if (rc_list->rcl_nrefcalls) {
+ if (unlikely(rc_list->rcl_nrefcalls > xdr->buf->len))
+ goto out;
p = xdr_inline_decode(xdr,
rc_list->rcl_nrefcalls * 2 * sizeof(uint32_t));
if (unlikely(p == NULL))
@@ -445,7 +450,7 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp,
void *argp)
{
struct cb_recallanyargs *args = argp;
- uint32_t bitmap[2];
+ uint32_t bitmap[3];
__be32 *p, status;
p = xdr_inline_decode(xdr, 4);
@@ -635,6 +640,13 @@ static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec64 *
return 0;
}
+static __be32 encode_attr_atime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time)
+{
+ if (!(bitmap[1] & FATTR4_WORD1_TIME_ACCESS))
+ return 0;
+ return encode_attr_time(xdr,time);
+}
+
static __be32 encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time)
{
if (!(bitmap[1] & FATTR4_WORD1_TIME_METADATA))
@@ -649,6 +661,24 @@ static __be32 encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap,
return encode_attr_time(xdr,time);
}
+static __be32 encode_attr_delegatime(struct xdr_stream *xdr,
+ const uint32_t *bitmap,
+ const struct timespec64 *time)
+{
+ if (!(bitmap[2] & FATTR4_WORD2_TIME_DELEG_ACCESS))
+ return 0;
+ return encode_attr_time(xdr,time);
+}
+
+static __be32 encode_attr_delegmtime(struct xdr_stream *xdr,
+ const uint32_t *bitmap,
+ const struct timespec64 *time)
+{
+ if (!(bitmap[2] & FATTR4_WORD2_TIME_DELEG_MODIFY))
+ return 0;
+ return encode_attr_time(xdr,time);
+}
+
static __be32 encode_compound_hdr_res(struct xdr_stream *xdr, struct cb_compound_hdr_res *hdr)
{
__be32 status;
@@ -699,10 +729,19 @@ static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr,
status = encode_attr_size(xdr, res->bitmap, res->size);
if (unlikely(status != 0))
goto out;
+ status = encode_attr_atime(xdr, res->bitmap, &res->atime);
+ if (unlikely(status != 0))
+ goto out;
status = encode_attr_ctime(xdr, res->bitmap, &res->ctime);
if (unlikely(status != 0))
goto out;
status = encode_attr_mtime(xdr, res->bitmap, &res->mtime);
+ if (unlikely(status != 0))
+ goto out;
+ status = encode_attr_delegatime(xdr, res->bitmap, &res->atime);
+ if (unlikely(status != 0))
+ goto out;
+ status = encode_attr_delegmtime(xdr, res->bitmap, &res->mtime);
*savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1)));
out:
return status;
@@ -945,6 +984,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
nfs_put_client(cps.clp);
goto out_invalidcred;
}
+ svc_xprt_set_valid(rqstp->rq_xprt);
}
cps.minorversion = hdr_arg.minorversion;
@@ -967,6 +1007,11 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
nops--;
}
+ if (svc_is_backchannel(rqstp) && cps.clp) {
+ rqstp->bc_to_initval = cps.clp->cl_rpcclient->cl_timeout->to_initval;
+ rqstp->bc_to_retries = cps.clp->cl_rpcclient->cl_timeout->to_retries;
+ }
+
*hdr_res.status = status;
*hdr_res.nops = htonl(nops);
nfs4_cb_free_slot(&cps);
@@ -980,14 +1025,11 @@ out_invalidcred:
}
static int
-nfs_callback_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+nfs_callback_dispatch(struct svc_rqst *rqstp)
{
const struct svc_procedure *procp = rqstp->rq_procinfo;
- svcxdr_init_decode(rqstp);
- svcxdr_init_encode(rqstp);
-
- *statp = procp->pc_func(rqstp);
+ *rqstp->rq_accept_statp = procp->pc_func(rqstp);
return 1;
}
@@ -1072,7 +1114,8 @@ static const struct svc_procedure nfs4_callback_procedures1[] = {
}
};
-static unsigned int nfs4_callback_count1[ARRAY_SIZE(nfs4_callback_procedures1)];
+static DEFINE_PER_CPU_ALIGNED(unsigned long,
+ nfs4_callback_count1[ARRAY_SIZE(nfs4_callback_procedures1)]);
const struct svc_version nfs4_callback_version1 = {
.vs_vers = 1,
.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
@@ -1084,7 +1127,8 @@ const struct svc_version nfs4_callback_version1 = {
.vs_need_cong_ctrl = true,
};
-static unsigned int nfs4_callback_count4[ARRAY_SIZE(nfs4_callback_procedures1)];
+static DEFINE_PER_CPU_ALIGNED(unsigned long,
+ nfs4_callback_count4[ARRAY_SIZE(nfs4_callback_procedures1)]);
const struct svc_version nfs4_callback_version4 = {
.vs_vers = 4,
.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index f50e025ae406..54699299d5b1 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -38,7 +38,7 @@
#include <linux/sunrpc/bc_xprt.h>
#include <linux/nsproxy.h>
#include <linux/pid_namespace.h>
-
+#include <linux/nfslocalio.h>
#include "nfs4_fs.h"
#include "callback.h"
@@ -55,9 +55,13 @@
#define NFSDBG_FACILITY NFSDBG_CLIENT
static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
-static DEFINE_SPINLOCK(nfs_version_lock);
-static DEFINE_MUTEX(nfs_version_mutex);
-static LIST_HEAD(nfs_versions);
+static DEFINE_RWLOCK(nfs_version_lock);
+
+static struct nfs_subversion *nfs_version_mods[5] = {
+ [2] = NULL,
+ [3] = NULL,
+ [4] = NULL,
+};
/*
* RPC cruft for NFS
@@ -73,46 +77,41 @@ const struct rpc_program nfs_program = {
.number = NFS_PROGRAM,
.nrvers = ARRAY_SIZE(nfs_version),
.version = nfs_version,
- .stats = &nfs_rpcstat,
.pipe_dir_name = NFS_PIPE_DIRNAME,
};
-struct rpc_stat nfs_rpcstat = {
- .program = &nfs_program
-};
-
-static struct nfs_subversion *find_nfs_version(unsigned int version)
+static struct nfs_subversion *__find_nfs_version(unsigned int version)
{
struct nfs_subversion *nfs;
- spin_lock(&nfs_version_lock);
-
- list_for_each_entry(nfs, &nfs_versions, list) {
- if (nfs->rpc_ops->version == version) {
- spin_unlock(&nfs_version_lock);
- return nfs;
- }
- }
- spin_unlock(&nfs_version_lock);
- return ERR_PTR(-EPROTONOSUPPORT);
+ read_lock(&nfs_version_lock);
+ nfs = nfs_version_mods[version];
+ read_unlock(&nfs_version_lock);
+ return nfs;
}
-struct nfs_subversion *get_nfs_version(unsigned int version)
+struct nfs_subversion *find_nfs_version(unsigned int version)
{
- struct nfs_subversion *nfs = find_nfs_version(version);
+ struct nfs_subversion *nfs = __find_nfs_version(version);
- if (IS_ERR(nfs)) {
- mutex_lock(&nfs_version_mutex);
- request_module("nfsv%d", version);
- nfs = find_nfs_version(version);
- mutex_unlock(&nfs_version_mutex);
- }
+ if (!nfs && request_module("nfsv%d", version) == 0)
+ nfs = __find_nfs_version(version);
+
+ if (!nfs)
+ return ERR_PTR(-EPROTONOSUPPORT);
- if (!IS_ERR(nfs) && !try_module_get(nfs->owner))
+ if (!get_nfs_version(nfs))
return ERR_PTR(-EAGAIN);
+
return nfs;
}
+int get_nfs_version(struct nfs_subversion *nfs)
+{
+ return try_module_get(nfs->owner);
+}
+EXPORT_SYMBOL_GPL(get_nfs_version);
+
void put_nfs_version(struct nfs_subversion *nfs)
{
module_put(nfs->owner);
@@ -120,23 +119,23 @@ void put_nfs_version(struct nfs_subversion *nfs)
void register_nfs_version(struct nfs_subversion *nfs)
{
- spin_lock(&nfs_version_lock);
+ write_lock(&nfs_version_lock);
- list_add(&nfs->list, &nfs_versions);
+ nfs_version_mods[nfs->rpc_ops->version] = nfs;
nfs_version[nfs->rpc_ops->version] = nfs->rpc_vers;
- spin_unlock(&nfs_version_lock);
+ write_unlock(&nfs_version_lock);
}
EXPORT_SYMBOL_GPL(register_nfs_version);
void unregister_nfs_version(struct nfs_subversion *nfs)
{
- spin_lock(&nfs_version_lock);
+ write_lock(&nfs_version_lock);
nfs_version[nfs->rpc_ops->version] = NULL;
- list_del(&nfs->list);
+ nfs_version_mods[nfs->rpc_ops->version] = NULL;
- spin_unlock(&nfs_version_lock);
+ write_unlock(&nfs_version_lock);
}
EXPORT_SYMBOL_GPL(unregister_nfs_version);
@@ -156,7 +155,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
clp->cl_minorversion = cl_init->minorversion;
clp->cl_nfs_mod = cl_init->nfs_mod;
- if (!try_module_get(clp->cl_nfs_mod->owner))
+ if (!get_nfs_version(clp->cl_nfs_mod))
goto error_dealloc;
clp->rpc_ops = clp->cl_nfs_mod->rpc_ops;
@@ -181,9 +180,17 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
clp->cl_proto = cl_init->proto;
clp->cl_nconnect = cl_init->nconnect;
clp->cl_max_connect = cl_init->max_connect ? cl_init->max_connect : 1;
- clp->cl_net = get_net(cl_init->net);
+ clp->cl_net = get_net_track(cl_init->net, &clp->cl_ns_tracker, GFP_KERNEL);
+
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ seqlock_init(&clp->cl_boot_lock);
+ ktime_get_real_ts64(&clp->cl_nfssvc_boot);
+ nfs_uuid_init(&clp->cl_uuid);
+ INIT_WORK(&clp->cl_local_probe_work, nfs_local_probe_async_work);
+#endif /* CONFIG_NFS_LOCALIO */
clp->cl_principal = "*";
+ clp->cl_xprtsec = cl_init->xprtsec;
return clp;
error_cleanup:
@@ -237,15 +244,17 @@ static void pnfs_init_server(struct nfs_server *server)
*/
void nfs_free_client(struct nfs_client *clp)
{
+ nfs_localio_disable_client(clp);
+
/* -EIO all pending I/O */
if (!IS_ERR(clp->cl_rpcclient))
rpc_shutdown_client(clp->cl_rpcclient);
- put_net(clp->cl_net);
+ put_net_track(clp->cl_net, &clp->cl_ns_tracker);
put_nfs_version(clp->cl_nfs_mod);
kfree(clp->cl_hostname);
kfree(clp->cl_acceptor);
- kfree(clp);
+ kfree_rcu(clp, rcu);
}
EXPORT_SYMBOL_GPL(nfs_free_client);
@@ -326,6 +335,18 @@ again:
sap))
continue;
+ /* Match the xprt security policy */
+ if (clp->cl_xprtsec.policy != data->xprtsec.policy)
+ continue;
+ if (clp->cl_xprtsec.policy == RPC_XPRTSEC_TLS_X509) {
+ if (clp->cl_xprtsec.cert_serial !=
+ data->xprtsec.cert_serial)
+ continue;
+ if (clp->cl_xprtsec.privkey_serial !=
+ data->xprtsec.privkey_serial)
+ continue;
+ }
+
refcount_inc(&clp->cl_count);
return clp;
}
@@ -424,7 +445,10 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
list_add_tail(&new->cl_share_link,
&nn->nfs_client_list);
spin_unlock(&nn->nfs_client_lock);
- return rpc_ops->init_client(new, cl_init);
+ new = rpc_ops->init_client(new, cl_init);
+ if (!IS_ERR(new))
+ nfs_local_probe_async(new);
+ return new;
}
spin_unlock(&nn->nfs_client_lock);
@@ -458,6 +482,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
switch (proto) {
case XPRT_TRANSPORT_TCP:
+ case XPRT_TRANSPORT_TCP_TLS:
case XPRT_TRANSPORT_RDMA:
if (retrans == NFS_UNSPEC_RETRANS)
to->to_retries = NFS_DEF_TCP_RETRANS;
@@ -496,6 +521,7 @@ int nfs_create_rpc_client(struct nfs_client *clp,
const struct nfs_client_initdata *cl_init,
rpc_authflavor_t flavor)
{
+ struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
struct rpc_clnt *clnt = NULL;
struct rpc_create_args args = {
.net = clp->cl_net,
@@ -507,9 +533,13 @@ int nfs_create_rpc_client(struct nfs_client *clp,
.servername = clp->cl_hostname,
.nodename = cl_init->nodename,
.program = &nfs_program,
+ .stats = &nn->rpcstats,
.version = clp->rpc_ops->version,
.authflavor = flavor,
.cred = cl_init->cred,
+ .xprtsec = cl_init->xprtsec,
+ .connect_timeout = cl_init->connect_timeout,
+ .reconnect_timeout = cl_init->reconnect_timeout,
};
if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags))
@@ -524,6 +554,8 @@ int nfs_create_rpc_client(struct nfs_client *clp,
args.flags |= RPC_CLNT_CREATE_NOPING;
if (test_bit(NFS_CS_REUSEPORT, &clp->cl_flags))
args.flags |= RPC_CLNT_CREATE_REUSEPORT;
+ if (test_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags))
+ args.flags |= RPC_CLNT_CREATE_NETUNREACH_FATAL;
if (!IS_ERR(clp->cl_rpcclient))
return 0;
@@ -592,6 +624,7 @@ static int nfs_start_lockd(struct nfs_server *server)
server->nlm_host = host;
server->destroy = nfs_destroy_server;
+ nfs_sysfs_link_rpc_client(server, nlmclnt_rpc_clnt(host), NULL);
return 0;
}
@@ -621,6 +654,7 @@ int nfs_init_server_rpcclient(struct nfs_server *server,
if (server->flags & NFS_MOUNT_SOFT)
server->client->cl_softrtry = 1;
+ nfs_sysfs_link_rpc_client(server, server->client, NULL);
return 0;
}
EXPORT_SYMBOL_GPL(nfs_init_server_rpcclient);
@@ -656,6 +690,44 @@ struct nfs_client *nfs_init_client(struct nfs_client *clp,
}
EXPORT_SYMBOL_GPL(nfs_init_client);
+static void nfs4_server_set_init_caps(struct nfs_server *server)
+{
+#if IS_ENABLED(CONFIG_NFS_V4)
+ /* Set the basic capabilities */
+ server->caps = server->nfs_client->cl_mvops->init_caps;
+ if (server->flags & NFS_MOUNT_NORDIRPLUS)
+ server->caps &= ~NFS_CAP_READDIRPLUS;
+ if (server->nfs_client->cl_proto == XPRT_TRANSPORT_RDMA)
+ server->caps &= ~NFS_CAP_READ_PLUS;
+
+ /*
+ * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
+ * authentication.
+ */
+ if (nfs4_disable_idmapping &&
+ server->client->cl_auth->au_flavor == RPC_AUTH_UNIX)
+ server->caps |= NFS_CAP_UIDGID_NOMAP;
+#endif
+}
+
+void nfs_server_set_init_caps(struct nfs_server *server)
+{
+ switch (server->nfs_client->rpc_ops->version) {
+ case 2:
+ server->caps = NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS;
+ break;
+ case 3:
+ server->caps = NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS;
+ if (!(server->flags & NFS_MOUNT_NORDIRPLUS))
+ server->caps |= NFS_CAP_READDIRPLUS;
+ break;
+ default:
+ nfs4_server_set_init_caps(server);
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(nfs_server_set_init_caps);
+
/*
* Create a version 2 or 3 client
*/
@@ -675,6 +747,7 @@ static int nfs_init_server(struct nfs_server *server,
.cred = server->cred,
.nconnect = ctx->nfs_server.nconnect,
.init_flags = (1UL << NFS_CS_REUSEPORT),
+ .xprtsec = ctx->xprtsec,
};
struct nfs_client *clp;
int error;
@@ -684,17 +757,21 @@ static int nfs_init_server(struct nfs_server *server,
if (ctx->flags & NFS_MOUNT_NORESVPORT)
set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+ if (ctx->flags & NFS_MOUNT_NETUNREACH_FATAL)
+ __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags);
+
/* Allocate or find a client reference we can use */
clp = nfs_get_client(&cl_init);
if (IS_ERR(clp))
return PTR_ERR(clp);
server->nfs_client = clp;
+ nfs_sysfs_add_server(server);
+ nfs_sysfs_link_rpc_client(server, clp->cl_rpcclient, "_state");
/* Initialise the client representation from the mount data */
server->flags = ctx->flags;
server->options = ctx->options;
- server->caps |= NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS;
switch (clp->rpc_ops->version) {
case 2:
@@ -730,6 +807,8 @@ static int nfs_init_server(struct nfs_server *server,
if (error < 0)
goto error;
+ nfs_server_set_init_caps(server);
+
/* Preserve the values of mount_server-related mount options */
if (ctx->mount_server.addrlen) {
memcpy(&server->mountd_address, &ctx->mount_server.address,
@@ -782,7 +861,6 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
server->wsize = max_rpc_payload;
if (server->wsize > NFS_MAX_FILE_IO_SIZE)
server->wsize = NFS_MAX_FILE_IO_SIZE;
- server->wpages = (server->wsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
@@ -799,7 +877,6 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
server->maxfilesize = fsinfo->maxfilesize;
- server->time_delta = fsinfo->time_delta;
server->change_attr_type = fsinfo->change_attr_type;
server->clone_blksize = fsinfo->clone_blksize;
@@ -819,6 +896,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
if (fsinfo->xattr_support)
server->caps |= NFS_CAP_XATTR;
+ else
+ server->caps &= ~NFS_CAP_XATTR;
#endif
}
@@ -904,7 +983,6 @@ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *sour
target->acregmax = source->acregmax;
target->acdirmin = source->acdirmin;
target->acdirmax = source->acdirmax;
- target->caps = source->caps;
target->options = source->options;
target->auth_info = source->auth_info;
target->port = source->port;
@@ -944,6 +1022,8 @@ void nfs_server_remove_lists(struct nfs_server *server)
}
EXPORT_SYMBOL_GPL(nfs_server_remove_lists);
+static DEFINE_IDA(s_sysfs_ids);
+
/*
* Allocate and initialise a server record
*/
@@ -955,6 +1035,12 @@ struct nfs_server *nfs_alloc_server(void)
if (!server)
return NULL;
+ server->s_sysfs_id = ida_alloc(&s_sysfs_ids, GFP_KERNEL);
+ if (server->s_sysfs_id < 0) {
+ kfree(server);
+ return NULL;
+ }
+
server->client = server->client_acl = ERR_PTR(-EINVAL);
/* Zero out the NFS state stuff */
@@ -964,8 +1050,10 @@ struct nfs_server *nfs_alloc_server(void)
INIT_LIST_HEAD(&server->layouts);
INIT_LIST_HEAD(&server->state_owners_lru);
INIT_LIST_HEAD(&server->ss_copies);
+ INIT_LIST_HEAD(&server->ss_src_copies);
atomic_set(&server->active, 0);
+ atomic_long_set(&server->nr_active_delegations, 0);
server->io_stats = nfs_alloc_iostats();
if (!server->io_stats) {
@@ -975,8 +1063,11 @@ struct nfs_server *nfs_alloc_server(void)
server->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED;
- ida_init(&server->openowner_id);
- ida_init(&server->lockowner_id);
+ init_waitqueue_head(&server->write_congestion_wait);
+ atomic_long_set(&server->writeback, 0);
+
+ atomic64_set(&server->owner_ctr, 0);
+
pnfs_init_server(server);
rpc_init_wait_queue(&server->uoc_rpcwaitq, "NFS UOC");
@@ -984,6 +1075,14 @@ struct nfs_server *nfs_alloc_server(void)
}
EXPORT_SYMBOL_GPL(nfs_alloc_server);
+static void delayed_free(struct rcu_head *p)
+{
+ struct nfs_server *server = container_of(p, struct nfs_server, rcu);
+
+ nfs_free_iostats(server->io_stats);
+ kfree(server);
+}
+
/*
* Free up a server record
*/
@@ -1001,12 +1100,15 @@ void nfs_free_server(struct nfs_server *server)
nfs_put_client(server->nfs_client);
- ida_destroy(&server->lockowner_id);
- ida_destroy(&server->openowner_id);
- nfs_free_iostats(server->io_stats);
+ if (server->kobj.state_initialized) {
+ nfs_sysfs_remove_server(server);
+ kobject_put(&server->kobj);
+ }
+ ida_free(&s_sysfs_ids, server->s_sysfs_id);
+
put_cred(server->cred);
- kfree(server);
nfs_release_automount_timer();
+ call_rcu(&server->rcu, delayed_free);
}
EXPORT_SYMBOL_GPL(nfs_free_server);
@@ -1050,6 +1152,8 @@ struct nfs_server *nfs_create_server(struct fs_context *fc)
if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
server->namelen = NFS2_MAXNAMLEN;
}
+ /* Linux 'subtree_check' borkenness mandates this setting */
+ server->fh_expire_type = NFS_FH_VOL_RENAME;
if (!(fattr->valid & NFS_ATTR_FATTR)) {
error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh,
@@ -1102,12 +1206,19 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
server->fsid = fattr->fsid;
+ nfs_sysfs_add_server(server);
+
+ nfs_sysfs_link_rpc_client(server,
+ server->nfs_client->cl_rpcclient, "_state");
+
error = nfs_init_server_rpcclient(server,
source->client->cl_timeout,
flavor);
if (error < 0)
goto out_free_server;
+ nfs_server_set_init_caps(server);
+
/* probe the filesystem info for this server filesystem */
error = nfs_probe_server(server, fh);
if (error < 0)
@@ -1140,8 +1251,14 @@ void nfs_clients_init(struct net *net)
#if IS_ENABLED(CONFIG_NFS_V4)
idr_init(&nn->cb_ident_idr);
#endif
+#if IS_ENABLED(CONFIG_NFS_V4_1)
+ INIT_LIST_HEAD(&nn->nfs4_data_server_cache);
+ spin_lock_init(&nn->nfs4_data_server_lock);
+#endif
spin_lock_init(&nn->nfs_client_lock);
nn->boot_time = ktime_get_real();
+ memset(&nn->rpcstats, 0, sizeof(nn->rpcstats));
+ nn->rpcstats.program = &nfs_program;
nfs_netns_sysfs_setup(nn, net);
}
@@ -1154,6 +1271,9 @@ void nfs_clients_exit(struct net *net)
nfs_cleanup_cb_ident_idr(net);
WARN_ON_ONCE(!list_empty(&nn->nfs_client_list));
WARN_ON_ONCE(!list_empty(&nn->nfs_volume_list));
+#if IS_ENABLED(CONFIG_NFS_V4_1)
+ WARN_ON_ONCE(!list_empty(&nn->nfs4_data_server_cache));
+#endif
}
#ifdef CONFIG_PROC_FS
@@ -1385,6 +1505,7 @@ error_0:
void nfs_fs_proc_exit(void)
{
remove_proc_subtree("fs/nfsfs", NULL);
+ ida_destroy(&s_sysfs_ids);
}
#endif /* CONFIG_PROC_FS */
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index cf7365581031..9d3a5f29f17f 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -27,8 +27,15 @@
#define NFS_DEFAULT_DELEGATION_WATERMARK (5000U)
-static atomic_long_t nfs_active_delegations;
static unsigned nfs_delegation_watermark = NFS_DEFAULT_DELEGATION_WATERMARK;
+module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644);
+
+static struct hlist_head *nfs_delegation_hash(struct nfs_server *server,
+ const struct nfs_fh *fhandle)
+{
+ return server->delegation_hash_table +
+ (nfs_fhandle_hash(fhandle) & server->delegation_hash_mask);
+}
static void __nfs_free_delegation(struct nfs_delegation *delegation)
{
@@ -37,11 +44,12 @@ static void __nfs_free_delegation(struct nfs_delegation *delegation)
kfree_rcu(delegation, rcu);
}
-static void nfs_mark_delegation_revoked(struct nfs_delegation *delegation)
+static void nfs_mark_delegation_revoked(struct nfs_server *server,
+ struct nfs_delegation *delegation)
{
if (!test_and_set_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
delegation->stateid.type = NFS4_INVALID_STATEID_TYPE;
- atomic_long_dec(&nfs_active_delegations);
+ atomic_long_dec(&server->nr_active_delegations);
if (!test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
nfs_clear_verifier_delegated(delegation->inode);
}
@@ -59,9 +67,10 @@ static void nfs_put_delegation(struct nfs_delegation *delegation)
__nfs_free_delegation(delegation);
}
-static void nfs_free_delegation(struct nfs_delegation *delegation)
+static void nfs_free_delegation(struct nfs_server *server,
+ struct nfs_delegation *delegation)
{
- nfs_mark_delegation_revoked(delegation);
+ nfs_mark_delegation_revoked(server, delegation);
nfs_put_delegation(delegation);
}
@@ -79,14 +88,14 @@ static void nfs_mark_return_delegation(struct nfs_server *server,
struct nfs_delegation *delegation)
{
set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
+ set_bit(NFS4SERV_DELEGRETURN, &server->delegation_flags);
set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
}
-static bool
-nfs4_is_valid_delegation(const struct nfs_delegation *delegation,
- fmode_t flags)
+static bool nfs4_is_valid_delegation(const struct nfs_delegation *delegation,
+ fmode_t type)
{
- if (delegation != NULL && (delegation->type & flags) == flags &&
+ if (delegation != NULL && (delegation->type & type) == type &&
!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) &&
!test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
return true;
@@ -103,19 +112,22 @@ struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode)
return NULL;
}
-static int
-nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
+static int nfs4_do_check_delegation(struct inode *inode, fmode_t type,
+ int flags, bool mark)
{
struct nfs_delegation *delegation;
int ret = 0;
- flags &= FMODE_READ|FMODE_WRITE;
+ type &= FMODE_READ|FMODE_WRITE;
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
- if (nfs4_is_valid_delegation(delegation, flags)) {
+ if (nfs4_is_valid_delegation(delegation, type)) {
if (mark)
nfs_mark_delegation_referenced(delegation);
ret = 1;
+ if ((flags & NFS_DELEGATION_FLAG_TIME) &&
+ !test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags))
+ ret = 0;
}
rcu_read_unlock();
return ret;
@@ -124,22 +136,23 @@ nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
* nfs4_have_delegation - check if inode has a delegation, mark it
* NFS_DELEGATION_REFERENCED if there is one.
* @inode: inode to check
- * @flags: delegation types to check for
+ * @type: delegation types to check for
+ * @flags: various modifiers
*
* Returns one if inode has the indicated delegation, otherwise zero.
*/
-int nfs4_have_delegation(struct inode *inode, fmode_t flags)
+int nfs4_have_delegation(struct inode *inode, fmode_t type, int flags)
{
- return nfs4_do_check_delegation(inode, flags, true);
+ return nfs4_do_check_delegation(inode, type, flags, true);
}
/*
* nfs4_check_delegation - check if inode has a delegation, do not mark
* NFS_DELEGATION_REFERENCED if it has one.
*/
-int nfs4_check_delegation(struct inode *inode, fmode_t flags)
+int nfs4_check_delegation(struct inode *inode, fmode_t type)
{
- return nfs4_do_check_delegation(inode, flags, false);
+ return nfs4_do_check_delegation(inode, type, 0, false);
}
static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid)
@@ -156,8 +169,8 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
list = &flctx->flc_posix;
spin_lock(&flctx->flc_lock);
restart:
- list_for_each_entry(fl, list, fl_list) {
- if (nfs_file_open_context(fl->fl_file)->state != state)
+ for_each_file_lock(fl, list) {
+ if (nfs_file_open_context(fl->c.flc_file)->state != state)
continue;
spin_unlock(&flctx->flc_lock);
status = nfs4_lock_delegation_recall(fl, state, stateid);
@@ -181,7 +194,6 @@ static int nfs_delegation_claim_opens(struct inode *inode,
struct nfs_open_context *ctx;
struct nfs4_state_owner *sp;
struct nfs4_state *state;
- unsigned int seq;
int err;
again:
@@ -202,12 +214,9 @@ again:
sp = state->owner;
/* Block nfs4_proc_unlck */
mutex_lock(&sp->so_delegreturn_mutex);
- seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
err = nfs4_open_delegation_recall(ctx, state, stateid);
if (!err)
err = nfs_delegation_claim_locks(state, stateid);
- if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
- err = -EAGAIN;
mutex_unlock(&sp->so_delegreturn_mutex);
put_nfs_open_context(ctx);
if (err != 0)
@@ -225,40 +234,51 @@ again:
* @type: delegation type
* @stateid: delegation stateid
* @pagemod_limit: write delegation "space_limit"
+ * @deleg_type: raw delegation type
*
*/
void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
fmode_t type, const nfs4_stateid *stateid,
- unsigned long pagemod_limit)
+ unsigned long pagemod_limit, u32 deleg_type)
{
struct nfs_delegation *delegation;
const struct cred *oldcred = NULL;
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
- if (delegation != NULL) {
- spin_lock(&delegation->lock);
- nfs4_stateid_copy(&delegation->stateid, stateid);
- delegation->type = type;
- delegation->pagemod_limit = pagemod_limit;
- oldcred = delegation->cred;
- delegation->cred = get_cred(cred);
- clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
- if (test_and_clear_bit(NFS_DELEGATION_REVOKED,
- &delegation->flags))
- atomic_long_inc(&nfs_active_delegations);
- spin_unlock(&delegation->lock);
- rcu_read_unlock();
- put_cred(oldcred);
- trace_nfs4_reclaim_delegation(inode, type);
- } else {
+ if (!delegation) {
rcu_read_unlock();
nfs_inode_set_delegation(inode, cred, type, stateid,
- pagemod_limit);
+ pagemod_limit, deleg_type);
+ return;
}
+
+ spin_lock(&delegation->lock);
+ nfs4_stateid_copy(&delegation->stateid, stateid);
+ delegation->type = type;
+ delegation->pagemod_limit = pagemod_limit;
+ oldcred = delegation->cred;
+ delegation->cred = get_cred(cred);
+ switch (deleg_type) {
+ case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
+ case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+ set_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags);
+ break;
+ default:
+ clear_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags);
+ }
+ clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
+ if (test_and_clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
+ atomic_long_inc(&NFS_SERVER(inode)->nr_active_delegations);
+ spin_unlock(&delegation->lock);
+ rcu_read_unlock();
+ put_cred(oldcred);
+ trace_nfs4_reclaim_delegation(inode, type);
}
-static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
+static int nfs_do_return_delegation(struct inode *inode,
+ struct nfs_delegation *delegation,
+ int issync)
{
const struct cred *cred;
int res = 0;
@@ -267,9 +287,8 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
spin_lock(&delegation->lock);
cred = get_cred(delegation->cred);
spin_unlock(&delegation->lock);
- res = nfs4_proc_delegreturn(inode, cred,
- &delegation->stateid,
- issync);
+ res = nfs4_proc_delegreturn(inode, cred, &delegation->stateid,
+ delegation, issync);
put_cred(cred);
}
return res;
@@ -297,7 +316,8 @@ nfs_start_delegation_return_locked(struct nfs_inode *nfsi)
if (delegation == NULL)
goto out;
spin_lock(&delegation->lock);
- if (!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
+ if (delegation->inode &&
+ !test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
clear_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags);
/* Refcount matched in nfs_end_delegation_return() */
ret = nfs_get_delegation(delegation);
@@ -321,14 +341,16 @@ nfs_start_delegation_return(struct nfs_inode *nfsi)
}
static void nfs_abort_delegation_return(struct nfs_delegation *delegation,
- struct nfs_client *clp, int err)
+ struct nfs_server *server, int err)
{
-
spin_lock(&delegation->lock);
clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
if (err == -EAGAIN) {
set_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags);
- set_bit(NFS4CLNT_DELEGRETURN_DELAYED, &clp->cl_state);
+ set_bit(NFS4SERV_DELEGRETURN_DELAYED,
+ &server->delegation_flags);
+ set_bit(NFS4CLNT_DELEGRETURN_DELAYED,
+ &server->nfs_client->cl_state);
}
spin_unlock(&delegation->lock);
}
@@ -342,6 +364,8 @@ nfs_detach_delegation_locked(struct nfs_inode *nfsi,
rcu_dereference_protected(nfsi->delegation,
lockdep_is_held(&clp->cl_lock));
+ trace_nfs4_detach_delegation(&nfsi->vfs_inode, delegation->type);
+
if (deleg_cur == NULL || delegation != deleg_cur)
return NULL;
@@ -350,6 +374,7 @@ nfs_detach_delegation_locked(struct nfs_inode *nfsi,
spin_unlock(&delegation->lock);
return NULL;
}
+ hlist_del_init_rcu(&delegation->hash);
list_del_rcu(&delegation->super_list);
delegation->inode = NULL;
rcu_assign_pointer(nfsi->delegation, NULL);
@@ -397,7 +422,8 @@ nfs_update_delegation_cred(struct nfs_delegation *delegation,
}
static void
-nfs_update_inplace_delegation(struct nfs_delegation *delegation,
+nfs_update_inplace_delegation(struct nfs_server *server,
+ struct nfs_delegation *delegation,
const struct nfs_delegation *update)
{
if (nfs4_stateid_is_newer(&update->stateid, &delegation->stateid)) {
@@ -410,7 +436,7 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation,
nfs_update_delegation_cred(delegation, update->cred);
/* smp_mb__before_atomic() is implicit due to xchg() */
clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags);
- atomic_long_inc(&nfs_active_delegations);
+ atomic_long_inc(&server->nr_active_delegations);
}
}
}
@@ -422,13 +448,13 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation,
* @type: delegation type
* @stateid: delegation stateid
* @pagemod_limit: write delegation "space_limit"
+ * @deleg_type: raw delegation type
*
* Returns zero on success, or a negative errno value.
*/
int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
- fmode_t type,
- const nfs4_stateid *stateid,
- unsigned long pagemod_limit)
+ fmode_t type, const nfs4_stateid *stateid,
+ unsigned long pagemod_limit, u32 deleg_type)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_client *clp = server->nfs_client;
@@ -448,6 +474,12 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
delegation->cred = get_cred(cred);
delegation->inode = inode;
delegation->flags = 1<<NFS_DELEGATION_REFERENCED;
+ switch (deleg_type) {
+ case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
+ case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+ delegation->flags |= BIT(NFS_DELEGATION_DELEGTIME);
+ }
+ delegation->test_gen = 0;
spin_lock_init(&delegation->lock);
spin_lock(&clp->cl_lock);
@@ -459,7 +491,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
if (nfs4_stateid_match_other(&old_delegation->stateid,
&delegation->stateid)) {
spin_lock(&old_delegation->lock);
- nfs_update_inplace_delegation(old_delegation,
+ nfs_update_inplace_delegation(server, old_delegation,
delegation);
spin_unlock(&old_delegation->lock);
goto out;
@@ -505,19 +537,26 @@ add_new:
spin_unlock(&inode->i_lock);
list_add_tail_rcu(&delegation->super_list, &server->delegations);
+ hlist_add_head_rcu(&delegation->hash,
+ nfs_delegation_hash(server, &NFS_I(inode)->fh));
rcu_assign_pointer(nfsi->delegation, delegation);
delegation = NULL;
- atomic_long_inc(&nfs_active_delegations);
+ atomic_long_inc(&server->nr_active_delegations);
trace_nfs4_set_delegation(inode, type);
+
+ /* If we hold writebacks and have delegated mtime then update */
+ if (deleg_type == NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG &&
+ nfs_have_writebacks(inode))
+ nfs_update_delegated_mtime(inode);
out:
spin_unlock(&clp->cl_lock);
if (delegation != NULL)
__nfs_free_delegation(delegation);
if (freeme != NULL) {
nfs_do_return_delegation(inode, freeme, 0);
- nfs_free_delegation(freeme);
+ nfs_free_delegation(server, freeme);
}
return status;
}
@@ -527,7 +566,7 @@ out:
*/
static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation *delegation, int issync)
{
- struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ struct nfs_server *server = NFS_SERVER(inode);
unsigned int mode = O_WRONLY | O_RDWR;
int err = 0;
@@ -549,11 +588,11 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation
/*
* Guard against state recovery
*/
- err = nfs4_wait_clnt_recover(clp);
+ err = nfs4_wait_clnt_recover(server->nfs_client);
}
if (err) {
- nfs_abort_delegation_return(delegation, clp, err);
+ nfs_abort_delegation_return(delegation, server, err);
goto out;
}
@@ -568,19 +607,10 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
{
bool ret = false;
+ trace_nfs_delegation_need_return(delegation);
+
if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
ret = true;
- else if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags)) {
- struct inode *inode;
-
- spin_lock(&delegation->lock);
- inode = delegation->inode;
- if (inode && list_empty(&NFS_I(inode)->open_files))
- ret = true;
- spin_unlock(&delegation->lock);
- }
- if (ret)
- clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) ||
test_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags) ||
test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
@@ -599,6 +629,9 @@ static int nfs_server_return_marked_delegations(struct nfs_server *server,
struct nfs_delegation *place_holder_deleg = NULL;
int err = 0;
+ if (!test_and_clear_bit(NFS4SERV_DELEGRETURN,
+ &server->delegation_flags))
+ return 0;
restart:
/*
* To avoid quadratic looping we hold a reference
@@ -627,6 +660,9 @@ restart:
prev = delegation;
continue;
}
+ inode = nfs_delegation_grab_inode(delegation);
+ if (inode == NULL)
+ continue;
if (prev) {
struct inode *tmp = nfs_delegation_grab_inode(prev);
@@ -637,12 +673,6 @@ restart:
}
}
- inode = nfs_delegation_grab_inode(delegation);
- if (inode == NULL) {
- rcu_read_unlock();
- iput(to_put);
- goto restart;
- }
delegation = nfs_start_delegation_return_locked(NFS_I(inode));
rcu_read_unlock();
@@ -653,6 +683,7 @@ restart:
cond_resched();
if (!err)
goto restart;
+ set_bit(NFS4SERV_DELEGRETURN, &server->delegation_flags);
set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
goto out;
}
@@ -667,6 +698,9 @@ static bool nfs_server_clear_delayed_delegations(struct nfs_server *server)
struct nfs_delegation *d;
bool ret = false;
+ if (!test_and_clear_bit(NFS4SERV_DELEGRETURN_DELAYED,
+ &server->delegation_flags))
+ goto out;
list_for_each_entry_rcu (d, &server->delegations, super_list) {
if (!test_bit(NFS_DELEGATION_RETURN_DELAYED, &d->flags))
continue;
@@ -674,6 +708,7 @@ static bool nfs_server_clear_delayed_delegations(struct nfs_server *server)
clear_bit(NFS_DELEGATION_RETURN_DELAYED, &d->flags);
ret = true;
}
+out:
return ret;
}
@@ -733,7 +768,7 @@ void nfs_inode_evict_delegation(struct inode *inode)
set_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags);
nfs_do_return_delegation(inode, delegation, 1);
- nfs_free_delegation(delegation);
+ nfs_free_delegation(NFS_SERVER(inode), delegation);
}
}
@@ -764,6 +799,43 @@ int nfs4_inode_return_delegation(struct inode *inode)
}
/**
+ * nfs4_inode_set_return_delegation_on_close - asynchronously return a delegation
+ * @inode: inode to process
+ *
+ * This routine is called to request that the delegation be returned as soon
+ * as the file is closed. If the file is already closed, the delegation is
+ * immediately returned.
+ */
+void nfs4_inode_set_return_delegation_on_close(struct inode *inode)
+{
+ struct nfs_delegation *delegation;
+ struct nfs_delegation *ret = NULL;
+
+ if (!inode)
+ return;
+ rcu_read_lock();
+ delegation = nfs4_get_valid_delegation(inode);
+ if (!delegation)
+ goto out;
+ spin_lock(&delegation->lock);
+ if (!delegation->inode)
+ goto out_unlock;
+ if (list_empty(&NFS_I(inode)->open_files) &&
+ !test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
+ /* Refcount matched in nfs_end_delegation_return() */
+ ret = nfs_get_delegation(delegation);
+ } else
+ set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
+out_unlock:
+ spin_unlock(&delegation->lock);
+ if (ret)
+ nfs_clear_verifier_delegated(inode);
+out:
+ rcu_read_unlock();
+ nfs_end_delegation_return(inode, ret, 0);
+}
+
+/**
* nfs4_inode_return_delegation_on_close - asynchronously return a delegation
* @inode: inode to process
*
@@ -782,7 +854,8 @@ void nfs4_inode_return_delegation_on_close(struct inode *inode)
if (!delegation)
goto out;
if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) ||
- atomic_long_read(&nfs_active_delegations) >= nfs_delegation_watermark) {
+ atomic_long_read(&NFS_SERVER(inode)->nr_active_delegations) >=
+ nfs_delegation_watermark) {
spin_lock(&delegation->lock);
if (delegation->inode &&
list_empty(&NFS_I(inode)->open_files) &&
@@ -824,11 +897,25 @@ int nfs4_inode_make_writeable(struct inode *inode)
return nfs4_inode_return_delegation(inode);
}
-static void nfs_mark_return_if_closed_delegation(struct nfs_server *server,
- struct nfs_delegation *delegation)
+static void
+nfs_mark_return_if_closed_delegation(struct nfs_server *server,
+ struct nfs_delegation *delegation)
{
- set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
- set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
+ struct inode *inode;
+
+ if (test_bit(NFS_DELEGATION_RETURN, &delegation->flags) ||
+ test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags))
+ return;
+ spin_lock(&delegation->lock);
+ inode = delegation->inode;
+ if (!inode)
+ goto out;
+ if (list_empty(&NFS_I(inode)->open_files))
+ nfs_mark_return_delegation(server, delegation);
+ else
+ set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
+out:
+ spin_unlock(&delegation->lock);
}
static bool nfs_server_mark_return_all_delegations(struct nfs_server *server)
@@ -944,7 +1031,7 @@ static void nfs_revoke_delegation(struct inode *inode,
}
spin_unlock(&delegation->lock);
}
- nfs_mark_delegation_revoked(delegation);
+ nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation);
ret = true;
out:
rcu_read_unlock();
@@ -952,13 +1039,6 @@ out:
nfs_inode_find_state_and_recover(inode, stateid);
}
-void nfs_remove_bad_delegation(struct inode *inode,
- const nfs4_stateid *stateid)
-{
- nfs_revoke_delegation(inode, stateid);
-}
-EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation);
-
void nfs_delegation_mark_returned(struct inode *inode,
const nfs4_stateid *stateid)
{
@@ -983,7 +1063,12 @@ void nfs_delegation_mark_returned(struct inode *inode,
delegation->stateid.seqid = stateid->seqid;
}
- nfs_mark_delegation_revoked(delegation);
+ nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation);
+ clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
+ spin_unlock(&delegation->lock);
+ if (nfs_detach_delegation(NFS_I(inode), delegation, NFS_SERVER(inode)))
+ nfs_put_delegation(delegation);
+ goto out_rcu_unlock;
out_clear_returning:
clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
@@ -996,6 +1081,24 @@ out_rcu_unlock:
}
/**
+ * nfs_remove_bad_delegation - handle delegations that are unusable
+ * @inode: inode to process
+ * @stateid: the delegation's stateid
+ *
+ * If the server ACK-ed our FREE_STATEID then clean
+ * up the delegation, else mark and keep the revoked state.
+ */
+void nfs_remove_bad_delegation(struct inode *inode,
+ const nfs4_stateid *stateid)
+{
+ if (stateid && stateid->type == NFS4_FREED_STATEID_TYPE)
+ nfs_delegation_mark_returned(inode, stateid);
+ else
+ nfs_revoke_delegation(inode, stateid);
+}
+EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation);
+
+/**
* nfs_expire_unused_delegation_types
* @clp: client to process
* @flags: delegation types to expire
@@ -1073,11 +1176,12 @@ static struct inode *
nfs_delegation_find_inode_server(struct nfs_server *server,
const struct nfs_fh *fhandle)
{
+ struct hlist_head *head = nfs_delegation_hash(server, fhandle);
struct nfs_delegation *delegation;
struct super_block *freeme = NULL;
struct inode *res = NULL;
- list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
+ hlist_for_each_entry_rcu(delegation, head, hash) {
spin_lock(&delegation->lock);
if (delegation->inode != NULL &&
!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) &&
@@ -1164,7 +1268,6 @@ static int nfs_server_reap_unclaimed_delegations(struct nfs_server *server,
struct inode *inode;
restart:
rcu_read_lock();
-restart_locked:
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if (test_bit(NFS_DELEGATION_INODE_FREEING,
&delegation->flags) ||
@@ -1175,13 +1278,13 @@ restart_locked:
continue;
inode = nfs_delegation_grab_inode(delegation);
if (inode == NULL)
- goto restart_locked;
+ continue;
delegation = nfs_start_delegation_return_locked(NFS_I(inode));
rcu_read_unlock();
if (delegation != NULL) {
if (nfs_detach_delegation(NFS_I(inode), delegation,
server) != NULL)
- nfs_free_delegation(delegation);
+ nfs_free_delegation(server, delegation);
/* Match nfs_start_delegation_return_locked */
nfs_put_delegation(delegation);
}
@@ -1218,6 +1321,7 @@ static void nfs_mark_test_expired_delegation(struct nfs_server *server,
return;
clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
set_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags);
+ set_bit(NFS4SERV_DELEGATION_EXPIRED, &server->delegation_flags);
set_bit(NFS4CLNT_DELEGATION_EXPIRED, &server->nfs_client->cl_state);
}
@@ -1294,24 +1398,30 @@ static int nfs_server_reap_expired_delegations(struct nfs_server *server,
struct inode *inode;
const struct cred *cred;
nfs4_stateid stateid;
+ unsigned long gen = ++server->delegation_gen;
+
+ if (!test_and_clear_bit(NFS4SERV_DELEGATION_EXPIRED,
+ &server->delegation_flags))
+ return 0;
restart:
rcu_read_lock();
-restart_locked:
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if (test_bit(NFS_DELEGATION_INODE_FREEING,
&delegation->flags) ||
test_bit(NFS_DELEGATION_RETURNING,
&delegation->flags) ||
test_bit(NFS_DELEGATION_TEST_EXPIRED,
- &delegation->flags) == 0)
+ &delegation->flags) == 0 ||
+ delegation->test_gen == gen)
continue;
inode = nfs_delegation_grab_inode(delegation);
if (inode == NULL)
- goto restart_locked;
+ continue;
spin_lock(&delegation->lock);
cred = get_cred_rcu(delegation->cred);
nfs4_stateid_copy(&stateid, &delegation->stateid);
spin_unlock(&delegation->lock);
+ delegation->test_gen = gen;
clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags);
rcu_read_unlock();
nfs_delegation_test_free_expired(inode, &stateid, cred);
@@ -1322,6 +1432,9 @@ restart_locked:
goto restart;
}
nfs_inode_mark_test_expired_delegation(server,inode);
+ set_bit(NFS4SERV_DELEGATION_EXPIRED, &server->delegation_flags);
+ set_bit(NFS4CLNT_DELEGATION_EXPIRED,
+ &server->nfs_client->cl_state);
iput(inode);
return -EAGAIN;
}
@@ -1476,4 +1589,17 @@ out:
return ret;
}
-module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644);
+int nfs4_delegation_hash_alloc(struct nfs_server *server)
+{
+ int delegation_buckets, i;
+
+ delegation_buckets = roundup_pow_of_two(nfs_delegation_watermark / 16);
+ server->delegation_hash_mask = delegation_buckets - 1;
+ server->delegation_hash_table = kmalloc_array(delegation_buckets,
+ sizeof(*server->delegation_hash_table), GFP_KERNEL);
+ if (!server->delegation_hash_table)
+ return -ENOMEM;
+ for (i = 0; i < delegation_buckets; i++)
+ INIT_HLIST_HEAD(&server->delegation_hash_table[i]);
+ return 0;
+}
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 1c378992b7c0..08ec2e9c68a4 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -14,6 +14,7 @@
* NFSv4 delegation
*/
struct nfs_delegation {
+ struct hlist_node hash;
struct list_head super_list;
const struct cred *cred;
struct inode *inode;
@@ -21,6 +22,7 @@ struct nfs_delegation {
fmode_t type;
unsigned long pagemod_limit;
__u64 change_attr;
+ unsigned long test_gen;
unsigned long flags;
refcount_t refcount;
spinlock_t lock;
@@ -37,14 +39,18 @@ enum {
NFS_DELEGATION_TEST_EXPIRED,
NFS_DELEGATION_INODE_FREEING,
NFS_DELEGATION_RETURN_DELAYED,
+ NFS_DELEGATION_DELEGTIME,
};
int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
- fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit);
+ fmode_t type, const nfs4_stateid *stateid,
+ unsigned long pagemod_limit, u32 deleg_type);
void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
- fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit);
+ fmode_t type, const nfs4_stateid *stateid,
+ unsigned long pagemod_limit, u32 deleg_type);
int nfs4_inode_return_delegation(struct inode *inode);
void nfs4_inode_return_delegation_on_close(struct inode *inode);
+void nfs4_inode_set_return_delegation_on_close(struct inode *inode);
int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
void nfs_inode_evict_delegation(struct inode *inode);
@@ -66,7 +72,9 @@ void nfs_test_expired_all_delegations(struct nfs_client *clp);
void nfs_reap_expired_delegations(struct nfs_client *clp);
/* NFSv4 delegation-related procedures */
-int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync);
+int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
+ const nfs4_stateid *stateid,
+ struct nfs_delegation *delegation, int issync);
int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid);
bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, const struct cred **cred);
@@ -74,8 +82,8 @@ bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode);
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
-int nfs4_have_delegation(struct inode *inode, fmode_t flags);
-int nfs4_check_delegation(struct inode *inode, fmode_t flags);
+int nfs4_have_delegation(struct inode *inode, fmode_t type, int flags);
+int nfs4_check_delegation(struct inode *inode, fmode_t type);
bool nfs4_delegation_flush_on_close(const struct inode *inode);
void nfs_inode_find_delegation_state_and_recover(struct inode *inode,
const nfs4_stateid *stateid);
@@ -83,9 +91,39 @@ int nfs4_inode_make_writeable(struct inode *inode);
#endif
+#define NFS_DELEGATION_FLAG_TIME BIT(1)
+
+void nfs_update_delegated_atime(struct inode *inode);
+void nfs_update_delegated_mtime(struct inode *inode);
+void nfs_update_delegated_mtime_locked(struct inode *inode);
+
+static inline int nfs_have_read_or_write_delegation(struct inode *inode)
+{
+ return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0);
+}
+
+static inline int nfs_have_write_delegation(struct inode *inode)
+{
+ return NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE, 0);
+}
+
static inline int nfs_have_delegated_attributes(struct inode *inode)
{
- return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ);
+ return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0);
}
+static inline int nfs_have_delegated_atime(struct inode *inode)
+{
+ return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ,
+ NFS_DELEGATION_FLAG_TIME);
+}
+
+static inline int nfs_have_delegated_mtime(struct inode *inode)
+{
+ return NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE,
+ NFS_DELEGATION_FLAG_TIME);
+}
+
+int nfs4_delegation_hash_alloc(struct nfs_server *server);
+
#endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index f7e4a88d5d92..ea9f6ca8f30f 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -55,7 +55,9 @@ static int nfs_closedir(struct inode *, struct file *);
static int nfs_readdir(struct file *, struct dir_context *);
static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
static loff_t nfs_llseek_dir(struct file *, loff_t, int);
-static void nfs_readdir_free_folio(struct folio *);
+static void nfs_readdir_clear_array(struct folio *);
+static int nfs_do_create(struct inode *dir, struct dentry *dentry,
+ umode_t mode, int open_flags);
const struct file_operations nfs_dir_operations = {
.llseek = nfs_llseek_dir,
@@ -67,7 +69,7 @@ const struct file_operations nfs_dir_operations = {
};
const struct address_space_operations nfs_dir_aops = {
- .free_folio = nfs_readdir_free_folio,
+ .free_folio = nfs_readdir_clear_array,
};
#define NFS_INIT_DTSIZE PAGE_SIZE
@@ -146,18 +148,18 @@ struct nfs_cache_array {
u64 change_attr;
u64 last_cookie;
unsigned int size;
- unsigned char page_full : 1,
- page_is_eof : 1,
+ unsigned char folio_full : 1,
+ folio_is_eof : 1,
cookies_are_ordered : 1;
- struct nfs_cache_array_entry array[];
+ struct nfs_cache_array_entry array[] __counted_by(size);
};
struct nfs_readdir_descriptor {
struct file *file;
- struct page *page;
+ struct folio *folio;
struct dir_context *ctx;
- pgoff_t page_index;
- pgoff_t page_index_max;
+ pgoff_t folio_index;
+ pgoff_t folio_index_max;
u64 dir_cookie;
u64 last_cookie;
loff_t current_index;
@@ -198,62 +200,57 @@ static void nfs_grow_dtsize(struct nfs_readdir_descriptor *desc)
nfs_set_dtsize(desc, desc->dtsize << 1);
}
-static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie,
- u64 change_attr)
+static void nfs_readdir_folio_init_array(struct folio *folio, u64 last_cookie,
+ u64 change_attr)
{
struct nfs_cache_array *array;
- array = kmap_atomic(page);
+ array = kmap_local_folio(folio, 0);
array->change_attr = change_attr;
array->last_cookie = last_cookie;
array->size = 0;
- array->page_full = 0;
- array->page_is_eof = 0;
+ array->folio_full = 0;
+ array->folio_is_eof = 0;
array->cookies_are_ordered = 1;
- kunmap_atomic(array);
+ kunmap_local(array);
}
/*
* we are freeing strings created by nfs_add_to_readdir_array()
*/
-static void nfs_readdir_clear_array(struct page *page)
+static void nfs_readdir_clear_array(struct folio *folio)
{
struct nfs_cache_array *array;
unsigned int i;
- array = kmap_atomic(page);
+ array = kmap_local_folio(folio, 0);
for (i = 0; i < array->size; i++)
kfree(array->array[i].name);
array->size = 0;
- kunmap_atomic(array);
-}
-
-static void nfs_readdir_free_folio(struct folio *folio)
-{
- nfs_readdir_clear_array(&folio->page);
+ kunmap_local(array);
}
-static void nfs_readdir_page_reinit_array(struct page *page, u64 last_cookie,
- u64 change_attr)
+static void nfs_readdir_folio_reinit_array(struct folio *folio, u64 last_cookie,
+ u64 change_attr)
{
- nfs_readdir_clear_array(page);
- nfs_readdir_page_init_array(page, last_cookie, change_attr);
+ nfs_readdir_clear_array(folio);
+ nfs_readdir_folio_init_array(folio, last_cookie, change_attr);
}
-static struct page *
-nfs_readdir_page_array_alloc(u64 last_cookie, gfp_t gfp_flags)
+static struct folio *
+nfs_readdir_folio_array_alloc(u64 last_cookie, gfp_t gfp_flags)
{
- struct page *page = alloc_page(gfp_flags);
- if (page)
- nfs_readdir_page_init_array(page, last_cookie, 0);
- return page;
+ struct folio *folio = folio_alloc(gfp_flags, 0);
+ if (folio)
+ nfs_readdir_folio_init_array(folio, last_cookie, 0);
+ return folio;
}
-static void nfs_readdir_page_array_free(struct page *page)
+static void nfs_readdir_folio_array_free(struct folio *folio)
{
- if (page) {
- nfs_readdir_clear_array(page);
- put_page(page);
+ if (folio) {
+ nfs_readdir_clear_array(folio);
+ folio_put(folio);
}
}
@@ -264,13 +261,13 @@ static u64 nfs_readdir_array_index_cookie(struct nfs_cache_array *array)
static void nfs_readdir_array_set_eof(struct nfs_cache_array *array)
{
- array->page_is_eof = 1;
- array->page_full = 1;
+ array->folio_is_eof = 1;
+ array->folio_full = 1;
}
static bool nfs_readdir_array_is_full(struct nfs_cache_array *array)
{
- return array->page_full;
+ return array->folio_full;
}
/*
@@ -302,18 +299,18 @@ static size_t nfs_readdir_array_maxentries(void)
*/
static int nfs_readdir_array_can_expand(struct nfs_cache_array *array)
{
- if (array->page_full)
+ if (array->folio_full)
return -ENOSPC;
if (array->size == nfs_readdir_array_maxentries()) {
- array->page_full = 1;
+ array->folio_full = 1;
return -ENOSPC;
}
return 0;
}
-static int nfs_readdir_page_array_append(struct page *page,
- const struct nfs_entry *entry,
- u64 *cookie)
+static int nfs_readdir_folio_array_append(struct folio *folio,
+ const struct nfs_entry *entry,
+ u64 *cookie)
{
struct nfs_cache_array *array;
struct nfs_cache_array_entry *cache_entry;
@@ -322,7 +319,7 @@ static int nfs_readdir_page_array_append(struct page *page,
name = nfs_readdir_copy_name(entry->name, entry->len);
- array = kmap_atomic(page);
+ array = kmap_local_folio(folio, 0);
if (!name)
goto out;
ret = nfs_readdir_array_can_expand(array);
@@ -331,7 +328,8 @@ static int nfs_readdir_page_array_append(struct page *page,
goto out;
}
- cache_entry = &array->array[array->size];
+ array->size++;
+ cache_entry = &array->array[array->size - 1];
cache_entry->cookie = array->last_cookie;
cache_entry->ino = entry->ino;
cache_entry->d_type = entry->d_type;
@@ -340,12 +338,11 @@ static int nfs_readdir_page_array_append(struct page *page,
array->last_cookie = entry->cookie;
if (array->last_cookie <= cache_entry->cookie)
array->cookies_are_ordered = 0;
- array->size++;
if (entry->eof != 0)
nfs_readdir_array_set_eof(array);
out:
*cookie = array->last_cookie;
- kunmap_atomic(array);
+ kunmap_local(array);
return ret;
}
@@ -361,102 +358,104 @@ out:
* 127 readdir entries for a typical 64-bit system, that works out to a
* cache of ~ 33 million entries per directory.
*/
-static pgoff_t nfs_readdir_page_cookie_hash(u64 cookie)
+static pgoff_t nfs_readdir_folio_cookie_hash(u64 cookie)
{
if (cookie == 0)
return 0;
return hash_64(cookie, 18);
}
-static bool nfs_readdir_page_validate(struct page *page, u64 last_cookie,
- u64 change_attr)
+static bool nfs_readdir_folio_validate(struct folio *folio, u64 last_cookie,
+ u64 change_attr)
{
- struct nfs_cache_array *array = kmap_atomic(page);
+ struct nfs_cache_array *array = kmap_local_folio(folio, 0);
int ret = true;
if (array->change_attr != change_attr)
ret = false;
if (nfs_readdir_array_index_cookie(array) != last_cookie)
ret = false;
- kunmap_atomic(array);
+ kunmap_local(array);
return ret;
}
-static void nfs_readdir_page_unlock_and_put(struct page *page)
+static void nfs_readdir_folio_unlock_and_put(struct folio *folio)
{
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
}
-static void nfs_readdir_page_init_and_validate(struct page *page, u64 cookie,
- u64 change_attr)
+static void nfs_readdir_folio_init_and_validate(struct folio *folio, u64 cookie,
+ u64 change_attr)
{
- if (PageUptodate(page)) {
- if (nfs_readdir_page_validate(page, cookie, change_attr))
+ if (folio_test_uptodate(folio)) {
+ if (nfs_readdir_folio_validate(folio, cookie, change_attr))
return;
- nfs_readdir_clear_array(page);
+ nfs_readdir_clear_array(folio);
}
- nfs_readdir_page_init_array(page, cookie, change_attr);
- SetPageUptodate(page);
+ nfs_readdir_folio_init_array(folio, cookie, change_attr);
+ folio_mark_uptodate(folio);
}
-static struct page *nfs_readdir_page_get_locked(struct address_space *mapping,
- u64 cookie, u64 change_attr)
+static struct folio *nfs_readdir_folio_get_locked(struct address_space *mapping,
+ u64 cookie, u64 change_attr)
{
- pgoff_t index = nfs_readdir_page_cookie_hash(cookie);
- struct page *page;
+ pgoff_t index = nfs_readdir_folio_cookie_hash(cookie);
+ struct folio *folio;
- page = grab_cache_page(mapping, index);
- if (!page)
+ folio = filemap_grab_folio(mapping, index);
+ if (IS_ERR(folio))
return NULL;
- nfs_readdir_page_init_and_validate(page, cookie, change_attr);
- return page;
+ nfs_readdir_folio_init_and_validate(folio, cookie, change_attr);
+ return folio;
}
-static u64 nfs_readdir_page_last_cookie(struct page *page)
+static u64 nfs_readdir_folio_last_cookie(struct folio *folio)
{
struct nfs_cache_array *array;
u64 ret;
- array = kmap_atomic(page);
+ array = kmap_local_folio(folio, 0);
ret = array->last_cookie;
- kunmap_atomic(array);
+ kunmap_local(array);
return ret;
}
-static bool nfs_readdir_page_needs_filling(struct page *page)
+static bool nfs_readdir_folio_needs_filling(struct folio *folio)
{
struct nfs_cache_array *array;
bool ret;
- array = kmap_atomic(page);
+ array = kmap_local_folio(folio, 0);
ret = !nfs_readdir_array_is_full(array);
- kunmap_atomic(array);
+ kunmap_local(array);
return ret;
}
-static void nfs_readdir_page_set_eof(struct page *page)
+static void nfs_readdir_folio_set_eof(struct folio *folio)
{
struct nfs_cache_array *array;
- array = kmap_atomic(page);
+ array = kmap_local_folio(folio, 0);
nfs_readdir_array_set_eof(array);
- kunmap_atomic(array);
+ kunmap_local(array);
}
-static struct page *nfs_readdir_page_get_next(struct address_space *mapping,
- u64 cookie, u64 change_attr)
+static struct folio *nfs_readdir_folio_get_next(struct address_space *mapping,
+ u64 cookie, u64 change_attr)
{
- pgoff_t index = nfs_readdir_page_cookie_hash(cookie);
- struct page *page;
+ pgoff_t index = nfs_readdir_folio_cookie_hash(cookie);
+ struct folio *folio;
- page = grab_cache_page_nowait(mapping, index);
- if (!page)
+ folio = __filemap_get_folio(mapping, index,
+ FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT,
+ mapping_gfp_mask(mapping));
+ if (IS_ERR(folio))
return NULL;
- nfs_readdir_page_init_and_validate(page, cookie, change_attr);
- if (nfs_readdir_page_last_cookie(page) != cookie)
- nfs_readdir_page_reinit_array(page, cookie, change_attr);
- return page;
+ nfs_readdir_folio_init_and_validate(folio, cookie, change_attr);
+ if (nfs_readdir_folio_last_cookie(folio) != cookie)
+ nfs_readdir_folio_reinit_array(folio, cookie, change_attr);
+ return folio;
}
static inline
@@ -481,11 +480,11 @@ bool nfs_readdir_use_cookie(const struct file *filp)
static void nfs_readdir_seek_next_array(struct nfs_cache_array *array,
struct nfs_readdir_descriptor *desc)
{
- if (array->page_full) {
+ if (array->folio_full) {
desc->last_cookie = array->last_cookie;
desc->current_index += array->size;
desc->cache_entry_index = 0;
- desc->page_index++;
+ desc->folio_index++;
} else
desc->last_cookie = nfs_readdir_array_index_cookie(array);
}
@@ -494,7 +493,7 @@ static void nfs_readdir_rewind_search(struct nfs_readdir_descriptor *desc)
{
desc->current_index = 0;
desc->last_cookie = 0;
- desc->page_index = 0;
+ desc->folio_index = 0;
}
static int nfs_readdir_search_for_pos(struct nfs_cache_array *array,
@@ -506,7 +505,7 @@ static int nfs_readdir_search_for_pos(struct nfs_cache_array *array,
if (diff < 0)
goto out_eof;
if (diff >= array->size) {
- if (array->page_is_eof)
+ if (array->folio_is_eof)
goto out_eof;
nfs_readdir_seek_next_array(array, desc);
return -EAGAIN;
@@ -554,7 +553,7 @@ static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array,
}
}
check_eof:
- if (array->page_is_eof) {
+ if (array->folio_is_eof) {
status = -EBADCOOKIE;
if (desc->dir_cookie == array->last_cookie)
desc->eof = true;
@@ -568,14 +567,14 @@ static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc)
struct nfs_cache_array *array;
int status;
- array = kmap_atomic(desc->page);
+ array = kmap_local_folio(desc->folio, 0);
if (desc->dir_cookie == 0)
status = nfs_readdir_search_for_pos(array, desc);
else
status = nfs_readdir_search_for_cookie(array, desc);
- kunmap_atomic(array);
+ kunmap_local(array);
return status;
}
@@ -667,6 +666,8 @@ static bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx,
{
if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
return false;
+ if (NFS_SERVER(dir)->flags & NFS_MOUNT_FORCE_RDIRPLUS)
+ return true;
if (ctx->pos == 0 ||
cache_hits + cache_misses > NFS_READDIR_CACHE_USAGE_THRESHOLD)
return true;
@@ -819,61 +820,62 @@ static int nfs_readdir_entry_decode(struct nfs_readdir_descriptor *desc,
}
/* Perform conversion from xdr to cache array */
-static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
- struct nfs_entry *entry,
- struct page **xdr_pages, unsigned int buflen,
- struct page **arrays, size_t narrays,
- u64 change_attr)
+static int nfs_readdir_folio_filler(struct nfs_readdir_descriptor *desc,
+ struct nfs_entry *entry,
+ struct page **xdr_pages, unsigned int buflen,
+ struct folio **arrays, size_t narrays,
+ u64 change_attr)
{
struct address_space *mapping = desc->file->f_mapping;
+ struct folio *new, *folio = *arrays;
struct xdr_stream stream;
+ struct folio *scratch;
struct xdr_buf buf;
- struct page *scratch, *new, *page = *arrays;
u64 cookie;
int status;
- scratch = alloc_page(GFP_KERNEL);
+ scratch = folio_alloc(GFP_KERNEL, 0);
if (scratch == NULL)
return -ENOMEM;
xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
- xdr_set_scratch_page(&stream, scratch);
+ xdr_set_scratch_folio(&stream, scratch);
do {
status = nfs_readdir_entry_decode(desc, entry, &stream);
if (status != 0)
break;
- status = nfs_readdir_page_array_append(page, entry, &cookie);
+ status = nfs_readdir_folio_array_append(folio, entry, &cookie);
if (status != -ENOSPC)
continue;
- if (page->mapping != mapping) {
+ if (folio->mapping != mapping) {
if (!--narrays)
break;
- new = nfs_readdir_page_array_alloc(cookie, GFP_KERNEL);
+ new = nfs_readdir_folio_array_alloc(cookie, GFP_KERNEL);
if (!new)
break;
arrays++;
- *arrays = page = new;
+ *arrays = folio = new;
} else {
- new = nfs_readdir_page_get_next(mapping, cookie,
- change_attr);
+ new = nfs_readdir_folio_get_next(mapping, cookie,
+ change_attr);
if (!new)
break;
- if (page != *arrays)
- nfs_readdir_page_unlock_and_put(page);
- page = new;
+ if (folio != *arrays)
+ nfs_readdir_folio_unlock_and_put(folio);
+ folio = new;
}
- desc->page_index_max++;
- status = nfs_readdir_page_array_append(page, entry, &cookie);
+ desc->folio_index_max++;
+ status = nfs_readdir_folio_array_append(folio, entry, &cookie);
} while (!status && !entry->eof);
switch (status) {
case -EBADCOOKIE:
if (!entry->eof)
break;
- nfs_readdir_page_set_eof(page);
+ nfs_readdir_folio_set_eof(folio);
fallthrough;
case -EAGAIN:
status = 0;
@@ -886,10 +888,10 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
;
}
- if (page != *arrays)
- nfs_readdir_page_unlock_and_put(page);
+ if (folio != *arrays)
+ nfs_readdir_folio_unlock_and_put(folio);
- put_page(scratch);
+ folio_put(scratch);
return status;
}
@@ -927,11 +929,11 @@ out_freepages:
static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
__be32 *verf_arg, __be32 *verf_res,
- struct page **arrays, size_t narrays)
+ struct folio **arrays, size_t narrays)
{
u64 change_attr;
struct page **pages;
- struct page *page = *arrays;
+ struct folio *folio = *arrays;
struct nfs_entry *entry;
size_t array_size;
struct inode *inode = file_inode(desc->file);
@@ -942,7 +944,7 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return -ENOMEM;
- entry->cookie = nfs_readdir_page_last_cookie(page);
+ entry->cookie = nfs_readdir_folio_last_cookie(folio);
entry->fh = nfs_alloc_fhandle();
entry->fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode));
entry->server = NFS_SERVER(inode);
@@ -962,10 +964,10 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
pglen = status;
if (pglen != 0)
- status = nfs_readdir_page_filler(desc, entry, pages, pglen,
- arrays, narrays, change_attr);
+ status = nfs_readdir_folio_filler(desc, entry, pages, pglen,
+ arrays, narrays, change_attr);
else
- nfs_readdir_page_set_eof(page);
+ nfs_readdir_folio_set_eof(folio);
desc->buffer_fills++;
free_pages:
@@ -977,33 +979,33 @@ out:
return status;
}
-static void nfs_readdir_page_put(struct nfs_readdir_descriptor *desc)
+static void nfs_readdir_folio_put(struct nfs_readdir_descriptor *desc)
{
- put_page(desc->page);
- desc->page = NULL;
+ folio_put(desc->folio);
+ desc->folio = NULL;
}
static void
-nfs_readdir_page_unlock_and_put_cached(struct nfs_readdir_descriptor *desc)
+nfs_readdir_folio_unlock_and_put_cached(struct nfs_readdir_descriptor *desc)
{
- unlock_page(desc->page);
- nfs_readdir_page_put(desc);
+ folio_unlock(desc->folio);
+ nfs_readdir_folio_put(desc);
}
-static struct page *
-nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc)
+static struct folio *
+nfs_readdir_folio_get_cached(struct nfs_readdir_descriptor *desc)
{
struct address_space *mapping = desc->file->f_mapping;
u64 change_attr = inode_peek_iversion_raw(mapping->host);
u64 cookie = desc->last_cookie;
- struct page *page;
+ struct folio *folio;
- page = nfs_readdir_page_get_locked(mapping, cookie, change_attr);
- if (!page)
+ folio = nfs_readdir_folio_get_locked(mapping, cookie, change_attr);
+ if (!folio)
return NULL;
- if (desc->clear_cache && !nfs_readdir_page_needs_filling(page))
- nfs_readdir_page_reinit_array(page, cookie, change_attr);
- return page;
+ if (desc->clear_cache && !nfs_readdir_folio_needs_filling(folio))
+ nfs_readdir_folio_reinit_array(folio, cookie, change_attr);
+ return folio;
}
/*
@@ -1017,21 +1019,21 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
__be32 verf[NFS_DIR_VERIFIER_SIZE];
int res;
- desc->page = nfs_readdir_page_get_cached(desc);
- if (!desc->page)
+ desc->folio = nfs_readdir_folio_get_cached(desc);
+ if (!desc->folio)
return -ENOMEM;
- if (nfs_readdir_page_needs_filling(desc->page)) {
+ if (nfs_readdir_folio_needs_filling(desc->folio)) {
/* Grow the dtsize if we had to go back for more pages */
- if (desc->page_index == desc->page_index_max)
+ if (desc->folio_index == desc->folio_index_max)
nfs_grow_dtsize(desc);
- desc->page_index_max = desc->page_index;
+ desc->folio_index_max = desc->folio_index;
trace_nfs_readdir_cache_fill(desc->file, nfsi->cookieverf,
desc->last_cookie,
- desc->page->index, desc->dtsize);
+ desc->folio->index, desc->dtsize);
res = nfs_readdir_xdr_to_array(desc, nfsi->cookieverf, verf,
- &desc->page, 1);
+ &desc->folio, 1);
if (res < 0) {
- nfs_readdir_page_unlock_and_put_cached(desc);
+ nfs_readdir_folio_unlock_and_put_cached(desc);
trace_nfs_readdir_cache_fill_done(inode, res);
if (res == -EBADCOOKIE || res == -ENOTSYNC) {
invalidate_inode_pages2(desc->file->f_mapping);
@@ -1059,7 +1061,7 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
res = nfs_readdir_search_array(desc);
if (res == 0)
return 0;
- nfs_readdir_page_unlock_and_put_cached(desc);
+ nfs_readdir_folio_unlock_and_put_cached(desc);
return res;
}
@@ -1087,10 +1089,21 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
unsigned int i;
bool first_emit = !desc->dir_cookie;
- array = kmap_local_page(desc->page);
+ array = kmap_local_folio(desc->folio, 0);
for (i = desc->cache_entry_index; i < array->size; i++) {
struct nfs_cache_array_entry *ent;
+ /*
+ * nfs_readdir_handle_cache_misses return force clear at
+ * (cache_misses > NFS_READDIR_CACHE_MISS_THRESHOLD) for
+ * readdir heuristic, NFS_READDIR_CACHE_MISS_THRESHOLD + 1
+ * entries need be emitted here.
+ */
+ if (first_emit && i > NFS_READDIR_CACHE_MISS_THRESHOLD + 2) {
+ desc->eob = true;
+ break;
+ }
+
ent = &array->array[i];
if (!dir_emit(desc->ctx, ent->name, ent->name_len,
nfs_compat_user_ino64(ent->ino), ent->d_type)) {
@@ -1109,12 +1122,8 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
desc->ctx->pos = desc->dir_cookie;
else
desc->ctx->pos++;
- if (first_emit && i > NFS_READDIR_CACHE_MISS_THRESHOLD + 1) {
- desc->eob = true;
- break;
- }
}
- if (array->page_is_eof)
+ if (array->folio_is_eof)
desc->eof = !desc->eob;
kunmap_local(array);
@@ -1136,7 +1145,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
*/
static int uncached_readdir(struct nfs_readdir_descriptor *desc)
{
- struct page **arrays;
+ struct folio **arrays;
size_t i, sz = 512;
__be32 verf[NFS_DIR_VERIFIER_SIZE];
int status = -ENOMEM;
@@ -1147,14 +1156,14 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
arrays = kcalloc(sz, sizeof(*arrays), GFP_KERNEL);
if (!arrays)
goto out;
- arrays[0] = nfs_readdir_page_array_alloc(desc->dir_cookie, GFP_KERNEL);
+ arrays[0] = nfs_readdir_folio_array_alloc(desc->dir_cookie, GFP_KERNEL);
if (!arrays[0])
goto out;
- desc->page_index = 0;
+ desc->folio_index = 0;
desc->cache_entry_index = 0;
desc->last_cookie = desc->dir_cookie;
- desc->page_index_max = 0;
+ desc->folio_index_max = 0;
trace_nfs_readdir_uncached(desc->file, desc->verf, desc->last_cookie,
-1, desc->dtsize);
@@ -1166,10 +1175,10 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
}
for (i = 0; !desc->eob && i < sz && arrays[i]; i++) {
- desc->page = arrays[i];
+ desc->folio = arrays[i];
nfs_do_filldir(desc, verf);
}
- desc->page = NULL;
+ desc->folio = NULL;
/*
* Grow the dtsize if we have to go back for more pages,
@@ -1179,16 +1188,16 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
if (!desc->eob)
nfs_grow_dtsize(desc);
else if (desc->buffer_fills == 1 &&
- i < (desc->page_index_max >> 1))
+ i < (desc->folio_index_max >> 1))
nfs_shrink_dtsize(desc);
}
out_free:
for (i = 0; i < sz && arrays[i]; i++)
- nfs_readdir_page_array_free(arrays[i]);
+ nfs_readdir_folio_array_free(arrays[i]);
out:
if (!nfs_readdir_use_cookie(desc->file))
nfs_readdir_rewind_search(desc);
- desc->page_index_max = -1;
+ desc->folio_index_max = -1;
kfree(arrays);
dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status);
return status;
@@ -1240,11 +1249,11 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
goto out;
desc->file = file;
desc->ctx = ctx;
- desc->page_index_max = -1;
+ desc->folio_index_max = -1;
spin_lock(&file->f_lock);
desc->dir_cookie = dir_ctx->dir_cookie;
- desc->page_index = dir_ctx->page_index;
+ desc->folio_index = dir_ctx->page_index;
desc->last_cookie = dir_ctx->last_cookie;
desc->attr_gencount = dir_ctx->attr_gencount;
desc->eof = dir_ctx->eof;
@@ -1291,8 +1300,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
break;
nfs_do_filldir(desc, nfsi->cookieverf);
- nfs_readdir_page_unlock_and_put_cached(desc);
- if (desc->page_index == desc->page_index_max)
+ nfs_readdir_folio_unlock_and_put_cached(desc);
+ if (desc->folio_index == desc->folio_index_max)
desc->clear_cache = force_clear;
} while (!desc->eob && !desc->eof);
@@ -1300,7 +1309,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
dir_ctx->dir_cookie = desc->dir_cookie;
dir_ctx->last_cookie = desc->last_cookie;
dir_ctx->attr_gencount = desc->attr_gencount;
- dir_ctx->page_index = desc->page_index;
+ dir_ctx->page_index = desc->folio_index;
dir_ctx->force_clear = force_clear;
dir_ctx->eof = desc->eof;
dir_ctx->dtsize = desc->dtsize;
@@ -1426,11 +1435,11 @@ static bool nfs_verifier_is_delegated(struct dentry *dentry)
static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf)
{
struct inode *inode = d_inode(dentry);
- struct inode *dir = d_inode(dentry->d_parent);
+ struct inode *dir = d_inode_rcu(dentry->d_parent);
- if (!nfs_verify_change_attribute(dir, verf))
+ if (!dir || !nfs_verify_change_attribute(dir, verf))
return;
- if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+ if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0))
nfs_set_verifier_delegated(&verf);
dentry->d_time = verf;
}
@@ -1525,7 +1534,8 @@ static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags)
{
if (NFS_PROTO(dir)->version == 2)
return 0;
- return flags & LOOKUP_EXCL;
+ return (flags & (LOOKUP_CREATE | LOOKUP_EXCL)) ==
+ (LOOKUP_CREATE | LOOKUP_EXCL);
}
/*
@@ -1620,7 +1630,16 @@ nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
switch (error) {
case 1:
break;
- case 0:
+ case -ETIMEDOUT:
+ if (inode && (IS_ROOT(dentry) ||
+ NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL))
+ error = 1;
+ break;
+ case -ESTALE:
+ case -ENOENT:
+ error = 0;
+ fallthrough;
+ default:
/*
* We can't d_drop the root of a disconnected tree:
* its d_hash is on the s_anon list and d_drop() would hide
@@ -1656,7 +1675,7 @@ nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry,
return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
}
-static int nfs_lookup_revalidate_dentry(struct inode *dir,
+static int nfs_lookup_revalidate_dentry(struct inode *dir, const struct qstr *name,
struct dentry *dentry,
struct inode *inode, unsigned int flags)
{
@@ -1674,19 +1693,9 @@ static int nfs_lookup_revalidate_dentry(struct inode *dir,
goto out;
dir_verifier = nfs_save_change_attribute(dir);
- ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
- if (ret < 0) {
- switch (ret) {
- case -ESTALE:
- case -ENOENT:
- ret = 0;
- break;
- case -ETIMEDOUT:
- if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
- ret = 1;
- }
+ ret = NFS_PROTO(dir)->lookup(dir, dentry, name, fhandle, fattr);
+ if (ret < 0)
goto out;
- }
/* Request help from readdirplus */
nfs_lookup_advise_force_readdirplus(dir, flags);
@@ -1726,11 +1735,11 @@ out:
* cached dentry and do a new lookup.
*/
static int
-nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
- unsigned int flags)
+nfs_do_lookup_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
struct inode *inode;
- int error;
+ int error = 0;
nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
inode = d_inode(dentry);
@@ -1769,47 +1778,57 @@ nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
if (NFS_STALE(inode))
goto out_bad;
- return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags);
+ return nfs_lookup_revalidate_dentry(dir, name, dentry, inode, flags);
out_valid:
return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
out_bad:
if (flags & LOOKUP_RCU)
return -ECHILD;
- return nfs_lookup_revalidate_done(dir, dentry, inode, 0);
+ return nfs_lookup_revalidate_done(dir, dentry, inode, error);
}
static int
-__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
- int (*reval)(struct inode *, struct dentry *, unsigned int))
+__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
{
- struct dentry *parent;
- struct inode *dir;
- int ret;
-
if (flags & LOOKUP_RCU) {
if (dentry->d_fsdata == NFS_FSDATA_BLOCKED)
return -ECHILD;
- parent = READ_ONCE(dentry->d_parent);
- dir = d_inode_rcu(parent);
- if (!dir)
- return -ECHILD;
- ret = reval(dir, dentry, flags);
- if (parent != READ_ONCE(dentry->d_parent))
- return -ECHILD;
} else {
- /* Wait for unlink to complete */
+ /* Wait for unlink to complete - see unblock_revalidate() */
wait_var_event(&dentry->d_fsdata,
- dentry->d_fsdata != NFS_FSDATA_BLOCKED);
- parent = dget_parent(dentry);
- ret = reval(d_inode(parent), dentry, flags);
- dput(parent);
+ smp_load_acquire(&dentry->d_fsdata)
+ != NFS_FSDATA_BLOCKED);
}
- return ret;
+ return 0;
}
-static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
+static int nfs_lookup_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
- return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate);
+ if (__nfs_lookup_revalidate(dentry, flags))
+ return -ECHILD;
+ return nfs_do_lookup_revalidate(dir, name, dentry, flags);
+}
+
+static void block_revalidate(struct dentry *dentry)
+{
+ /* old devname - just in case */
+ kfree(dentry->d_fsdata);
+
+ /* Any new reference that could lead to an open
+ * will take ->d_lock in lookup_open() -> d_lookup().
+ * Holding this lock ensures we cannot race with
+ * __nfs_lookup_revalidate() and removes and need
+ * for further barriers.
+ */
+ lockdep_assert_held(&dentry->d_lock);
+
+ dentry->d_fsdata = NFS_FSDATA_BLOCKED;
+}
+
+static void unblock_revalidate(struct dentry *dentry)
+{
+ store_release_wake_up(&dentry->d_fsdata, NULL);
}
/*
@@ -1952,7 +1971,8 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
dir_verifier = nfs_save_change_attribute(dir);
trace_nfs_lookup_enter(dir, dentry, flags);
- error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
+ error = NFS_PROTO(dir)->lookup(dir, dentry, &dentry->d_name,
+ fhandle, fattr);
if (error == -ENOENT) {
if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
dir_verifier = inode_peek_iversion_raw(dir);
@@ -1995,7 +2015,8 @@ void nfs_d_prune_case_insensitive_aliases(struct inode *inode)
EXPORT_SYMBOL_GPL(nfs_d_prune_case_insensitive_aliases);
#if IS_ENABLED(CONFIG_NFS_V4)
-static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
+static int nfs4_lookup_revalidate(struct inode *, const struct qstr *,
+ struct dentry *, unsigned int);
const struct dentry_operations nfs4_dentry_operations = {
.d_revalidate = nfs4_lookup_revalidate,
@@ -2177,18 +2198,21 @@ no_open:
else
dput(dentry);
}
- if (IS_ERR(res))
- return PTR_ERR(res);
return finish_no_open(file, res);
}
EXPORT_SYMBOL_GPL(nfs_atomic_open);
static int
-nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
- unsigned int flags)
+nfs4_lookup_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
struct inode *inode;
+ if (__nfs_lookup_revalidate(dentry, flags))
+ return -ECHILD;
+
+ trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
+
if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
goto full_reval;
if (d_mountpoint(dentry))
@@ -2222,19 +2246,46 @@ nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
reval_dentry:
if (flags & LOOKUP_RCU)
return -ECHILD;
- return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags);
+ return nfs_lookup_revalidate_dentry(dir, name, dentry, inode, flags);
full_reval:
- return nfs_do_lookup_revalidate(dir, dentry, flags);
+ return nfs_do_lookup_revalidate(dir, name, dentry, flags);
}
-static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
+#endif /* CONFIG_NFSV4 */
+
+int nfs_atomic_open_v23(struct inode *dir, struct dentry *dentry,
+ struct file *file, unsigned int open_flags,
+ umode_t mode)
{
- return __nfs_lookup_revalidate(dentry, flags,
- nfs4_do_lookup_revalidate);
-}
+ struct dentry *res = NULL;
+ /* Same as look+open from lookup_open(), but with different O_TRUNC
+ * handling.
+ */
+ int error = 0;
-#endif /* CONFIG_NFSV4 */
+ if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
+ return -ENAMETOOLONG;
+
+ if (open_flags & O_CREAT) {
+ error = nfs_do_create(dir, dentry, mode, open_flags);
+ if (!error) {
+ file->f_mode |= FMODE_CREATED;
+ return finish_open(file, dentry, NULL);
+ } else if (error != -EEXIST || open_flags & O_EXCL)
+ return error;
+ }
+ if (d_in_lookup(dentry)) {
+ /* The only flags nfs_lookup considers are
+ * LOOKUP_EXCL and LOOKUP_RENAME_TARGET, and
+ * we want those to be zero so the lookup isn't skipped.
+ */
+ res = nfs_lookup(dir, dentry, 0);
+ }
+ return finish_no_open(file, res);
+
+}
+EXPORT_SYMBOL_GPL(nfs_atomic_open_v23);
struct dentry *
nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
@@ -2249,7 +2300,8 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
d_drop(dentry);
if (fhandle->size == 0) {
- error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
+ error = NFS_PROTO(dir)->lookup(dir, dentry, &dentry->d_name,
+ fhandle, fattr);
if (error)
goto out_error;
}
@@ -2296,18 +2348,23 @@ EXPORT_SYMBOL_GPL(nfs_instantiate);
* that the operation succeeded on the server, but an error in the
* reply path made it appear to have failed.
*/
-int nfs_create(struct user_namespace *mnt_userns, struct inode *dir,
- struct dentry *dentry, umode_t mode, bool excl)
+static int nfs_do_create(struct inode *dir, struct dentry *dentry,
+ umode_t mode, int open_flags)
{
struct iattr attr;
- int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT;
int error;
+ open_flags |= O_CREAT;
+
dfprintk(VFS, "NFS: create(%s/%lu), %pd\n",
dir->i_sb->s_id, dir->i_ino, dentry);
attr.ia_mode = mode;
attr.ia_valid = ATTR_MODE;
+ if (open_flags & O_TRUNC) {
+ attr.ia_size = 0;
+ attr.ia_valid |= ATTR_SIZE;
+ }
trace_nfs_create_enter(dir, dentry, open_flags);
error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
@@ -2319,13 +2376,19 @@ out_err:
d_drop(dentry);
return error;
}
+
+int nfs_create(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *dentry, umode_t mode, bool excl)
+{
+ return nfs_do_create(dir, dentry, mode, excl ? O_EXCL : 0);
+}
EXPORT_SYMBOL_GPL(nfs_create);
/*
* See comments for nfs_proc_create regarding failed operations.
*/
int
-nfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+nfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct iattr attr;
@@ -2352,11 +2415,11 @@ EXPORT_SYMBOL_GPL(nfs_mknod);
/*
* See comments for nfs_proc_create regarding failed operations.
*/
-int nfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
- struct dentry *dentry, umode_t mode)
+struct dentry *nfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct iattr attr;
- int error;
+ struct dentry *ret;
dfprintk(VFS, "NFS: mkdir(%s/%lu), %pd\n",
dir->i_sb->s_id, dir->i_ino, dentry);
@@ -2365,14 +2428,9 @@ int nfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
attr.ia_mode = mode | S_IFDIR;
trace_nfs_mkdir_enter(dir, dentry);
- error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
- trace_nfs_mkdir_exit(dir, dentry, error);
- if (error != 0)
- goto out_err;
- return 0;
-out_err:
- d_drop(dentry);
- return error;
+ ret = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
+ trace_nfs_mkdir_exit(dir, dentry, PTR_ERR_OR_ZERO(ret));
+ return ret;
}
EXPORT_SYMBOL_GPL(nfs_mkdir);
@@ -2494,15 +2552,12 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
spin_unlock(&dentry->d_lock);
goto out;
}
- /* old devname */
- kfree(dentry->d_fsdata);
- dentry->d_fsdata = NFS_FSDATA_BLOCKED;
+ block_revalidate(dentry);
spin_unlock(&dentry->d_lock);
error = nfs_safe_remove(dentry);
nfs_dentry_remove_handle_error(dir, dentry, error);
- dentry->d_fsdata = NULL;
- wake_up_var(&dentry->d_fsdata);
+ unblock_revalidate(dentry);
out:
trace_nfs_unlink_exit(dir, dentry, error);
return error;
@@ -2524,10 +2579,10 @@ EXPORT_SYMBOL_GPL(nfs_unlink);
* now have a new file handle and can instantiate an in-core NFS inode
* and move the raw page into its mapping.
*/
-int nfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
- struct page *page;
+ struct folio *folio;
char *kaddr;
struct iattr attr;
unsigned int pathlen = strlen(symname);
@@ -2542,24 +2597,24 @@ int nfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
attr.ia_mode = S_IFLNK | S_IRWXUGO;
attr.ia_valid = ATTR_MODE;
- page = alloc_page(GFP_USER);
- if (!page)
+ folio = folio_alloc(GFP_USER, 0);
+ if (!folio)
return -ENOMEM;
- kaddr = page_address(page);
+ kaddr = folio_address(folio);
memcpy(kaddr, symname, pathlen);
if (pathlen < PAGE_SIZE)
memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
trace_nfs_symlink_enter(dir, dentry);
- error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
+ error = NFS_PROTO(dir)->symlink(dir, dentry, folio, pathlen, &attr);
trace_nfs_symlink_exit(dir, dentry, error);
if (error != 0) {
dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s) error %d\n",
dir->i_sb->s_id, dir->i_ino,
dentry, symname, error);
d_drop(dentry);
- __free_page(page);
+ folio_put(folio);
return error;
}
@@ -2569,18 +2624,13 @@ int nfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
* No big deal if we can't add this page to the page cache here.
* READLINK will get the missing page from the server if needed.
*/
- if (!add_to_page_cache_lru(page, d_inode(dentry)->i_mapping, 0,
- GFP_KERNEL)) {
- SetPageUptodate(page);
- unlock_page(page);
- /*
- * add_to_page_cache_lru() grabs an extra page refcount.
- * Drop it here to avoid leaking this page later.
- */
- put_page(page);
- } else
- __free_page(page);
+ if (filemap_add_folio(d_inode(dentry)->i_mapping, folio, 0,
+ GFP_KERNEL) == 0) {
+ folio_mark_uptodate(folio);
+ folio_unlock(folio);
+ }
+ folio_put(folio);
return 0;
}
EXPORT_SYMBOL_GPL(nfs_symlink);
@@ -2614,8 +2664,19 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
{
struct dentry *new_dentry = data->new_dentry;
- new_dentry->d_fsdata = NULL;
- wake_up_var(&new_dentry->d_fsdata);
+ unblock_revalidate(new_dentry);
+}
+
+static bool nfs_rename_is_unsafe_cross_dir(struct dentry *old_dentry,
+ struct dentry *new_dentry)
+{
+ struct nfs_server *server = NFS_SB(old_dentry->d_sb);
+
+ if (old_dentry->d_parent != new_dentry->d_parent)
+ return false;
+ if (server->fh_expire_type & NFS_FH_RENAME_UNSAFE)
+ return !(server->fh_expire_type & NFS_FH_NOEXPIRE_WITH_OPEN);
+ return true;
}
/*
@@ -2642,7 +2703,7 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
* If these conditions are met, we can drop the dentries before doing
* the rename.
*/
-int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
@@ -2677,11 +2738,6 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
if (WARN_ON(new_dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
WARN_ON(new_dentry->d_fsdata == NFS_FSDATA_BLOCKED))
goto out;
- if (new_dentry->d_fsdata) {
- /* old devname */
- kfree(new_dentry->d_fsdata);
- new_dentry->d_fsdata = NULL;
- }
spin_lock(&new_dentry->d_lock);
if (d_count(new_dentry) > 2) {
@@ -2703,18 +2759,21 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
new_dentry = dentry;
new_inode = NULL;
} else {
- new_dentry->d_fsdata = NFS_FSDATA_BLOCKED;
+ block_revalidate(new_dentry);
must_unblock = true;
spin_unlock(&new_dentry->d_lock);
}
}
- if (S_ISREG(old_inode->i_mode))
+ if (S_ISREG(old_inode->i_mode) &&
+ nfs_rename_is_unsafe_cross_dir(old_dentry, new_dentry))
nfs_sync_inode(old_inode);
task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
must_unblock ? nfs_unblock_rename : NULL);
if (IS_ERR(task)) {
+ if (must_unblock)
+ unblock_revalidate(new_dentry);
error = PTR_ERR(task);
goto out;
}
@@ -2963,7 +3022,7 @@ static u64 nfs_access_login_time(const struct task_struct *task,
rcu_read_lock();
for (;;) {
parent = rcu_dereference(task->real_parent);
- pcred = rcu_dereference(parent->cred);
+ pcred = __task_cred(parent);
if (parent == task || cred_fscmp(pcred, cred) != 0)
break;
task = parent;
@@ -3089,7 +3148,6 @@ static void nfs_access_add_rbtree(struct inode *inode,
else
goto found;
}
- set->timestamp = ktime_get_ns();
rb_link_node(&set->rb_node, parent, p);
rb_insert_color(&set->rb_node, root_node);
list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
@@ -3114,6 +3172,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set,
cache->fsgid = cred->fsgid;
cache->group_info = get_group_info(cred->group_info);
cache->mask = set->mask;
+ cache->timestamp = ktime_get_ns();
/* The above field assignments must be visible
* before this item appears on the lru. We cannot easily
@@ -3262,7 +3321,7 @@ static int nfs_execute_ok(struct inode *inode, int mask)
return ret;
}
-int nfs_permission(struct user_namespace *mnt_userns,
+int nfs_permission(struct mnt_idmap *idmap,
struct inode *inode,
int mask)
{
@@ -3313,7 +3372,7 @@ out_notsup:
res = nfs_revalidate_inode(inode, NFS_INO_INVALID_MODE |
NFS_INO_INVALID_OTHER);
if (res == 0)
- res = generic_permission(&init_user_ns, inode, mask);
+ res = generic_permission(&nop_mnt_idmap, inode, mask);
goto out;
}
EXPORT_SYMBOL_GPL(nfs_permission);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 1707f46b1335..48d89716193a 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -56,6 +56,7 @@
#include <linux/uaccess.h>
#include <linux/atomic.h>
+#include "delegation.h"
#include "internal.h"
#include "iostat.h"
#include "pnfs.h"
@@ -93,12 +94,10 @@ nfs_direct_handle_truncated(struct nfs_direct_req *dreq,
dreq->max_count = dreq_len;
if (dreq->count > dreq_len)
dreq->count = dreq_len;
-
- if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
- dreq->error = hdr->error;
- else /* Clear outstanding error if this is EOF */
- dreq->error = 0;
}
+
+ if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && !dreq->error)
+ dreq->error = hdr->error;
}
static void
@@ -120,6 +119,32 @@ nfs_direct_count_bytes(struct nfs_direct_req *dreq,
dreq->count = dreq_len;
}
+static void nfs_direct_truncate_request(struct nfs_direct_req *dreq,
+ struct nfs_page *req)
+{
+ loff_t offs = req_offset(req);
+ size_t req_start = (size_t)(offs - dreq->io_start);
+
+ if (req_start < dreq->max_count)
+ dreq->max_count = req_start;
+ if (req_start < dreq->count)
+ dreq->count = req_start;
+}
+
+static void nfs_direct_file_adjust_size_locked(struct inode *inode,
+ loff_t offset, size_t count)
+{
+ loff_t newsize = offset + (loff_t)count;
+ loff_t oldsize = i_size_read(inode);
+
+ if (newsize > oldsize) {
+ i_size_write(inode, newsize);
+ NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE;
+ trace_nfs_size_grow(inode, newsize);
+ nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
+ }
+}
+
/**
* nfs_swap_rw - NFS address space operation for swap I/O
* @iocb: target I/O control block
@@ -131,8 +156,6 @@ int nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
{
ssize_t ret;
- VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
-
if (iov_iter_rw(iter) == READ)
ret = nfs_file_direct_read(iocb, iter, true);
else
@@ -195,9 +218,10 @@ static void nfs_direct_req_release(struct nfs_direct_req *dreq)
kref_put(&dreq->kref, nfs_direct_req_free);
}
-ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq)
+ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq, loff_t offset)
{
- return dreq->bytes_left;
+ loff_t start = offset - dreq->io_start;
+ return dreq->max_count - start;
}
EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left);
@@ -263,6 +287,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
nfs_direct_count_bytes(dreq, hdr);
spin_unlock(&dreq->lock);
+ nfs_update_delegated_atime(dreq->inode);
+
while (!list_empty(&hdr->pages)) {
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
struct page *page = req->wb_page;
@@ -294,6 +320,7 @@ static void nfs_read_sync_pgio_error(struct list_head *head, int error)
static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
{
get_dreq(hdr->dreq);
+ set_bit(NFS_IOHDR_ODIRECT, &hdr->flags);
}
static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
@@ -343,14 +370,12 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
struct nfs_page *req;
unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
/* XXX do we need to do the eof zeroing found in async_filler? */
- req = nfs_create_request(dreq->ctx, pagevec[i],
- pgbase, req_len);
+ req = nfs_page_create_from_page(dreq->ctx, pagevec[i],
+ pgbase, pos, req_len);
if (IS_ERR(req)) {
result = PTR_ERR(req);
break;
}
- req->wb_index = pos >> PAGE_SHIFT;
- req->wb_offset = pos & ~PAGE_MASK;
if (!nfs_pageio_add_request(&desc, req)) {
result = desc.pg_error;
nfs_release_request(req);
@@ -360,7 +385,6 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
bytes -= req_len;
requested_bytes += req_len;
pos += req_len;
- dreq->bytes_left -= req_len;
}
nfs_direct_release_pages(pagevec, npages);
kvfree(pagevec);
@@ -432,7 +456,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
goto out;
dreq->inode = inode;
- dreq->bytes_left = dreq->max_count = count;
+ dreq->max_count = count;
dreq->io_start = iocb->ki_pos;
dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
l_ctx = nfs_get_lock_context(dreq->ctx);
@@ -448,8 +472,16 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
if (user_backed_iter(iter))
dreq->flags = NFS_ODIRECT_SHOULD_DIRTY;
- if (!swap)
- nfs_start_io_direct(inode);
+ if (!swap) {
+ result = nfs_start_io_direct(inode);
+ if (result) {
+ /* release the reference that would usually be
+ * consumed by nfs_direct_read_schedule_iovec()
+ */
+ nfs_direct_req_release(dreq);
+ goto out_release;
+ }
+ }
NFS_I(inode)->read_io += count;
requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos);
@@ -474,21 +506,47 @@ out:
return result;
}
-static void
-nfs_direct_join_group(struct list_head *list, struct inode *inode)
+static void nfs_direct_add_page_head(struct list_head *list,
+ struct nfs_page *req)
+{
+ struct nfs_page *head = req->wb_head;
+
+ if (!list_empty(&head->wb_list) || !nfs_lock_request(head))
+ return;
+ if (!list_empty(&head->wb_list)) {
+ nfs_unlock_request(head);
+ return;
+ }
+ list_add(&head->wb_list, list);
+ kref_get(&head->wb_kref);
+ kref_get(&head->wb_kref);
+}
+
+static void nfs_direct_join_group(struct list_head *list,
+ struct nfs_commit_info *cinfo,
+ struct inode *inode)
{
- struct nfs_page *req, *next;
+ struct nfs_page *req, *subreq;
list_for_each_entry(req, list, wb_list) {
- if (req->wb_head != req || req->wb_this_page == req)
+ if (req->wb_head != req) {
+ nfs_direct_add_page_head(&req->wb_list, req);
continue;
- for (next = req->wb_this_page;
- next != req->wb_head;
- next = next->wb_this_page) {
- nfs_list_remove_request(next);
- nfs_release_request(next);
}
- nfs_join_page_group(req, inode);
+ subreq = req->wb_this_page;
+ if (subreq == req)
+ continue;
+ do {
+ /*
+ * Remove subrequests from this list before freeing
+ * them in the call to nfs_join_page_group().
+ */
+ if (!list_empty(&subreq->wb_list)) {
+ nfs_list_remove_request(subreq);
+ nfs_release_request(subreq);
+ }
+ } while ((subreq = subreq->wb_this_page) != req);
+ nfs_join_page_group(req, cinfo, inode);
}
}
@@ -506,20 +564,15 @@ nfs_direct_write_scan_commit_list(struct inode *inode,
static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
{
struct nfs_pageio_descriptor desc;
- struct nfs_page *req, *tmp;
+ struct nfs_page *req;
LIST_HEAD(reqs);
struct nfs_commit_info cinfo;
- LIST_HEAD(failed);
nfs_init_cinfo_from_dreq(&cinfo, dreq);
nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
- nfs_direct_join_group(&reqs, dreq->inode);
+ nfs_direct_join_group(&reqs, &cinfo, dreq->inode);
- dreq->count = 0;
- dreq->max_count = 0;
- list_for_each_entry(req, &reqs, wb_list)
- dreq->max_count += req->wb_bytes;
nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo);
get_dreq(dreq);
@@ -527,27 +580,40 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
&nfs_direct_write_completion_ops);
desc.pg_dreq = dreq;
- list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
+ while (!list_empty(&reqs)) {
+ req = nfs_list_entry(reqs.next);
/* Bump the transmission count */
req->wb_nio++;
if (!nfs_pageio_add_request(&desc, req)) {
- nfs_list_move_request(req, &failed);
- spin_lock(&cinfo.inode->i_lock);
- dreq->flags = 0;
- if (desc.pg_error < 0)
+ spin_lock(&dreq->lock);
+ if (dreq->error < 0) {
+ desc.pg_error = dreq->error;
+ } else if (desc.pg_error != -EAGAIN) {
+ dreq->flags = 0;
+ if (!desc.pg_error)
+ desc.pg_error = -EIO;
dreq->error = desc.pg_error;
- else
- dreq->error = -EIO;
- spin_unlock(&cinfo.inode->i_lock);
+ } else
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ spin_unlock(&dreq->lock);
+ break;
}
nfs_release_request(req);
}
nfs_pageio_complete(&desc);
- while (!list_empty(&failed)) {
- req = nfs_list_entry(failed.next);
+ while (!list_empty(&reqs)) {
+ req = nfs_list_entry(reqs.next);
nfs_list_remove_request(req);
nfs_unlock_and_release_request(req);
+ if (desc.pg_error == -EAGAIN) {
+ nfs_mark_request_commit(req, NULL, &cinfo, 0);
+ } else {
+ spin_lock(&dreq->lock);
+ nfs_direct_truncate_request(dreq, req);
+ spin_unlock(&dreq->lock);
+ nfs_release_request(req);
+ }
}
if (put_dreq(dreq))
@@ -564,30 +630,38 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
trace_nfs_direct_commit_complete(dreq);
+ spin_lock(&dreq->lock);
if (status < 0) {
/* Errors in commit are fatal */
dreq->error = status;
- dreq->max_count = 0;
- dreq->count = 0;
dreq->flags = NFS_ODIRECT_DONE;
} else {
status = dreq->error;
}
+ spin_unlock(&dreq->lock);
nfs_init_cinfo_from_dreq(&cinfo, dreq);
while (!list_empty(&data->pages)) {
req = nfs_list_entry(data->pages.next);
nfs_list_remove_request(req);
- if (status >= 0 && !nfs_write_match_verf(verf, req)) {
- dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ if (status < 0) {
+ spin_lock(&dreq->lock);
+ nfs_direct_truncate_request(dreq, req);
+ spin_unlock(&dreq->lock);
+ nfs_release_request(req);
+ } else if (!nfs_write_match_verf(verf, req)) {
+ spin_lock(&dreq->lock);
+ if (dreq->flags == 0)
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ spin_unlock(&dreq->lock);
/*
* Despite the reboot, the write was successful,
* so reset wb_nio.
*/
req->wb_nio = 0;
nfs_mark_request_commit(req, NULL, &cinfo, 0);
- } else /* Error or match */
+ } else
nfs_release_request(req);
nfs_unlock_and_release_request(req);
}
@@ -622,10 +696,17 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
LIST_HEAD(mds_list);
nfs_init_cinfo_from_dreq(&cinfo, dreq);
+ nfs_commit_begin(cinfo.mds);
nfs_scan_commit(dreq->inode, &mds_list, &cinfo);
res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo);
- if (res < 0) /* res == -ENOMEM */
- nfs_direct_write_reschedule(dreq);
+ if (res < 0) { /* res == -ENOMEM */
+ spin_lock(&dreq->lock);
+ if (dreq->flags == 0)
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ spin_unlock(&dreq->lock);
+ }
+ if (nfs_commit_end(cinfo.mds))
+ nfs_direct_write_complete(dreq);
}
static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq)
@@ -640,6 +721,7 @@ static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq)
while (!list_empty(&reqs)) {
req = nfs_list_entry(reqs.next);
nfs_list_remove_request(req);
+ nfs_direct_truncate_request(dreq, req);
nfs_release_request(req);
nfs_unlock_and_release_request(req);
}
@@ -675,7 +757,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
{
struct nfs_direct_req *dreq = hdr->dreq;
struct nfs_commit_info cinfo;
- struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+ struct inode *inode = dreq->inode;
int flags = NFS_ODIRECT_DONE;
trace_nfs_direct_write_completion(dreq);
@@ -689,14 +771,21 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
}
nfs_direct_count_bytes(dreq, hdr);
- if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags)) {
+ if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags) &&
+ !test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
if (!dreq->flags)
dreq->flags = NFS_ODIRECT_DO_COMMIT;
flags = dreq->flags;
}
spin_unlock(&dreq->lock);
+ spin_lock(&inode->i_lock);
+ nfs_direct_file_adjust_size_locked(inode, dreq->io_start, dreq->count);
+ nfs_update_delegated_mtime_locked(dreq->inode);
+ spin_unlock(&inode->i_lock);
+
while (!list_empty(&hdr->pages)) {
+ struct nfs_page *req;
req = nfs_list_entry(hdr->pages.next);
nfs_list_remove_request(req);
@@ -733,18 +822,23 @@ static void nfs_write_sync_pgio_error(struct list_head *head, int error)
static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
{
struct nfs_direct_req *dreq = hdr->dreq;
+ struct nfs_page *req;
+ struct nfs_commit_info cinfo;
trace_nfs_direct_write_reschedule_io(dreq);
+ nfs_init_cinfo_from_dreq(&cinfo, dreq);
spin_lock(&dreq->lock);
- if (dreq->error == 0) {
+ if (dreq->error == 0)
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
- /* fake unstable write to let common nfs resend pages */
- hdr->verf.committed = NFS_UNSTABLE;
- hdr->good_bytes = hdr->args.offset + hdr->args.count -
- hdr->io_start;
- }
+ set_bit(NFS_IOHDR_REDO, &hdr->flags);
spin_unlock(&dreq->lock);
+ while (!list_empty(&hdr->pages)) {
+ req = nfs_list_entry(hdr->pages.next);
+ nfs_list_remove_request(req);
+ nfs_unlock_request(req);
+ nfs_mark_request_commit(req, NULL, &cinfo, 0);
+ }
}
static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
@@ -772,9 +866,11 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
{
struct nfs_pageio_descriptor desc;
struct inode *inode = dreq->inode;
+ struct nfs_commit_info cinfo;
ssize_t result = 0;
size_t requested_bytes = 0;
size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
+ bool defer = false;
trace_nfs_direct_write_schedule_iovec(dreq);
@@ -802,8 +898,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
struct nfs_page *req;
unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
- req = nfs_create_request(dreq->ctx, pagevec[i],
- pgbase, req_len);
+ req = nfs_page_create_from_page(dreq->ctx, pagevec[i],
+ pgbase, pos, req_len);
if (IS_ERR(req)) {
result = PTR_ERR(req);
break;
@@ -815,19 +911,36 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
break;
}
+ pgbase = 0;
+ bytes -= req_len;
+ requested_bytes += req_len;
+ pos += req_len;
+
+ if (defer) {
+ nfs_mark_request_commit(req, NULL, &cinfo, 0);
+ continue;
+ }
+
nfs_lock_request(req);
- req->wb_index = pos >> PAGE_SHIFT;
- req->wb_offset = pos & ~PAGE_MASK;
- if (!nfs_pageio_add_request(&desc, req)) {
+ if (nfs_pageio_add_request(&desc, req))
+ continue;
+
+ /* Exit on hard errors */
+ if (desc.pg_error < 0 && desc.pg_error != -EAGAIN) {
result = desc.pg_error;
nfs_unlock_and_release_request(req);
break;
}
- pgbase = 0;
- bytes -= req_len;
- requested_bytes += req_len;
- pos += req_len;
- dreq->bytes_left -= req_len;
+
+ /* If the error is soft, defer remaining requests */
+ nfs_init_cinfo_from_dreq(&cinfo, dreq);
+ spin_lock(&dreq->lock);
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ spin_unlock(&dreq->lock);
+ nfs_unlock_request(req);
+ nfs_mark_request_commit(req, NULL, &cinfo, 0);
+ desc.pg_error = 0;
+ defer = true;
}
nfs_direct_release_pages(pagevec, npages);
kvfree(pagevec);
@@ -908,7 +1021,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
goto out;
dreq->inode = inode;
- dreq->bytes_left = dreq->max_count = count;
+ dreq->max_count = count;
dreq->io_start = pos;
dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
l_ctx = nfs_get_lock_context(dreq->ctx);
@@ -926,7 +1039,14 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
requested = nfs_direct_write_schedule_iovec(dreq, iter, pos,
FLUSH_STABLE);
} else {
- nfs_start_io_direct(inode);
+ result = nfs_start_io_direct(inode);
+ if (result) {
+ /* release the reference that would usually be
+ * consumed by nfs_direct_write_schedule_iovec()
+ */
+ nfs_direct_req_release(dreq);
+ goto out_release;
+ }
requested = nfs_direct_write_schedule_iovec(dreq, iter, pos,
FLUSH_COND_STABLE);
@@ -966,8 +1086,7 @@ int __init nfs_init_directcache(void)
{
nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
sizeof(struct nfs_direct_req),
- 0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ 0, SLAB_RECLAIM_ACCOUNT,
NULL);
if (nfs_direct_cachep == NULL)
return -ENOMEM;
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index 6603b5cee029..714975e5c0db 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -7,14 +7,16 @@
* Resolves DNS hostnames into valid ip addresses
*/
-#ifdef CONFIG_NFS_USE_KERNEL_DNS
-
#include <linux/module.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/addr.h>
-#include <linux/dns_resolver.h>
+
#include "dns_resolve.h"
+#ifdef CONFIG_NFS_USE_KERNEL_DNS
+
+#include <linux/dns_resolver.h>
+
ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
struct sockaddr_storage *ss, size_t salen)
{
@@ -35,7 +37,6 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
#else
-#include <linux/module.h>
#include <linux/hash.h>
#include <linux/string.h>
#include <linux/kmod.h>
@@ -43,15 +44,12 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
#include <linux/socket.h>
#include <linux/seq_file.h>
#include <linux/inet.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/cache.h>
#include <linux/sunrpc/svcauth.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/nfs_fs.h>
#include "nfs4_fs.h"
-#include "dns_resolve.h"
#include "cache_lib.h"
#include "netns.h"
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index 01596f2d0a1e..a10dd5f9d078 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -42,7 +42,7 @@ nfs_encode_fh(struct inode *inode, __u32 *p, int *max_len, struct inode *parent)
dprintk("%s: max fh len %d inode %p parent %p",
__func__, *max_len, inode, parent);
- if (*max_len < len || IS_AUTOMOUNT(inode)) {
+ if (*max_len < len) {
dprintk("%s: fh len %d too small, required %d\n",
__func__, *max_len, len);
*max_len = len;
@@ -66,14 +66,21 @@ nfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
{
struct nfs_fattr *fattr = NULL;
struct nfs_fh *server_fh = nfs_exp_embedfh(fid->raw);
- size_t fh_size = offsetof(struct nfs_fh, data) + server_fh->size;
+ size_t fh_size = offsetof(struct nfs_fh, data);
const struct nfs_rpc_ops *rpc_ops;
struct dentry *dentry;
struct inode *inode;
- int len = EMBED_FH_OFF + XDR_QUADLEN(fh_size);
+ int len = EMBED_FH_OFF;
u32 *p = fid->raw;
int ret;
+ /* Initial check of bounds */
+ if (fh_len < len + XDR_QUADLEN(fh_size) ||
+ fh_len > XDR_QUADLEN(NFS_MAXFHSIZE))
+ return NULL;
+ /* Calculate embedded filehandle size */
+ fh_size += server_fh->size;
+ len += XDR_QUADLEN(fh_size);
/* NULL translates to ESTALE */
if (fh_len < len || fh_type != len)
return NULL;
@@ -145,18 +152,15 @@ out:
return parent;
}
-static u64 nfs_fetch_iversion(struct inode *inode)
-{
- nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
- return inode_peek_iversion_raw(inode);
-}
-
const struct export_operations nfs_export_ops = {
.encode_fh = nfs_encode_fh,
.fh_to_dentry = nfs_fh_to_dentry,
.get_parent = nfs_get_parent,
- .fetch_iversion = nfs_fetch_iversion,
- .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK|
- EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS|
- EXPORT_OP_NOATOMIC_ATTR,
+ .flags = EXPORT_OP_NOWCC |
+ EXPORT_OP_NOSUBTREECHK |
+ EXPORT_OP_CLOSE_BEFORE_UNLINK |
+ EXPORT_OP_REMOTE_FS |
+ EXPORT_OP_NOATOMIC_ATTR |
+ EXPORT_OP_FLUSH_ON_CLOSE |
+ EXPORT_OP_NOLOCKS,
};
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index d8ec889a4b3f..d020aab40c64 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -28,9 +28,12 @@
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/gfp.h>
+#include <linux/rmap.h>
#include <linux/swap.h>
+#include <linux/compaction.h>
#include <linux/uaccess.h>
+#include <linux/filelock.h>
#include "delegation.h"
#include "internal.h"
@@ -158,6 +161,8 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t result;
+ trace_nfs_file_read(iocb, to);
+
if (iocb->ki_flags & IOCB_DIRECT)
return nfs_file_direct_read(iocb, to, false);
@@ -165,7 +170,10 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
iocb->ki_filp,
iov_iter_count(to), (unsigned long) iocb->ki_pos);
- nfs_start_io_read(inode);
+ result = nfs_start_io_read(inode);
+ if (result)
+ return result;
+
result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
if (!result) {
result = generic_file_read_iter(iocb, to);
@@ -177,25 +185,50 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
}
EXPORT_SYMBOL_GPL(nfs_file_read);
+ssize_t
+nfs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags)
+{
+ struct inode *inode = file_inode(in);
+ ssize_t result;
+
+ dprintk("NFS: splice_read(%pD2, %zu@%llu)\n", in, len, *ppos);
+
+ result = nfs_start_io_read(inode);
+ if (result)
+ return result;
+
+ result = nfs_revalidate_mapping(inode, in->f_mapping);
+ if (!result) {
+ result = filemap_splice_read(in, ppos, pipe, len, flags);
+ if (result > 0)
+ nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
+ }
+ nfs_end_io_read(inode);
+ return result;
+}
+EXPORT_SYMBOL_GPL(nfs_file_splice_read);
+
int
-nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
+nfs_file_mmap_prepare(struct vm_area_desc *desc)
{
+ struct file *file = desc->file;
struct inode *inode = file_inode(file);
int status;
dprintk("NFS: mmap(%pD2)\n", file);
- /* Note: generic_file_mmap() returns ENOSYS on nommu systems
+ /* Note: generic_file_mmap_prepare() returns ENOSYS on nommu systems
* so we call that before revalidating the mapping
*/
- status = generic_file_mmap(file, vma);
+ status = generic_file_mmap_prepare(desc);
if (!status) {
- vma->vm_ops = &nfs_file_vm_ops;
+ desc->vm_ops = &nfs_file_vm_ops;
status = nfs_revalidate_mapping(inode, file->f_mapping);
}
return status;
}
-EXPORT_SYMBOL_GPL(nfs_file_mmap);
+EXPORT_SYMBOL_GPL(nfs_file_mmap_prepare);
/*
* Flush any dirty pages for this process, and check for write errors.
@@ -250,6 +283,37 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
}
EXPORT_SYMBOL_GPL(nfs_file_fsync);
+void nfs_truncate_last_folio(struct address_space *mapping, loff_t from,
+ loff_t to)
+{
+ struct folio *folio;
+
+ if (from >= to)
+ return;
+
+ folio = filemap_lock_folio(mapping, from >> PAGE_SHIFT);
+ if (IS_ERR(folio))
+ return;
+
+ if (folio_mkclean(folio))
+ folio_mark_dirty(folio);
+
+ if (folio_test_uptodate(folio)) {
+ loff_t fpos = folio_pos(folio);
+ size_t offset = from - fpos;
+ size_t end = folio_size(folio);
+
+ if (to - fpos < end)
+ end = to - fpos;
+ folio_zero_segment(folio, offset, end);
+ trace_nfs_size_truncate_folio(mapping->host, to);
+ }
+
+ folio_unlock(folio);
+ folio_put(folio);
+}
+EXPORT_SYMBOL_GPL(nfs_truncate_last_folio);
+
/*
* Decide whether a read/modify/write cycle may be more efficient
* then a modify/write/read cycle when writing to a page in the
@@ -276,28 +340,31 @@ EXPORT_SYMBOL_GPL(nfs_file_fsync);
* and that the new data won't completely replace the old data in
* that range of the file.
*/
-static bool nfs_full_page_write(struct page *page, loff_t pos, unsigned int len)
+static bool nfs_folio_is_full_write(struct folio *folio, loff_t pos,
+ unsigned int len)
{
- unsigned int pglen = nfs_page_length(page);
- unsigned int offset = pos & (PAGE_SIZE - 1);
+ unsigned int pglen = nfs_folio_length(folio);
+ unsigned int offset = offset_in_folio(folio, pos);
unsigned int end = offset + len;
return !pglen || (end >= pglen && !offset);
}
-static bool nfs_want_read_modify_write(struct file *file, struct page *page,
- loff_t pos, unsigned int len)
+static bool nfs_want_read_modify_write(struct file *file, struct folio *folio,
+ loff_t pos, unsigned int len)
{
/*
* Up-to-date pages, those with ongoing or full-page write
* don't need read/modify/write
*/
- if (PageUptodate(page) || PagePrivate(page) ||
- nfs_full_page_write(page, pos, len))
+ if (folio_test_uptodate(folio) || folio_test_private(folio) ||
+ nfs_folio_is_full_write(folio, pos, len))
return false;
- if (pnfs_ld_read_whole_page(file->f_mapping->host))
+ if (pnfs_ld_read_whole_page(file_inode(file)))
return true;
+ if (folio_test_dropbehind(folio))
+ return false;
/* Open for reading too? */
if (file->f_mode & FMODE_READ)
return true;
@@ -312,47 +379,59 @@ static bool nfs_want_read_modify_write(struct file *file, struct page *page,
* If the writer ends up delaying the write, the writer needs to
* increment the page use counts until he is done with the page.
*/
-static int nfs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+static int nfs_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len, struct folio **foliop,
+ void **fsdata)
{
- int ret;
- pgoff_t index = pos >> PAGE_SHIFT;
- struct page *page;
+ struct folio *folio;
+ struct file *file = iocb->ki_filp;
int once_thru = 0;
+ int ret;
+
+ trace_nfs_write_begin(file_inode(file), pos, len);
dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n",
file, mapping->host->i_ino, len, (long long) pos);
+ nfs_truncate_last_folio(mapping, i_size_read(mapping->host), pos);
start:
- page = grab_cache_page_write_begin(mapping, index);
- if (!page)
- return -ENOMEM;
- *pagep = page;
+ folio = write_begin_get_folio(iocb, mapping, pos >> PAGE_SHIFT, len);
+ if (IS_ERR(folio)) {
+ ret = PTR_ERR(folio);
+ goto out;
+ }
+ *foliop = folio;
- ret = nfs_flush_incompatible(file, page);
+ ret = nfs_flush_incompatible(file, folio);
if (ret) {
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
} else if (!once_thru &&
- nfs_want_read_modify_write(file, page, pos, len)) {
+ nfs_want_read_modify_write(file, folio, pos, len)) {
once_thru = 1;
- ret = nfs_read_folio(file, page_folio(page));
- put_page(page);
+ folio_clear_dropbehind(folio);
+ ret = nfs_read_folio(file, folio);
+ folio_put(folio);
if (!ret)
goto start;
}
+out:
+ trace_nfs_write_begin_done(file_inode(file), pos, len, ret);
return ret;
}
-static int nfs_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+static int nfs_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct folio *folio, void *fsdata)
{
- unsigned offset = pos & (PAGE_SIZE - 1);
+ struct file *file = iocb->ki_filp;
struct nfs_open_context *ctx = nfs_file_open_context(file);
+ unsigned offset = offset_in_folio(folio, pos);
int status;
+ trace_nfs_write_end(file_inode(file), pos, len);
dfprintk(PAGECACHE, "NFS: write_end(%pD2(%lu), %u@%lld)\n",
file, mapping->host->i_ino, len, (long long) pos);
@@ -360,34 +439,37 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
* Zero any uninitialised parts of the page, and then mark the page
* as up to date if it turns out that we're extending the file.
*/
- if (!PageUptodate(page)) {
- unsigned pglen = nfs_page_length(page);
+ if (!folio_test_uptodate(folio)) {
+ size_t fsize = folio_size(folio);
+ unsigned pglen = nfs_folio_length(folio);
unsigned end = offset + copied;
if (pglen == 0) {
- zero_user_segments(page, 0, offset,
- end, PAGE_SIZE);
- SetPageUptodate(page);
+ folio_zero_segments(folio, 0, offset, end, fsize);
+ folio_mark_uptodate(folio);
} else if (end >= pglen) {
- zero_user_segment(page, end, PAGE_SIZE);
+ folio_zero_segment(folio, end, fsize);
if (offset == 0)
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
} else
- zero_user_segment(page, pglen, PAGE_SIZE);
+ folio_zero_segment(folio, pglen, fsize);
}
- status = nfs_updatepage(file, page, offset, copied);
+ status = nfs_update_folio(file, folio, offset, copied);
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
- if (status < 0)
+ if (status < 0) {
+ trace_nfs_write_end_done(file_inode(file), pos, len, status);
return status;
+ }
NFS_I(mapping->host)->write_io += copied;
if (nfs_ctx_key_to_expire(ctx, mapping->host))
nfs_wb_all(mapping->host);
+ trace_nfs_write_end_done(file_inode(file), pos, len, copied);
return copied;
}
@@ -401,14 +483,17 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
static void nfs_invalidate_folio(struct folio *folio, size_t offset,
size_t length)
{
+ struct inode *inode = folio->mapping->host;
dfprintk(PAGECACHE, "NFS: invalidate_folio(%lu, %zu, %zu)\n",
folio->index, offset, length);
- if (offset != 0 || length < folio_size(folio))
- return;
/* Cancel any unstarted writes on this page */
- nfs_wb_folio_cancel(folio->mapping->host, folio);
- folio_wait_fscache(folio);
+ if (offset != 0 || length < folio_size(folio))
+ nfs_wb_folio(inode, folio);
+ else
+ nfs_wb_folio_cancel(inode, folio);
+ folio_wait_private_2(folio); /* [DEPRECATED] */
+ trace_nfs_invalidate_folio(inode, folio_pos(folio) + offset, length);
}
/*
@@ -422,8 +507,13 @@ static bool nfs_release_folio(struct folio *folio, gfp_t gfp)
dfprintk(PAGECACHE, "NFS: release_folio(%p)\n", folio);
/* If the private flag is set, then the folio is not freeable */
- if (folio_test_private(folio))
- return false;
+ if (folio_test_private(folio)) {
+ if ((current_gfp_context(gfp) & GFP_KERNEL) != GFP_KERNEL ||
+ current_is_kswapd() || current_is_kcompactd())
+ return false;
+ if (nfs_wb_folio(folio->mapping->host, folio) < 0)
+ return false;
+ }
return nfs_fscache_release_folio(folio, gfp);
}
@@ -464,12 +554,16 @@ static void nfs_check_dirty_writeback(struct folio *folio,
static int nfs_launder_folio(struct folio *folio)
{
struct inode *inode = folio->mapping->host;
+ int ret;
dfprintk(PAGECACHE, "NFS: launder_folio(%ld, %llu)\n",
inode->i_ino, folio_pos(folio));
- folio_wait_fscache(folio);
- return nfs_wb_page(inode, &folio->page);
+ folio_wait_private_2(folio); /* [DEPRECATED] */
+ ret = nfs_wb_folio(inode, folio);
+ trace_nfs_launder_folio_done(inode, folio_pos(folio),
+ folio_size(folio), ret);
+ return ret;
}
static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
@@ -524,7 +618,6 @@ const struct address_space_operations nfs_file_aops = {
.read_folio = nfs_read_folio,
.readahead = nfs_readahead,
.dirty_folio = filemap_dirty_folio,
- .writepage = nfs_writepage,
.writepages = nfs_writepages,
.write_begin = nfs_write_begin,
.write_end = nfs_write_end,
@@ -533,7 +626,7 @@ const struct address_space_operations nfs_file_aops = {
.migrate_folio = nfs_migrate_folio,
.launder_folio = nfs_launder_folio,
.is_dirty_writeback = nfs_check_dirty_writeback,
- .error_remove_page = generic_error_remove_page,
+ .error_remove_folio = generic_error_remove_folio,
.swap_activate = nfs_swap_activate,
.swap_deactivate = nfs_swap_deactivate,
.swap_rw = nfs_swap_rw,
@@ -546,22 +639,22 @@ const struct address_space_operations nfs_file_aops = {
*/
static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
{
- struct page *page = vmf->page;
struct file *filp = vmf->vma->vm_file;
struct inode *inode = file_inode(filp);
unsigned pagelen;
vm_fault_t ret = VM_FAULT_NOPAGE;
struct address_space *mapping;
+ struct folio *folio = page_folio(vmf->page);
dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n",
- filp, filp->f_mapping->host->i_ino,
- (long long)page_offset(page));
+ filp, filp->f_mapping->host->i_ino,
+ (long long)folio_pos(folio));
sb_start_pagefault(inode->i_sb);
/* make sure the cache has finished storing the page */
- if (PageFsCache(page) &&
- wait_on_page_fscache_killable(vmf->page) < 0) {
+ if (folio_test_private_2(folio) && /* [DEPRECATED] */
+ folio_wait_private_2_killable(folio) < 0) {
ret = VM_FAULT_RETRY;
goto out;
}
@@ -570,25 +663,25 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
nfs_wait_bit_killable,
TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
- lock_page(page);
- mapping = page_file_mapping(page);
+ folio_lock(folio);
+ mapping = folio->mapping;
if (mapping != inode->i_mapping)
goto out_unlock;
- wait_on_page_writeback(page);
+ folio_wait_writeback(folio);
- pagelen = nfs_page_length(page);
+ pagelen = nfs_folio_length(folio);
if (pagelen == 0)
goto out_unlock;
ret = VM_FAULT_LOCKED;
- if (nfs_flush_incompatible(filp, page) == 0 &&
- nfs_updatepage(filp, page, 0, pagelen) == 0)
+ if (nfs_flush_incompatible(filp, folio) == 0 &&
+ nfs_update_folio(filp, folio, 0, pagelen) == 0)
goto out;
ret = VM_FAULT_SIGBUS;
out_unlock:
- unlock_page(page);
+ folio_unlock(folio);
out:
sb_end_pagefault(inode->i_sb);
return ret;
@@ -609,6 +702,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
errseq_t since;
int error;
+ trace_nfs_file_write(iocb, from);
+
result = nfs_key_timeout_notify(file, inode);
if (result)
return result;
@@ -633,19 +728,17 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
nfs_clear_invalid_mapping(file->f_mapping);
since = filemap_sample_wb_err(file->f_mapping);
- nfs_start_io_write(inode);
+ error = nfs_start_io_write(inode);
+ if (error)
+ return error;
result = generic_write_checks(iocb, from);
- if (result > 0) {
- current->backing_dev_info = inode_to_bdi(inode);
+ if (result > 0)
result = generic_perform_write(iocb, from);
- current->backing_dev_info = NULL;
- }
nfs_end_io_write(inode);
if (result <= 0)
goto out;
written = result;
- iocb->ki_pos += written;
nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
if (mntflags & NFS_MOUNT_WRITE_EAGER) {
@@ -691,17 +784,17 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
struct inode *inode = filp->f_mapping->host;
int status = 0;
- unsigned int saved_type = fl->fl_type;
+ unsigned int saved_type = fl->c.flc_type;
/* Try local locking first */
posix_test_lock(filp, fl);
- if (fl->fl_type != F_UNLCK) {
+ if (fl->c.flc_type != F_UNLCK) {
/* found a conflict */
goto out;
}
- fl->fl_type = saved_type;
+ fl->c.flc_type = saved_type;
- if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+ if (nfs_have_read_or_write_delegation(inode))
goto out_noconflict;
if (is_local)
@@ -711,7 +804,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
out:
return status;
out_noconflict:
- fl->fl_type = F_UNLCK;
+ fl->c.flc_type = F_UNLCK;
goto out;
}
@@ -736,7 +829,7 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
* If we're signalled while cleaning up locks on process exit, we
* still need to complete the unlock.
*/
- if (status < 0 && !(fl->fl_flags & FL_CLOSE))
+ if (status < 0 && !(fl->c.flc_flags & FL_CLOSE))
return status;
}
@@ -784,7 +877,7 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
* This makes locking act as a cache coherency point.
*/
nfs_sync_mapping(filp->f_mapping);
- if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) {
+ if (!nfs_have_read_or_write_delegation(inode)) {
nfs_zap_caches(inode);
if (mapping_mapped(filp->f_mapping))
nfs_revalidate_mapping(inode, filp->f_mapping);
@@ -803,12 +896,12 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
int is_local = 0;
dprintk("NFS: lock(%pD2, t=%x, fl=%x, r=%lld:%lld)\n",
- filp, fl->fl_type, fl->fl_flags,
+ filp, fl->c.flc_type, fl->c.flc_flags,
(long long)fl->fl_start, (long long)fl->fl_end);
nfs_inc_stats(inode, NFSIOS_VFSLOCK);
- if (fl->fl_flags & FL_RECLAIM)
+ if (fl->c.flc_flags & FL_RECLAIM)
return -ENOGRACE;
if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL)
@@ -822,7 +915,7 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
if (IS_GETLK(cmd))
ret = do_getlk(filp, cmd, fl, is_local);
- else if (fl->fl_type == F_UNLCK)
+ else if (lock_is_unlock(fl))
ret = do_unlk(filp, cmd, fl, is_local);
else
ret = do_setlk(filp, cmd, fl, is_local);
@@ -840,16 +933,16 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
int is_local = 0;
dprintk("NFS: flock(%pD2, t=%x, fl=%x)\n",
- filp, fl->fl_type, fl->fl_flags);
+ filp, fl->c.flc_type, fl->c.flc_flags);
- if (!(fl->fl_flags & FL_FLOCK))
+ if (!(fl->c.flc_flags & FL_FLOCK))
return -ENOLCK;
if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
is_local = 1;
/* We're simulating flock() locks using posix locks on the server */
- if (fl->fl_type == F_UNLCK)
+ if (lock_is_unlock(fl))
return do_unlk(filp, cmd, fl, is_local);
return do_setlk(filp, cmd, fl, is_local);
}
@@ -859,16 +952,17 @@ const struct file_operations nfs_file_operations = {
.llseek = nfs_file_llseek,
.read_iter = nfs_file_read,
.write_iter = nfs_file_write,
- .mmap = nfs_file_mmap,
+ .mmap_prepare = nfs_file_mmap_prepare,
.open = nfs_file_open,
.flush = nfs_file_flush,
.release = nfs_file_release,
.fsync = nfs_file_fsync,
.lock = nfs_lock,
.flock = nfs_flock,
- .splice_read = generic_file_splice_read,
+ .splice_read = nfs_file_splice_read,
.splice_write = iter_file_splice_write,
.check_flags = nfs_check_flags,
.setlease = simple_nosetlease,
+ .fop_flags = FOP_DONTCACHE,
};
EXPORT_SYMBOL_GPL(nfs_file_operations);
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 4974cd18ca46..5c4551117c58 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -488,7 +488,7 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr)
/* Perform an asynchronous read to ds */
nfs_initiate_pgio(ds_clnt, hdr, hdr->cred,
NFS_PROTO(hdr->inode), &filelayout_read_call_ops,
- 0, RPC_TASK_SOFTCONN);
+ 0, RPC_TASK_SOFTCONN, NULL);
return PNFS_ATTEMPTED;
}
@@ -530,7 +530,7 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
/* Perform an asynchronous write */
nfs_initiate_pgio(ds_clnt, hdr, hdr->cred,
NFS_PROTO(hdr->inode), &filelayout_write_call_ops,
- sync, RPC_TASK_SOFTCONN);
+ sync, RPC_TASK_SOFTCONN, NULL);
return PNFS_ATTEMPTED;
}
@@ -605,14 +605,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
dprintk("--> %s\n", __func__);
- /* FIXME: remove this check when layout segment support is added */
- if (lgr->range.offset != 0 ||
- lgr->range.length != NFS4_MAX_UINT64) {
- dprintk("%s Only whole file layouts supported. Use MDS i/o\n",
- __func__);
- goto out;
- }
-
if (fl->pattern_offset > lgr->range.offset) {
dprintk("%s pattern_offset %lld too large\n",
__func__, fl->pattern_offset);
@@ -654,19 +646,19 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
{
struct xdr_stream stream;
struct xdr_buf buf;
- struct page *scratch;
+ struct folio *scratch;
__be32 *p;
uint32_t nfl_util;
int i;
dprintk("%s: set_layout_map Begin\n", __func__);
- scratch = alloc_page(gfp_flags);
+ scratch = folio_alloc(gfp_flags, 0);
if (!scratch)
return -ENOMEM;
xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len);
- xdr_set_scratch_page(&stream, scratch);
+ xdr_set_scratch_folio(&stream, scratch);
/* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8),
* num_fh (4) */
@@ -732,11 +724,11 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
fl->fh_array[i]->size);
}
- __free_page(scratch);
+ folio_put(scratch);
return 0;
out_err:
- __free_page(scratch);
+ folio_put(scratch);
return -EIO;
}
@@ -862,6 +854,8 @@ fl_pnfs_update_layout(struct inode *ino,
status = filelayout_check_deviceid(lo, fl, gfp_flags);
if (status) {
+ pnfs_error_mark_layout_for_return(ino, lseg);
+ pnfs_set_lo_fail(lseg);
pnfs_put_lseg(lseg);
lseg = NULL;
}
@@ -873,15 +867,15 @@ static void
filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
- pnfs_generic_pg_check_layout(pgio);
+ pnfs_generic_pg_check_layout(pgio, req);
if (!pgio->pg_lseg) {
pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
nfs_req_openctx(req),
- 0,
- NFS4_MAX_UINT64,
+ req_offset(req),
+ req->wb_bytes,
IOMODE_READ,
false,
- GFP_KERNEL);
+ nfs_io_gfp_mask());
if (IS_ERR(pgio->pg_lseg)) {
pgio->pg_error = PTR_ERR(pgio->pg_lseg);
pgio->pg_lseg = NULL;
@@ -897,15 +891,15 @@ static void
filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
- pnfs_generic_pg_check_layout(pgio);
+ pnfs_generic_pg_check_layout(pgio, req);
if (!pgio->pg_lseg) {
pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
nfs_req_openctx(req),
- 0,
- NFS4_MAX_UINT64,
+ req_offset(req),
+ req->wb_bytes,
IOMODE_RW,
false,
- GFP_NOFS);
+ nfs_io_gfp_mask());
if (IS_ERR(pgio->pg_lseg)) {
pgio->pg_error = PTR_ERR(pgio->pg_lseg);
pgio->pg_lseg = NULL;
@@ -1017,7 +1011,7 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
data->args.fh = fh;
return nfs_initiate_commit(ds_clnt, data, NFS_PROTO(data->inode),
&filelayout_commit_call_ops, how,
- RPC_TASK_SOFTCONN);
+ RPC_TASK_SOFTCONN, NULL);
out_err:
pnfs_generic_prepare_to_resend_writes(data);
pnfs_generic_commit_release(data);
@@ -1116,7 +1110,6 @@ static const struct pnfs_commit_ops filelayout_commit_ops = {
.clear_request_commit = pnfs_generic_clear_request_commit,
.scan_commit_lists = pnfs_generic_scan_commit_lists,
.recover_commit_reqs = pnfs_generic_recover_commit_reqs,
- .search_commit_reqs = pnfs_generic_search_commit_reqs,
.commit_pagelist = filelayout_commit_pagelist,
};
diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h
index aed0748fd6ec..c7bb5da93307 100644
--- a/fs/nfs/filelayout/filelayout.h
+++ b/fs/nfs/filelayout/filelayout.h
@@ -51,7 +51,7 @@ struct nfs4_file_layout_dsaddr {
u32 stripe_count;
u8 *stripe_indices;
u32 ds_num;
- struct nfs4_pnfs_ds *ds_list[];
+ struct nfs4_pnfs_ds *ds_list[] __counted_by(ds_num);
};
struct nfs4_filelayout_segment {
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index acf4b88889dc..df79aeb68db4 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -35,6 +35,7 @@
#include "../internal.h"
#include "../nfs4session.h"
#include "filelayout.h"
+#include "../nfs4trace.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
@@ -72,17 +73,18 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
struct nfs4_file_layout_dsaddr *dsaddr = NULL;
struct xdr_stream stream;
struct xdr_buf buf;
- struct page *scratch;
+ struct folio *scratch;
struct list_head dsaddrs;
struct nfs4_pnfs_ds_addr *da;
+ struct net *net = server->nfs_client->cl_net;
/* set up xdr stream */
- scratch = alloc_page(gfp_flags);
+ scratch = folio_alloc(gfp_flags, 0);
if (!scratch)
goto out_err;
xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
- xdr_set_scratch_page(&stream, scratch);
+ xdr_set_scratch_folio(&stream, scratch);
/* Get the stripe count (number of stripe index) */
p = xdr_inline_decode(&stream, 4);
@@ -158,8 +160,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
mp_count = be32_to_cpup(p); /* multipath count */
for (j = 0; j < mp_count; j++) {
- da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net,
- &stream, gfp_flags);
+ da = nfs4_decode_mp_ds_addr(net, &stream, gfp_flags);
if (da)
list_add_tail(&da->da_node, &dsaddrs);
}
@@ -169,9 +170,10 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
goto out_err_free_deviceid;
}
- dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
+ dsaddr->ds_list[i] = nfs4_pnfs_ds_add(net, &dsaddrs, gfp_flags);
if (!dsaddr->ds_list[i])
goto out_err_drain_dsaddrs;
+ trace_fl_getdevinfo(server, &pdev->dev_id, dsaddr->ds_list[i]->ds_remotestr);
/* If DS was already in cache, free ds addrs */
while (!list_empty(&dsaddrs)) {
@@ -184,7 +186,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
}
}
- __free_page(scratch);
+ folio_put(scratch);
return dsaddr;
out_err_drain_dsaddrs:
@@ -202,7 +204,7 @@ out_err_free_deviceid:
out_err_free_stripe_indices:
kfree(stripe_indices);
out_err_free_scratch:
- __free_page(scratch);
+ folio_put(scratch);
out_err:
dprintk("%s ERROR: returning NULL\n", __func__);
return NULL;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 7deb3cd76abe..9056f05a67dc 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -11,6 +11,7 @@
#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
+#include <linux/file.h>
#include <linux/sched/mm.h>
#include <linux/sunrpc/metrics.h>
@@ -46,7 +47,7 @@ ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
int dev_limit, enum nfs4_ff_op_type type);
static void ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr,
const struct nfs42_layoutstat_devinfo *devinfo,
- struct nfs4_ff_layout_mirror *mirror);
+ struct nfs4_ff_layout_ds_stripe *dss_info);
static struct pnfs_layout_hdr *
ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
@@ -162,18 +163,33 @@ decode_name(struct xdr_stream *xdr, u32 *id)
return 0;
}
-static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
- const struct nfs4_ff_layout_mirror *m2)
+static struct nfsd_file *
+ff_local_open_fh(struct pnfs_layout_segment *lseg, u32 ds_idx, u32 dss_id,
+ struct nfs_client *clp, const struct cred *cred,
+ struct nfs_fh *fh, fmode_t mode)
+{
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
+
+ return nfs_local_open_fh(clp, cred, fh, &mirror->dss[dss_id].nfl, mode);
+#else
+ return NULL;
+#endif
+}
+
+static bool ff_dss_match_fh(const struct nfs4_ff_layout_ds_stripe *dss1,
+ const struct nfs4_ff_layout_ds_stripe *dss2)
{
int i, j;
- if (m1->fh_versions_cnt != m2->fh_versions_cnt)
+ if (dss1->fh_versions_cnt != dss2->fh_versions_cnt)
return false;
- for (i = 0; i < m1->fh_versions_cnt; i++) {
+
+ for (i = 0; i < dss1->fh_versions_cnt; i++) {
bool found_fh = false;
- for (j = 0; j < m2->fh_versions_cnt; j++) {
- if (nfs_compare_fh(&m1->fh_versions[i],
- &m2->fh_versions[j]) == 0) {
+ for (j = 0; j < dss2->fh_versions_cnt; j++) {
+ if (nfs_compare_fh(&dss1->fh_versions[i],
+ &dss2->fh_versions[j]) == 0) {
found_fh = true;
break;
}
@@ -184,6 +200,38 @@ static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
return true;
}
+static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
+ const struct nfs4_ff_layout_mirror *m2)
+{
+ u32 dss_id;
+
+ if (m1->dss_count != m2->dss_count)
+ return false;
+
+ for (dss_id = 0; dss_id < m1->dss_count; dss_id++)
+ if (!ff_dss_match_fh(&m1->dss[dss_id], &m2->dss[dss_id]))
+ return false;
+
+ return true;
+}
+
+static bool ff_mirror_match_devid(const struct nfs4_ff_layout_mirror *m1,
+ const struct nfs4_ff_layout_mirror *m2)
+{
+ u32 dss_id;
+
+ if (m1->dss_count != m2->dss_count)
+ return false;
+
+ for (dss_id = 0; dss_id < m1->dss_count; dss_id++)
+ if (memcmp(&m1->dss[dss_id].devid,
+ &m2->dss[dss_id].devid,
+ sizeof(m1->dss[dss_id].devid)) != 0)
+ return false;
+
+ return true;
+}
+
static struct nfs4_ff_layout_mirror *
ff_layout_add_mirror(struct pnfs_layout_hdr *lo,
struct nfs4_ff_layout_mirror *mirror)
@@ -194,7 +242,7 @@ ff_layout_add_mirror(struct pnfs_layout_hdr *lo,
spin_lock(&inode->i_lock);
list_for_each_entry(pos, &ff_layout->mirrors, mirrors) {
- if (memcmp(&mirror->devid, &pos->devid, sizeof(pos->devid)) != 0)
+ if (!ff_mirror_match_devid(mirror, pos))
continue;
if (!ff_mirror_match_fh(mirror, pos))
continue;
@@ -222,30 +270,52 @@ ff_layout_remove_mirror(struct nfs4_ff_layout_mirror *mirror)
mirror->layout = NULL;
}
-static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
+static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(u32 dss_count,
+ gfp_t gfp_flags)
{
struct nfs4_ff_layout_mirror *mirror;
mirror = kzalloc(sizeof(*mirror), gfp_flags);
- if (mirror != NULL) {
- spin_lock_init(&mirror->lock);
- refcount_set(&mirror->ref, 1);
- INIT_LIST_HEAD(&mirror->mirrors);
+ if (mirror == NULL)
+ return NULL;
+
+ spin_lock_init(&mirror->lock);
+ refcount_set(&mirror->ref, 1);
+ INIT_LIST_HEAD(&mirror->mirrors);
+
+ mirror->dss_count = dss_count;
+ mirror->dss =
+ kcalloc(dss_count, sizeof(struct nfs4_ff_layout_ds_stripe),
+ gfp_flags);
+ if (mirror->dss == NULL) {
+ kfree(mirror);
+ return NULL;
}
+
+ for (u32 dss_id = 0; dss_id < mirror->dss_count; dss_id++)
+ nfs_localio_file_init(&mirror->dss[dss_id].nfl);
+
return mirror;
}
static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
{
const struct cred *cred;
+ u32 dss_id;
ff_layout_remove_mirror(mirror);
- kfree(mirror->fh_versions);
- cred = rcu_access_pointer(mirror->ro_cred);
- put_cred(cred);
- cred = rcu_access_pointer(mirror->rw_cred);
- put_cred(cred);
- nfs4_ff_layout_put_deviceid(mirror->mirror_ds);
+
+ for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) {
+ kfree(mirror->dss[dss_id].fh_versions);
+ cred = rcu_access_pointer(mirror->dss[dss_id].ro_cred);
+ put_cred(cred);
+ cred = rcu_access_pointer(mirror->dss[dss_id].rw_cred);
+ put_cred(cred);
+ nfs_close_local_fh(&mirror->dss[dss_id].nfl);
+ nfs4_ff_layout_put_deviceid(mirror->dss[dss_id].mirror_ds);
+ }
+
+ kfree(mirror->dss);
kfree(mirror);
}
@@ -276,7 +346,7 @@ ff_lseg_match_mirrors(struct pnfs_layout_segment *l1,
struct pnfs_layout_segment *l2)
{
const struct nfs4_ff_layout_segment *fl1 = FF_LAYOUT_LSEG(l1);
- const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l1);
+ const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l2);
u32 i;
if (fl1->mirror_array_cnt != fl2->mirror_array_cnt)
@@ -349,14 +419,24 @@ ff_layout_add_lseg(struct pnfs_layout_hdr *lo,
free_me);
}
+static u32 ff_mirror_efficiency_sum(const struct nfs4_ff_layout_mirror *mirror)
+{
+ u32 dss_id, sum = 0;
+
+ for (dss_id = 0; dss_id < mirror->dss_count; dss_id++)
+ sum += mirror->dss[dss_id].efficiency;
+
+ return sum;
+}
+
static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls)
{
int i, j;
for (i = 0; i < fls->mirror_array_cnt - 1; i++) {
for (j = i + 1; j < fls->mirror_array_cnt; j++)
- if (fls->mirror_array[i]->efficiency <
- fls->mirror_array[j]->efficiency)
+ if (ff_mirror_efficiency_sum(fls->mirror_array[i]) <
+ ff_mirror_efficiency_sum(fls->mirror_array[j]))
swap(fls->mirror_array[i],
fls->mirror_array[j]);
}
@@ -371,20 +451,21 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
struct nfs4_ff_layout_segment *fls = NULL;
struct xdr_stream stream;
struct xdr_buf buf;
- struct page *scratch;
+ struct folio *scratch;
u64 stripe_unit;
u32 mirror_array_cnt;
__be32 *p;
int i, rc;
+ struct nfs4_ff_layout_ds_stripe *dss_info;
dprintk("--> %s\n", __func__);
- scratch = alloc_page(gfp_flags);
+ scratch = folio_alloc(gfp_flags, 0);
if (!scratch)
return ERR_PTR(-ENOMEM);
xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages,
lgr->layoutp->len);
- xdr_set_scratch_page(&stream, scratch);
+ xdr_set_scratch_folio(&stream, scratch);
/* stripe unit and mirror_array_cnt */
rc = -EIO;
@@ -410,116 +491,134 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
fls->mirror_array_cnt = mirror_array_cnt;
fls->stripe_unit = stripe_unit;
+ u32 dss_count = 0;
for (i = 0; i < fls->mirror_array_cnt; i++) {
struct nfs4_ff_layout_mirror *mirror;
struct cred *kcred;
const struct cred __rcu *cred;
kuid_t uid;
kgid_t gid;
- u32 ds_count, fh_count, id;
- int j;
+ u32 fh_count, id;
+ int j, dss_id;
rc = -EIO;
p = xdr_inline_decode(&stream, 4);
if (!p)
goto out_err_free;
- ds_count = be32_to_cpup(p);
- /* FIXME: allow for striping? */
- if (ds_count != 1)
+ // Ensure all mirrors have same stripe count.
+ if (dss_count == 0)
+ dss_count = be32_to_cpup(p);
+ else if (dss_count != be32_to_cpup(p))
+ goto out_err_free;
+
+ if (dss_count > NFS4_FLEXFILE_LAYOUT_MAX_STRIPE_CNT ||
+ dss_count == 0)
goto out_err_free;
- fls->mirror_array[i] = ff_layout_alloc_mirror(gfp_flags);
+ if (dss_count > 1 && stripe_unit == 0)
+ goto out_err_free;
+
+ fls->mirror_array[i] = ff_layout_alloc_mirror(dss_count, gfp_flags);
if (fls->mirror_array[i] == NULL) {
rc = -ENOMEM;
goto out_err_free;
}
- fls->mirror_array[i]->ds_count = ds_count;
+ for (dss_id = 0; dss_id < dss_count; dss_id++) {
+ dss_info = &fls->mirror_array[i]->dss[dss_id];
+ dss_info->mirror = fls->mirror_array[i];
- /* deviceid */
- rc = decode_deviceid(&stream, &fls->mirror_array[i]->devid);
- if (rc)
- goto out_err_free;
+ /* deviceid */
+ rc = decode_deviceid(&stream, &dss_info->devid);
+ if (rc)
+ goto out_err_free;
- /* efficiency */
- rc = -EIO;
- p = xdr_inline_decode(&stream, 4);
- if (!p)
- goto out_err_free;
- fls->mirror_array[i]->efficiency = be32_to_cpup(p);
+ /* efficiency */
+ rc = -EIO;
+ p = xdr_inline_decode(&stream, 4);
+ if (!p)
+ goto out_err_free;
+ dss_info->efficiency = be32_to_cpup(p);
- /* stateid */
- rc = decode_pnfs_stateid(&stream, &fls->mirror_array[i]->stateid);
- if (rc)
- goto out_err_free;
+ /* stateid */
+ rc = decode_pnfs_stateid(&stream, &dss_info->stateid);
+ if (rc)
+ goto out_err_free;
- /* fh */
- rc = -EIO;
- p = xdr_inline_decode(&stream, 4);
- if (!p)
- goto out_err_free;
- fh_count = be32_to_cpup(p);
+ /* fh */
+ rc = -EIO;
+ p = xdr_inline_decode(&stream, 4);
+ if (!p)
+ goto out_err_free;
+ fh_count = be32_to_cpup(p);
- fls->mirror_array[i]->fh_versions =
- kcalloc(fh_count, sizeof(struct nfs_fh),
- gfp_flags);
- if (fls->mirror_array[i]->fh_versions == NULL) {
- rc = -ENOMEM;
- goto out_err_free;
- }
+ dss_info->fh_versions =
+ kcalloc(fh_count, sizeof(struct nfs_fh),
+ gfp_flags);
+ if (dss_info->fh_versions == NULL) {
+ rc = -ENOMEM;
+ goto out_err_free;
+ }
- for (j = 0; j < fh_count; j++) {
- rc = decode_nfs_fh(&stream,
- &fls->mirror_array[i]->fh_versions[j]);
+ for (j = 0; j < fh_count; j++) {
+ rc = decode_nfs_fh(&stream,
+ &dss_info->fh_versions[j]);
+ if (rc)
+ goto out_err_free;
+ }
+
+ dss_info->fh_versions_cnt = fh_count;
+
+ /* user */
+ rc = decode_name(&stream, &id);
if (rc)
goto out_err_free;
- }
- fls->mirror_array[i]->fh_versions_cnt = fh_count;
+ uid = make_kuid(&init_user_ns, id);
- /* user */
- rc = decode_name(&stream, &id);
- if (rc)
- goto out_err_free;
+ /* group */
+ rc = decode_name(&stream, &id);
+ if (rc)
+ goto out_err_free;
- uid = make_kuid(&init_user_ns, id);
+ gid = make_kgid(&init_user_ns, id);
- /* group */
- rc = decode_name(&stream, &id);
- if (rc)
- goto out_err_free;
+ if (gfp_flags & __GFP_FS)
+ kcred = prepare_kernel_cred(&init_task);
+ else {
+ unsigned int nofs_flags = memalloc_nofs_save();
- gid = make_kgid(&init_user_ns, id);
-
- if (gfp_flags & __GFP_FS)
- kcred = prepare_kernel_cred(&init_task);
- else {
- unsigned int nofs_flags = memalloc_nofs_save();
- kcred = prepare_kernel_cred(&init_task);
- memalloc_nofs_restore(nofs_flags);
+ kcred = prepare_kernel_cred(&init_task);
+ memalloc_nofs_restore(nofs_flags);
+ }
+ rc = -ENOMEM;
+ if (!kcred)
+ goto out_err_free;
+ kcred->fsuid = uid;
+ kcred->fsgid = gid;
+ cred = RCU_INITIALIZER(kcred);
+
+ if (lgr->range.iomode == IOMODE_READ)
+ rcu_assign_pointer(dss_info->ro_cred, cred);
+ else
+ rcu_assign_pointer(dss_info->rw_cred, cred);
}
- rc = -ENOMEM;
- if (!kcred)
- goto out_err_free;
- kcred->fsuid = uid;
- kcred->fsgid = gid;
- cred = RCU_INITIALIZER(kcred);
-
- if (lgr->range.iomode == IOMODE_READ)
- rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred);
- else
- rcu_assign_pointer(fls->mirror_array[i]->rw_cred, cred);
mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]);
if (mirror != fls->mirror_array[i]) {
- /* swap cred ptrs so free_mirror will clean up old */
- if (lgr->range.iomode == IOMODE_READ) {
- cred = xchg(&mirror->ro_cred, cred);
- rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred);
- } else {
- cred = xchg(&mirror->rw_cred, cred);
- rcu_assign_pointer(fls->mirror_array[i]->rw_cred, cred);
+ for (dss_id = 0; dss_id < dss_count; dss_id++) {
+ dss_info = &fls->mirror_array[i]->dss[dss_id];
+ /* swap cred ptrs so free_mirror will clean up old */
+ if (lgr->range.iomode == IOMODE_READ) {
+ cred = xchg(&mirror->dss[dss_id].ro_cred,
+ dss_info->ro_cred);
+ rcu_assign_pointer(dss_info->ro_cred, cred);
+ } else {
+ cred = xchg(&mirror->dss[dss_id].rw_cred,
+ dss_info->rw_cred);
+ rcu_assign_pointer(dss_info->rw_cred, cred);
+ }
}
ff_layout_free_mirror(fls->mirror_array[i]);
fls->mirror_array[i] = mirror;
@@ -547,7 +646,7 @@ out_sort_mirrors:
ret = &fls->generic_hdr;
dprintk("<-- %s (success)\n", __func__);
out_free_page:
- __free_page(scratch);
+ folio_put(scratch);
return ret;
out_err_free:
_ff_layout_free_lseg(fls);
@@ -576,6 +675,26 @@ ff_layout_free_lseg(struct pnfs_layout_segment *lseg)
_ff_layout_free_lseg(fls);
}
+static u32 calc_commit_idx(struct pnfs_layout_segment *lseg,
+ u32 mirror_idx, u32 dss_id)
+{
+ struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg);
+
+ return (mirror_idx * flseg->mirror_array[0]->dss_count) + dss_id;
+}
+
+static u32 calc_mirror_idx_from_commit(struct pnfs_layout_segment *lseg,
+ u32 commit_index)
+{
+ return commit_index / FF_LAYOUT_LSEG(lseg)->mirror_array[0]->dss_count;
+}
+
+static u32 calc_dss_id_from_commit(struct pnfs_layout_segment *lseg,
+ u32 commit_index)
+{
+ return commit_index % FF_LAYOUT_LSEG(lseg)->mirror_array[0]->dss_count;
+}
+
static void
nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer, ktime_t now)
{
@@ -600,6 +719,7 @@ nfs4_ff_end_busy_timer(struct nfs4_ff_busy_timer *timer, ktime_t now)
static bool
nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,
+ u32 dss_id,
struct nfs4_ff_layoutstat *layoutstat,
ktime_t now)
{
@@ -607,8 +727,8 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,
struct nfs4_flexfile_layout *ffl = FF_LAYOUT_FROM_HDR(mirror->layout);
nfs4_ff_start_busy_timer(&layoutstat->busy_timer, now);
- if (!mirror->start_time)
- mirror->start_time = now;
+ if (!mirror->dss[dss_id].start_time)
+ mirror->dss[dss_id].start_time = now;
if (mirror->report_interval != 0)
report_interval = (s64)mirror->report_interval * 1000LL;
else if (layoutstats_timer != 0)
@@ -658,13 +778,16 @@ nfs4_ff_layout_stat_io_update_completed(struct nfs4_ff_layoutstat *layoutstat,
static void
nfs4_ff_layout_stat_io_start_read(struct inode *inode,
struct nfs4_ff_layout_mirror *mirror,
+ u32 dss_id,
__u64 requested, ktime_t now)
{
bool report;
spin_lock(&mirror->lock);
- report = nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat, now);
- nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested);
+ report = nfs4_ff_layoutstat_start_io(
+ mirror, dss_id, &mirror->dss[dss_id].read_stat, now);
+ nfs4_ff_layout_stat_io_update_requested(
+ &mirror->dss[dss_id].read_stat, requested);
set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);
spin_unlock(&mirror->lock);
@@ -675,11 +798,12 @@ nfs4_ff_layout_stat_io_start_read(struct inode *inode,
static void
nfs4_ff_layout_stat_io_end_read(struct rpc_task *task,
struct nfs4_ff_layout_mirror *mirror,
+ u32 dss_id,
__u64 requested,
__u64 completed)
{
spin_lock(&mirror->lock);
- nfs4_ff_layout_stat_io_update_completed(&mirror->read_stat,
+ nfs4_ff_layout_stat_io_update_completed(&mirror->dss[dss_id].read_stat,
requested, completed,
ktime_get(), task->tk_start);
set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);
@@ -689,13 +813,20 @@ nfs4_ff_layout_stat_io_end_read(struct rpc_task *task,
static void
nfs4_ff_layout_stat_io_start_write(struct inode *inode,
struct nfs4_ff_layout_mirror *mirror,
+ u32 dss_id,
__u64 requested, ktime_t now)
{
bool report;
spin_lock(&mirror->lock);
- report = nfs4_ff_layoutstat_start_io(mirror , &mirror->write_stat, now);
- nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested);
+ report = nfs4_ff_layoutstat_start_io(
+ mirror,
+ dss_id,
+ &mirror->dss[dss_id].write_stat,
+ now);
+ nfs4_ff_layout_stat_io_update_requested(
+ &mirror->dss[dss_id].write_stat,
+ requested);
set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);
spin_unlock(&mirror->lock);
@@ -706,6 +837,7 @@ nfs4_ff_layout_stat_io_start_write(struct inode *inode,
static void
nfs4_ff_layout_stat_io_end_write(struct rpc_task *task,
struct nfs4_ff_layout_mirror *mirror,
+ u32 dss_id,
__u64 requested,
__u64 completed,
enum nfs3_stable_how committed)
@@ -714,25 +846,25 @@ nfs4_ff_layout_stat_io_end_write(struct rpc_task *task,
requested = completed = 0;
spin_lock(&mirror->lock);
- nfs4_ff_layout_stat_io_update_completed(&mirror->write_stat,
+ nfs4_ff_layout_stat_io_update_completed(&mirror->dss[dss_id].write_stat,
requested, completed, ktime_get(), task->tk_start);
set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags);
spin_unlock(&mirror->lock);
}
static void
-ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, u32 idx)
+ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, u32 idx, u32 dss_id)
{
- struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
+ struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx, dss_id);
if (devid)
nfs4_mark_deviceid_unavailable(devid);
}
static void
-ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, u32 idx)
+ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, u32 idx, u32 dss_id)
{
- struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
+ struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx, dss_id);
if (devid)
nfs4_mark_deviceid_available(devid);
@@ -741,69 +873,87 @@ ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, u32 idx)
static struct nfs4_pnfs_ds *
ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
u32 start_idx, u32 *best_idx,
+ u32 offset, u32 *dss_id,
bool check_device)
{
struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
struct nfs4_ff_layout_mirror *mirror;
- struct nfs4_pnfs_ds *ds;
+ struct nfs4_pnfs_ds *ds = ERR_PTR(-EAGAIN);
u32 idx;
/* mirrors are initially sorted by efficiency */
for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
mirror = FF_LAYOUT_COMP(lseg, idx);
- ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
- if (!ds)
+ *dss_id = nfs4_ff_layout_calc_dss_id(
+ fls->stripe_unit,
+ fls->mirror_array[idx]->dss_count,
+ offset);
+ ds = nfs4_ff_layout_prepare_ds(lseg, mirror, *dss_id, false);
+ if (IS_ERR(ds))
continue;
if (check_device &&
- nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node))
+ nfs4_test_deviceid_unavailable(&mirror->dss[*dss_id].mirror_ds->id_node)) {
+ // reinitialize the error state in case if this is the last iteration
+ ds = ERR_PTR(-EINVAL);
continue;
+ }
*best_idx = idx;
- return ds;
+ break;
}
- return NULL;
+ return ds;
}
static struct nfs4_pnfs_ds *
ff_layout_choose_any_ds_for_read(struct pnfs_layout_segment *lseg,
- u32 start_idx, u32 *best_idx)
+ u32 start_idx, u32 *best_idx,
+ u32 offset, u32 *dss_id)
{
- return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, false);
+ return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx,
+ offset, dss_id, false);
}
static struct nfs4_pnfs_ds *
ff_layout_choose_valid_ds_for_read(struct pnfs_layout_segment *lseg,
- u32 start_idx, u32 *best_idx)
+ u32 start_idx, u32 *best_idx,
+ u32 offset, u32 *dss_id)
{
- return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, true);
+ return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx,
+ offset, dss_id, true);
}
static struct nfs4_pnfs_ds *
ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
- u32 start_idx, u32 *best_idx)
+ u32 start_idx, u32 *best_idx,
+ u32 offset, u32 *dss_id)
{
struct nfs4_pnfs_ds *ds;
- ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx);
- if (ds)
+ ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx,
+ offset, dss_id);
+ if (!IS_ERR(ds))
return ds;
- return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx);
+ return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx,
+ offset, dss_id);
}
static struct nfs4_pnfs_ds *
ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio,
- u32 *best_idx)
+ u32 *best_idx,
+ u32 offset,
+ u32 *dss_id)
{
struct pnfs_layout_segment *lseg = pgio->pg_lseg;
struct nfs4_pnfs_ds *ds;
ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx,
- best_idx);
- if (ds || !pgio->pg_mirror_idx)
+ best_idx, offset, dss_id);
+ if (!IS_ERR(ds) || !pgio->pg_mirror_idx)
return ds;
- return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx);
+ return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx,
+ offset, dss_id);
}
static void
@@ -822,12 +972,54 @@ ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
}
}
-static void
-ff_layout_pg_check_layout(struct nfs_pageio_descriptor *pgio,
- struct nfs_page *req)
+static bool
+ff_layout_lseg_is_striped(const struct nfs4_ff_layout_segment *fls)
{
- pnfs_generic_pg_check_layout(pgio);
- pnfs_generic_pg_check_range(pgio, req);
+ return fls->mirror_array[0]->dss_count > 1;
+}
+
+/*
+ * ff_layout_pg_test(). Called by nfs_can_coalesce_requests()
+ *
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t
+ff_layout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
+ struct nfs_page *req)
+{
+ unsigned int size;
+ u64 p_stripe, r_stripe;
+ u32 stripe_offset;
+ u64 segment_offset = pgio->pg_lseg->pls_range.offset;
+ u32 stripe_unit = FF_LAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
+
+ /* calls nfs_generic_pg_test */
+ size = pnfs_generic_pg_test(pgio, prev, req);
+ if (!size)
+ return 0;
+ else if (!ff_layout_lseg_is_striped(FF_LAYOUT_LSEG(pgio->pg_lseg)))
+ return size;
+
+ /* see if req and prev are in the same stripe */
+ if (prev) {
+ p_stripe = (u64)req_offset(prev) - segment_offset;
+ r_stripe = (u64)req_offset(req) - segment_offset;
+ do_div(p_stripe, stripe_unit);
+ do_div(r_stripe, stripe_unit);
+
+ if (p_stripe != r_stripe)
+ return 0;
+ }
+
+ /* calculate remaining bytes in the current stripe */
+ div_u64_rem((u64)req_offset(req) - segment_offset,
+ stripe_unit,
+ &stripe_offset);
+ WARN_ON_ONCE(stripe_offset > stripe_unit);
+ if (stripe_offset >= stripe_unit)
+ return 0;
+ return min(stripe_unit - (unsigned int)stripe_offset, size);
}
static void
@@ -837,10 +1029,13 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs_pgio_mirror *pgm;
struct nfs4_ff_layout_mirror *mirror;
struct nfs4_pnfs_ds *ds;
- u32 ds_idx;
+ u32 ds_idx, dss_id;
+ if (NFS_SERVER(pgio->pg_inode)->flags &
+ (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
+ pgio->pg_maxretrans = io_maxretrans;
retry:
- ff_layout_pg_check_layout(pgio, req);
+ pnfs_generic_pg_check_layout(pgio, req);
/* Use full layout for now */
if (!pgio->pg_lseg) {
ff_layout_pg_get_read(pgio, req, false);
@@ -852,9 +1047,12 @@ retry:
if (!pgio->pg_lseg)
goto out_nolseg;
}
+ /* Reset wb_nio, since getting layout segment was successful */
+ req->wb_nio = 0;
- ds = ff_layout_get_ds_for_read(pgio, &ds_idx);
- if (!ds) {
+ ds = ff_layout_get_ds_for_read(pgio, &ds_idx,
+ req_offset(req), &dss_id);
+ if (IS_ERR(ds)) {
if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
goto out_mds;
pnfs_generic_pg_cleanup(pgio);
@@ -865,17 +1063,27 @@ retry:
mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);
pgm = &pgio->pg_mirrors[0];
- pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
+ pgm->pg_bsize = mirror->dss[dss_id].mirror_ds->ds_versions[0].rsize;
pgio->pg_mirror_idx = ds_idx;
-
- if (NFS_SERVER(pgio->pg_inode)->flags &
- (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
- pgio->pg_maxretrans = io_maxretrans;
return;
out_nolseg:
- if (pgio->pg_error < 0)
- return;
+ if (pgio->pg_error < 0) {
+ if (pgio->pg_error != -EAGAIN)
+ return;
+ /* Retry getting layout segment if lower layer returned -EAGAIN */
+ if (pgio->pg_maxretrans && req->wb_nio++ > pgio->pg_maxretrans) {
+ if (NFS_SERVER(pgio->pg_inode)->flags & NFS_MOUNT_SOFTERR)
+ pgio->pg_error = -ETIMEDOUT;
+ else
+ pgio->pg_error = -EIO;
+ return;
+ }
+ pgio->pg_error = 0;
+ /* Sleep for 1 second before retrying */
+ ssleep(1);
+ goto retry;
+ }
out_mds:
trace_pnfs_mds_fallback_pg_init_read(pgio->pg_inode,
0, NFS4_MAX_UINT64, IOMODE_READ,
@@ -892,10 +1100,10 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs4_ff_layout_mirror *mirror;
struct nfs_pgio_mirror *pgm;
struct nfs4_pnfs_ds *ds;
- u32 i;
+ u32 i, dss_id;
retry:
- ff_layout_pg_check_layout(pgio, req);
+ pnfs_generic_pg_check_layout(pgio, req);
if (!pgio->pg_lseg) {
pgio->pg_lseg =
pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req),
@@ -917,8 +1125,13 @@ retry:
for (i = 0; i < pgio->pg_mirror_count; i++) {
mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
- ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror, true);
- if (!ds) {
+ dss_id = nfs4_ff_layout_calc_dss_id(
+ FF_LAYOUT_LSEG(pgio->pg_lseg)->stripe_unit,
+ mirror->dss_count,
+ req_offset(req));
+ ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror,
+ dss_id, true);
+ if (IS_ERR(ds)) {
if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
goto out_mds;
pnfs_generic_pg_cleanup(pgio);
@@ -927,7 +1140,7 @@ retry:
goto retry;
}
pgm = &pgio->pg_mirrors[i];
- pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize;
+ pgm->pg_bsize = mirror->dss[dss_id].mirror_ds->ds_versions[0].wsize;
}
if (NFS_SERVER(pgio->pg_inode)->flags &
@@ -993,14 +1206,14 @@ ff_layout_pg_get_mirror_write(struct nfs_pageio_descriptor *desc, u32 idx)
static const struct nfs_pageio_ops ff_layout_pg_read_ops = {
.pg_init = ff_layout_pg_init_read,
- .pg_test = pnfs_generic_pg_test,
+ .pg_test = ff_layout_pg_test,
.pg_doio = pnfs_generic_pg_readpages,
.pg_cleanup = pnfs_generic_pg_cleanup,
};
static const struct nfs_pageio_ops ff_layout_pg_write_ops = {
.pg_init = ff_layout_pg_init_write,
- .pg_test = pnfs_generic_pg_test,
+ .pg_test = ff_layout_pg_test,
.pg_doio = pnfs_generic_pg_writepages,
.pg_get_mirror_count = ff_layout_pg_get_mirror_count_write,
.pg_cleanup = pnfs_generic_pg_cleanup,
@@ -1048,11 +1261,15 @@ static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr)
{
u32 idx = hdr->pgio_mirror_idx + 1;
u32 new_idx = 0;
+ u32 dss_id = 0;
+ struct nfs4_pnfs_ds *ds;
- if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx))
- ff_layout_send_layouterror(hdr->lseg);
- else
+ ds = ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx,
+ hdr->args.offset, &dss_id);
+ if (IS_ERR(ds))
pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg);
+ else
+ ff_layout_send_layouterror(hdr->lseg);
pnfs_read_resend_pnfs(hdr, new_idx);
}
@@ -1081,42 +1298,53 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
}
static int ff_layout_async_handle_error_v4(struct rpc_task *task,
+ u32 op_status,
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
- u32 idx)
+ u32 idx, u32 dss_id)
{
struct pnfs_layout_hdr *lo = lseg->pls_layout;
struct inode *inode = lo->plh_inode;
- struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
+ struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx, dss_id);
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
- switch (task->tk_status) {
- case -NFS4ERR_BADSESSION:
- case -NFS4ERR_BADSLOT:
- case -NFS4ERR_BAD_HIGH_SLOT:
- case -NFS4ERR_DEADSESSION:
- case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
- case -NFS4ERR_SEQ_FALSE_RETRY:
- case -NFS4ERR_SEQ_MISORDERED:
+ switch (op_status) {
+ case NFS4_OK:
+ case NFS4ERR_NXIO:
+ break;
+ case NFSERR_PERM:
+ if (!task->tk_xprt)
+ break;
+ xprt_force_disconnect(task->tk_xprt);
+ goto out_retry;
+ case NFS4ERR_BADSESSION:
+ case NFS4ERR_BADSLOT:
+ case NFS4ERR_BAD_HIGH_SLOT:
+ case NFS4ERR_DEADSESSION:
+ case NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ case NFS4ERR_SEQ_FALSE_RETRY:
+ case NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR %d, Reset session. Exchangeid "
"flags 0x%x\n", __func__, task->tk_status,
clp->cl_exchange_flags);
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
- break;
- case -NFS4ERR_DELAY:
- case -NFS4ERR_GRACE:
+ goto out_retry;
+ case NFS4ERR_DELAY:
+ nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
+ fallthrough;
+ case NFS4ERR_GRACE:
rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
- break;
- case -NFS4ERR_RETRY_UNCACHED_REP:
- break;
+ goto out_retry;
+ case NFS4ERR_RETRY_UNCACHED_REP:
+ goto out_retry;
/* Invalidate Layout errors */
- case -NFS4ERR_PNFS_NO_LAYOUT:
- case -ESTALE: /* mapped NFS4ERR_STALE */
- case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
- case -EISDIR: /* mapped NFS4ERR_ISDIR */
- case -NFS4ERR_FHEXPIRED:
- case -NFS4ERR_WRONG_TYPE:
+ case NFS4ERR_PNFS_NO_LAYOUT:
+ case NFS4ERR_STALE:
+ case NFS4ERR_BADHANDLE:
+ case NFS4ERR_ISDIR:
+ case NFS4ERR_FHEXPIRED:
+ case NFS4ERR_WRONG_TYPE:
dprintk("%s Invalid layout error %d\n", __func__,
task->tk_status);
/*
@@ -1129,11 +1357,20 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
pnfs_destroy_layout(NFS_I(inode));
rpc_wake_up(&tbl->slot_tbl_waitq);
goto reset;
+ default:
+ break;
+ }
+
+ switch (task->tk_status) {
/* RPC connection errors */
+ case -ENETDOWN:
+ case -ENETUNREACH:
+ if (test_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags))
+ return -NFS4ERR_FATAL_IOERROR;
+ fallthrough;
case -ECONNREFUSED:
case -EHOSTDOWN:
case -EHOSTUNREACH:
- case -ENETUNREACH:
case -EIO:
case -ETIMEDOUT:
case -EPIPE:
@@ -1144,25 +1381,55 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
&devid->deviceid);
rpc_wake_up(&tbl->slot_tbl_waitq);
- fallthrough;
+ break;
default:
- if (ff_layout_avoid_mds_available_ds(lseg))
- return -NFS4ERR_RESET_TO_PNFS;
-reset:
- dprintk("%s Retry through MDS. Error %d\n", __func__,
- task->tk_status);
- return -NFS4ERR_RESET_TO_MDS;
+ break;
}
+
+ if (ff_layout_avoid_mds_available_ds(lseg))
+ return -NFS4ERR_RESET_TO_PNFS;
+reset:
+ dprintk("%s Retry through MDS. Error %d\n", __func__,
+ task->tk_status);
+ return -NFS4ERR_RESET_TO_MDS;
+
+out_retry:
task->tk_status = 0;
return -EAGAIN;
}
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
static int ff_layout_async_handle_error_v3(struct rpc_task *task,
+ u32 op_status,
+ struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
- u32 idx)
+ u32 idx, u32 dss_id)
{
- struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
+ struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx, dss_id);
+
+ switch (op_status) {
+ case NFS_OK:
+ case NFSERR_NXIO:
+ break;
+ case NFSERR_PERM:
+ if (!task->tk_xprt)
+ break;
+ xprt_force_disconnect(task->tk_xprt);
+ goto out_retry;
+ case NFSERR_ACCES:
+ case NFSERR_BADHANDLE:
+ case NFSERR_FBIG:
+ case NFSERR_IO:
+ case NFSERR_NOSPC:
+ case NFSERR_ROFS:
+ case NFSERR_STALE:
+ goto out_reset_to_pnfs;
+ case NFSERR_JUKEBOX:
+ nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
+ goto out_retry;
+ default:
+ break;
+ }
switch (task->tk_status) {
/* File access problems. Don't mark the device as unavailable */
@@ -1176,12 +1443,18 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
case -EJUKEBOX:
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
goto out_retry;
+ case -ENETDOWN:
+ case -ENETUNREACH:
+ if (test_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags))
+ return -NFS4ERR_FATAL_IOERROR;
+ fallthrough;
default:
dprintk("%s DS connection error %d\n", __func__,
task->tk_status);
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
&devid->deviceid);
}
+out_reset_to_pnfs:
/* FIXME: Need to prevent infinite looping here. */
return -NFS4ERR_RESET_TO_PNFS;
out_retry:
@@ -1192,15 +1465,16 @@ out_retry:
}
static int ff_layout_async_handle_error(struct rpc_task *task,
+ u32 op_status,
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
- u32 idx)
+ u32 idx, u32 dss_id)
{
int vers = clp->cl_nfs_mod->rpc_vers->number;
if (task->tk_status >= 0) {
- ff_layout_mark_ds_reachable(lseg, idx);
+ ff_layout_mark_ds_reachable(lseg, idx, dss_id);
return 0;
}
@@ -1210,10 +1484,11 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
switch (vers) {
case 3:
- return ff_layout_async_handle_error_v3(task, lseg, idx);
+ return ff_layout_async_handle_error_v3(task, op_status, clp,
+ lseg, idx, dss_id);
case 4:
- return ff_layout_async_handle_error_v4(task, state, clp,
- lseg, idx);
+ return ff_layout_async_handle_error_v4(task, op_status, state,
+ clp, lseg, idx, dss_id);
default:
/* should never happen */
WARN_ON_ONCE(1);
@@ -1222,7 +1497,7 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
}
static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
- u32 idx, u64 offset, u64 length,
+ u32 idx, u32 dss_id, u64 offset, u64 length,
u32 *op_status, int opnum, int error)
{
struct nfs4_ff_layout_mirror *mirror;
@@ -1235,10 +1510,12 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
case -EPFNOSUPPORT:
case -EPROTONOSUPPORT:
case -EOPNOTSUPP:
+ case -EINVAL:
case -ECONNREFUSED:
case -ECONNRESET:
case -EHOSTDOWN:
case -EHOSTUNREACH:
+ case -ENETDOWN:
case -ENETUNREACH:
case -EADDRINUSE:
case -ENOBUFS:
@@ -1258,15 +1535,16 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
mirror = FF_LAYOUT_COMP(lseg, idx);
err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
- mirror, offset, length, status, opnum,
+ mirror, dss_id, offset, length, status, opnum,
nfs_io_gfp_mask());
switch (status) {
case NFS4ERR_DELAY:
case NFS4ERR_GRACE:
+ case NFS4ERR_PERM:
break;
case NFS4ERR_NXIO:
- ff_layout_mark_ds_unreachable(lseg, idx);
+ ff_layout_mark_ds_unreachable(lseg, idx, dss_id);
/*
* Don't return the layout if this is a read and we still
* have layouts to try
@@ -1286,19 +1564,27 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
static int ff_layout_read_done_cb(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
+ struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(hdr->lseg);
+ u32 dss_id = nfs4_ff_layout_calc_dss_id(
+ flseg->stripe_unit,
+ flseg->mirror_array[hdr->pgio_mirror_idx]->dss_count,
+ hdr->args.offset);
int err;
if (task->tk_status < 0) {
- ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
+ ff_layout_io_track_ds_error(hdr->lseg,
+ hdr->pgio_mirror_idx, dss_id,
hdr->args.offset, hdr->args.count,
&hdr->res.op_status, OP_READ,
task->tk_status);
- trace_ff_layout_read_error(hdr);
+ trace_ff_layout_read_error(hdr, task->tk_status);
}
- err = ff_layout_async_handle_error(task, hdr->args.context->state,
+ err = ff_layout_async_handle_error(task, hdr->res.op_status,
+ hdr->args.context->state,
hdr->ds_clp, hdr->lseg,
- hdr->pgio_mirror_idx);
+ hdr->pgio_mirror_idx,
+ dss_id);
trace_nfs4_pnfs_read(hdr, err);
clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
@@ -1312,6 +1598,9 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
return task->tk_status;
case -EAGAIN:
goto out_eagain;
+ case -NFS4ERR_FATAL_IOERROR:
+ task->tk_status = -EIO;
+ return 0;
}
return 0;
@@ -1351,23 +1640,47 @@ ff_layout_set_layoutcommit(struct inode *inode,
static void ff_layout_read_record_layoutstats_start(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
+ struct nfs4_ff_layout_mirror *mirror;
+ u32 dss_id;
+
if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags))
return;
- nfs4_ff_layout_stat_io_start_read(hdr->inode,
- FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
- hdr->args.count,
- task->tk_start);
+
+ mirror = FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx);
+ dss_id = nfs4_ff_layout_calc_dss_id(
+ FF_LAYOUT_LSEG(hdr->lseg)->stripe_unit,
+ mirror->dss_count,
+ hdr->args.offset);
+
+ nfs4_ff_layout_stat_io_start_read(
+ hdr->inode,
+ mirror,
+ dss_id,
+ hdr->args.count,
+ task->tk_start);
}
static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
+ struct nfs4_ff_layout_mirror *mirror;
+ u32 dss_id;
+
if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags))
return;
- nfs4_ff_layout_stat_io_end_read(task,
- FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
- hdr->args.count,
- hdr->res.count);
+
+ mirror = FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx);
+ dss_id = nfs4_ff_layout_calc_dss_id(
+ FF_LAYOUT_LSEG(hdr->lseg)->stripe_unit,
+ mirror->dss_count,
+ hdr->args.offset);
+
+ nfs4_ff_layout_stat_io_end_read(
+ task,
+ mirror,
+ dss_id,
+ hdr->args.count,
+ hdr->res.count);
set_bit(NFS_LSEG_LAYOUTRETURN, &hdr->lseg->pls_flags);
}
@@ -1455,20 +1768,28 @@ static void ff_layout_read_release(void *data)
static int ff_layout_write_done_cb(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
+ struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(hdr->lseg);
+ u32 dss_id = nfs4_ff_layout_calc_dss_id(
+ flseg->stripe_unit,
+ flseg->mirror_array[hdr->pgio_mirror_idx]->dss_count,
+ hdr->args.offset);
loff_t end_offs = 0;
int err;
if (task->tk_status < 0) {
- ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
+ ff_layout_io_track_ds_error(hdr->lseg,
+ hdr->pgio_mirror_idx, dss_id,
hdr->args.offset, hdr->args.count,
&hdr->res.op_status, OP_WRITE,
task->tk_status);
- trace_ff_layout_write_error(hdr);
+ trace_ff_layout_write_error(hdr, task->tk_status);
}
- err = ff_layout_async_handle_error(task, hdr->args.context->state,
+ err = ff_layout_async_handle_error(task, hdr->res.op_status,
+ hdr->args.context->state,
hdr->ds_clp, hdr->lseg,
- hdr->pgio_mirror_idx);
+ hdr->pgio_mirror_idx,
+ dss_id);
trace_nfs4_pnfs_write(hdr, err);
clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
@@ -1482,6 +1803,9 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
return task->tk_status;
case -EAGAIN:
return -EAGAIN;
+ case -NFS4ERR_FATAL_IOERROR:
+ task->tk_status = -EIO;
+ return 0;
}
if (hdr->res.verf->committed == NFS_FILE_SYNC ||
@@ -1503,17 +1827,20 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
struct nfs_commit_data *data)
{
int err;
+ u32 idx = calc_mirror_idx_from_commit(data->lseg, data->ds_commit_index);
+ u32 dss_id = calc_dss_id_from_commit(data->lseg, data->ds_commit_index);
if (task->tk_status < 0) {
- ff_layout_io_track_ds_error(data->lseg, data->ds_commit_index,
+ ff_layout_io_track_ds_error(data->lseg, idx, dss_id,
data->args.offset, data->args.count,
&data->res.op_status, OP_COMMIT,
task->tk_status);
- trace_ff_layout_commit_error(data);
+ trace_ff_layout_commit_error(data, task->tk_status);
}
- err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
- data->lseg, data->ds_commit_index);
+ err = ff_layout_async_handle_error(task, data->res.op_status,
+ NULL, data->ds_clp, data->lseg, idx,
+ dss_id);
trace_nfs4_pnfs_commit_ds(data, err);
switch (err) {
@@ -1526,33 +1853,60 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
case -EAGAIN:
rpc_restart_call_prepare(task);
return -EAGAIN;
+ case -NFS4ERR_FATAL_IOERROR:
+ task->tk_status = -EIO;
+ return 0;
}
ff_layout_set_layoutcommit(data->inode, data->lseg, data->lwb);
-
return 0;
}
static void ff_layout_write_record_layoutstats_start(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
+ struct nfs4_ff_layout_mirror *mirror;
+ u32 dss_id;
+
if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags))
return;
- nfs4_ff_layout_stat_io_start_write(hdr->inode,
- FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
- hdr->args.count,
- task->tk_start);
+
+ mirror = FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx);
+ dss_id = nfs4_ff_layout_calc_dss_id(
+ FF_LAYOUT_LSEG(hdr->lseg)->stripe_unit,
+ mirror->dss_count,
+ hdr->args.offset);
+
+ nfs4_ff_layout_stat_io_start_write(
+ hdr->inode,
+ mirror,
+ dss_id,
+ hdr->args.count,
+ task->tk_start);
}
static void ff_layout_write_record_layoutstats_done(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
+ struct nfs4_ff_layout_mirror *mirror;
+ u32 dss_id;
+
if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags))
return;
- nfs4_ff_layout_stat_io_end_write(task,
- FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
- hdr->args.count, hdr->res.count,
- hdr->res.verf->committed);
+
+ mirror = FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx);
+ dss_id = nfs4_ff_layout_calc_dss_id(
+ FF_LAYOUT_LSEG(hdr->lseg)->stripe_unit,
+ mirror->dss_count,
+ hdr->args.offset);
+
+ nfs4_ff_layout_stat_io_end_write(
+ task,
+ mirror,
+ dss_id,
+ hdr->args.count,
+ hdr->res.count,
+ hdr->res.verf->committed);
set_bit(NFS_LSEG_LAYOUTRETURN, &hdr->lseg->pls_flags);
}
@@ -1635,10 +1989,16 @@ static void ff_layout_write_release(void *data)
static void ff_layout_commit_record_layoutstats_start(struct rpc_task *task,
struct nfs_commit_data *cdata)
{
+ u32 idx, dss_id;
+
if (test_and_set_bit(NFS_IOHDR_STAT, &cdata->flags))
return;
+
+ idx = calc_mirror_idx_from_commit(cdata->lseg, cdata->ds_commit_index);
+ dss_id = calc_dss_id_from_commit(cdata->lseg, cdata->ds_commit_index);
nfs4_ff_layout_stat_io_start_write(cdata->inode,
- FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
+ FF_LAYOUT_COMP(cdata->lseg, idx),
+ dss_id,
0, task->tk_start);
}
@@ -1647,6 +2007,7 @@ static void ff_layout_commit_record_layoutstats_done(struct rpc_task *task,
{
struct nfs_page *req;
__u64 count = 0;
+ u32 idx, dss_id;
if (!test_and_clear_bit(NFS_IOHDR_STAT, &cdata->flags))
return;
@@ -1655,8 +2016,12 @@ static void ff_layout_commit_record_layoutstats_done(struct rpc_task *task,
list_for_each_entry(req, &cdata->pages, wb_list)
count += req->wb_bytes;
}
+
+ idx = calc_mirror_idx_from_commit(cdata->lseg, cdata->ds_commit_index);
+ dss_id = calc_dss_id_from_commit(cdata->lseg, cdata->ds_commit_index);
nfs4_ff_layout_stat_io_end_write(task,
- FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
+ FF_LAYOUT_COMP(cdata->lseg, idx),
+ dss_id,
count, count, NFS_FILE_SYNC);
set_bit(NFS_LSEG_LAYOUTRETURN, &cdata->lseg->pls_flags);
}
@@ -1763,32 +2128,41 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
+ struct nfsd_file *localio;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
loff_t offset = hdr->args.offset;
u32 idx = hdr->pgio_mirror_idx;
int vers;
struct nfs_fh *fh;
+ u32 dss_id;
+ bool ds_fatal_error = false;
dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n",
__func__, hdr->inode->i_ino,
hdr->args.pgbase, (size_t)hdr->args.count, offset);
mirror = FF_LAYOUT_COMP(lseg, idx);
- ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
- if (!ds)
+ dss_id = nfs4_ff_layout_calc_dss_id(
+ FF_LAYOUT_LSEG(lseg)->stripe_unit,
+ mirror->dss_count,
+ offset);
+ ds = nfs4_ff_layout_prepare_ds(lseg, mirror, dss_id, false);
+ if (IS_ERR(ds)) {
+ ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds));
goto out_failed;
+ }
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
- hdr->inode);
+ hdr->inode, dss_id);
if (IS_ERR(ds_clnt))
goto out_failed;
- ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred);
+ ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred, dss_id);
if (!ds_cred)
goto out_failed;
- vers = nfs4_ff_layout_ds_version(mirror);
+ vers = nfs4_ff_layout_ds_version(mirror, dss_id);
dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__,
ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers);
@@ -1796,11 +2170,11 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
hdr->pgio_done_cb = ff_layout_read_done_cb;
refcount_inc(&ds->ds_clp->cl_count);
hdr->ds_clp = ds->ds_clp;
- fh = nfs4_ff_layout_select_ds_fh(mirror);
+ fh = nfs4_ff_layout_select_ds_fh(mirror, dss_id);
if (fh)
hdr->args.fh = fh;
- nfs4_ff_layout_select_ds_stateid(mirror, &hdr->args.stateid);
+ nfs4_ff_layout_select_ds_stateid(mirror, dss_id, &hdr->args.stateid);
/*
* Note that if we ever decide to split across DSes,
@@ -1809,16 +2183,24 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
hdr->args.offset = offset;
hdr->mds_offset = offset;
+ /* Start IO accounting for local read */
+ localio = ff_local_open_fh(lseg, idx, dss_id, ds->ds_clp, ds_cred, fh,
+ FMODE_READ);
+ if (localio) {
+ hdr->task.tk_start = ktime_get();
+ ff_layout_read_record_layoutstats_start(&hdr->task, hdr);
+ }
+
/* Perform an asynchronous read to ds */
nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_read_call_ops_v3 :
&ff_layout_read_call_ops_v4,
- 0, RPC_TASK_SOFTCONN);
+ 0, RPC_TASK_SOFTCONN, localio);
put_cred(ds_cred);
return PNFS_ATTEMPTED;
out_failed:
- if (ff_layout_avoid_mds_available_ds(lseg))
+ if (ff_layout_avoid_mds_available_ds(lseg) && !ds_fatal_error)
return PNFS_TRY_AGAIN;
trace_pnfs_mds_fallback_read_pagelist(hdr->inode,
hdr->args.offset, hdr->args.count,
@@ -1833,28 +2215,37 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
+ struct nfsd_file *localio;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
loff_t offset = hdr->args.offset;
int vers;
struct nfs_fh *fh;
u32 idx = hdr->pgio_mirror_idx;
+ u32 dss_id;
+ bool ds_fatal_error = false;
mirror = FF_LAYOUT_COMP(lseg, idx);
- ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
- if (!ds)
+ dss_id = nfs4_ff_layout_calc_dss_id(
+ FF_LAYOUT_LSEG(lseg)->stripe_unit,
+ mirror->dss_count,
+ offset);
+ ds = nfs4_ff_layout_prepare_ds(lseg, mirror, dss_id, true);
+ if (IS_ERR(ds)) {
+ ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds));
goto out_failed;
+ }
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
- hdr->inode);
+ hdr->inode, dss_id);
if (IS_ERR(ds_clnt))
goto out_failed;
- ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred);
+ ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred, dss_id);
if (!ds_cred)
goto out_failed;
- vers = nfs4_ff_layout_ds_version(mirror);
+ vers = nfs4_ff_layout_ds_version(mirror, dss_id);
dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n",
__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
@@ -1864,12 +2255,12 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
hdr->pgio_done_cb = ff_layout_write_done_cb;
refcount_inc(&ds->ds_clp->cl_count);
hdr->ds_clp = ds->ds_clp;
- hdr->ds_commit_idx = idx;
- fh = nfs4_ff_layout_select_ds_fh(mirror);
+ hdr->ds_commit_idx = calc_commit_idx(lseg, idx, dss_id);
+ fh = nfs4_ff_layout_select_ds_fh(mirror, dss_id);
if (fh)
hdr->args.fh = fh;
- nfs4_ff_layout_select_ds_stateid(mirror, &hdr->args.stateid);
+ nfs4_ff_layout_select_ds_stateid(mirror, dss_id, &hdr->args.stateid);
/*
* Note that if we ever decide to split across DSes,
@@ -1877,16 +2268,24 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
*/
hdr->args.offset = offset;
+ /* Start IO accounting for local write */
+ localio = ff_local_open_fh(lseg, idx, dss_id, ds->ds_clp, ds_cred, fh,
+ FMODE_READ|FMODE_WRITE);
+ if (localio) {
+ hdr->task.tk_start = ktime_get();
+ ff_layout_write_record_layoutstats_start(&hdr->task, hdr);
+ }
+
/* Perform an asynchronous write */
nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_write_call_ops_v3 :
&ff_layout_write_call_ops_v4,
- sync, RPC_TASK_SOFTCONN);
+ sync, RPC_TASK_SOFTCONN, localio);
put_cred(ds_cred);
return PNFS_ATTEMPTED;
out_failed:
- if (ff_layout_avoid_mds_available_ds(lseg))
+ if (ff_layout_avoid_mds_available_ds(lseg) && !ds_fatal_error)
return PNFS_TRY_AGAIN;
trace_pnfs_mds_fallback_write_pagelist(hdr->inode,
hdr->args.offset, hdr->args.count,
@@ -1894,20 +2293,15 @@ out_failed:
return PNFS_NOT_ATTEMPTED;
}
-static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
-{
- return i;
-}
-
static struct nfs_fh *
-select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
+select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i, u32 dss_id)
{
struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg);
/* FIXME: Assume that there is only one NFS version available
* for the DS.
*/
- return &flseg->mirror_array[i]->fh_versions[0];
+ return &flseg->mirror_array[i]->dss[dss_id].fh_versions[0];
}
static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
@@ -1915,9 +2309,10 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
struct pnfs_layout_segment *lseg = data->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
+ struct nfsd_file *localio;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
- u32 idx;
+ u32 idx, dss_id;
int vers, ret;
struct nfs_fh *fh;
@@ -1925,22 +2320,23 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)))
goto out_err;
- idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
+ idx = calc_mirror_idx_from_commit(lseg, data->ds_commit_index);
mirror = FF_LAYOUT_COMP(lseg, idx);
- ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
- if (!ds)
+ dss_id = calc_dss_id_from_commit(lseg, data->ds_commit_index);
+ ds = nfs4_ff_layout_prepare_ds(lseg, mirror, dss_id, true);
+ if (IS_ERR(ds))
goto out_err;
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
- data->inode);
+ data->inode, dss_id);
if (IS_ERR(ds_clnt))
goto out_err;
- ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, data->cred);
+ ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, data->cred, dss_id);
if (!ds_cred)
goto out_err;
- vers = nfs4_ff_layout_ds_version(mirror);
+ vers = nfs4_ff_layout_ds_version(mirror, dss_id);
dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__,
data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count),
@@ -1949,14 +2345,22 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
data->cred = ds_cred;
refcount_inc(&ds->ds_clp->cl_count);
data->ds_clp = ds->ds_clp;
- fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
+ fh = select_ds_fh_from_commit(lseg, idx, dss_id);
if (fh)
data->args.fh = fh;
+ /* Start IO accounting for local commit */
+ localio = ff_local_open_fh(lseg, idx, dss_id, ds->ds_clp, ds_cred, fh,
+ FMODE_READ|FMODE_WRITE);
+ if (localio) {
+ data->task.tk_start = ktime_get();
+ ff_layout_commit_record_layoutstats_start(&data->task, data);
+ }
+
ret = nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_commit_call_ops_v3 :
&ff_layout_commit_call_ops_v4,
- how, RPC_TASK_SOFTCONN);
+ how, RPC_TASK_SOFTCONN, localio);
put_cred(ds_cred);
return ret;
out_err:
@@ -2010,25 +2414,28 @@ static void ff_layout_cancel_io(struct pnfs_layout_segment *lseg)
struct nfs4_pnfs_ds *ds;
struct nfs_client *ds_clp;
struct rpc_clnt *clnt;
- u32 idx;
+ u32 idx, dss_id;
for (idx = 0; idx < flseg->mirror_array_cnt; idx++) {
mirror = flseg->mirror_array[idx];
- mirror_ds = mirror->mirror_ds;
- if (!mirror_ds)
- continue;
- ds = mirror->mirror_ds->ds;
- if (!ds)
- continue;
- ds_clp = ds->ds_clp;
- if (!ds_clp)
- continue;
- clnt = ds_clp->cl_rpcclient;
- if (!clnt)
- continue;
- if (!rpc_cancel_tasks(clnt, -EAGAIN, ff_layout_match_io, lseg))
- continue;
- rpc_clnt_disconnect(clnt);
+ for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) {
+ mirror_ds = mirror->dss[dss_id].mirror_ds;
+ if (IS_ERR_OR_NULL(mirror_ds))
+ continue;
+ ds = mirror->dss[dss_id].mirror_ds->ds;
+ if (!ds)
+ continue;
+ ds_clp = ds->ds_clp;
+ if (!ds_clp)
+ continue;
+ clnt = ds_clp->cl_rpcclient;
+ if (!clnt)
+ continue;
+ if (!rpc_cancel_tasks(clnt, -EAGAIN,
+ ff_layout_match_io, lseg))
+ continue;
+ rpc_clnt_disconnect(clnt);
+ }
}
}
@@ -2050,8 +2457,9 @@ ff_layout_setup_ds_info(struct pnfs_ds_commit_info *fl_cinfo,
struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg);
struct inode *inode = lseg->pls_layout->plh_inode;
struct pnfs_commit_array *array, *new;
+ u32 size = flseg->mirror_array_cnt * flseg->mirror_array[0]->dss_count;
- new = pnfs_alloc_commit_array(flseg->mirror_array_cnt,
+ new = pnfs_alloc_commit_array(size,
nfs_io_gfp_mask());
if (new) {
spin_lock(&inode->i_lock);
@@ -2094,12 +2502,6 @@ static int ff_layout_encode_ioerr(struct xdr_stream *xdr,
}
static void
-encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len)
-{
- WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0);
-}
-
-static void
ff_layout_encode_ff_iostat_head(struct xdr_stream *xdr,
const nfs4_stateid *stateid,
const struct nfs42_layoutstat_devinfo *devinfo)
@@ -2421,11 +2823,11 @@ ff_layout_encode_io_latency(struct xdr_stream *xdr,
static void
ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr,
const struct nfs42_layoutstat_devinfo *devinfo,
- struct nfs4_ff_layout_mirror *mirror)
+ struct nfs4_ff_layout_ds_stripe *dss_info)
{
struct nfs4_pnfs_ds_addr *da;
- struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds;
- struct nfs_fh *fh = &mirror->fh_versions[0];
+ struct nfs4_pnfs_ds *ds = dss_info->mirror_ds->ds;
+ struct nfs_fh *fh = &dss_info->fh_versions[0];
__be32 *p;
da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node);
@@ -2437,13 +2839,17 @@ ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr,
p = xdr_reserve_space(xdr, 4 + fh->size);
xdr_encode_opaque(p, fh->data, fh->size);
/* ff_io_latency4 read */
- spin_lock(&mirror->lock);
- ff_layout_encode_io_latency(xdr, &mirror->read_stat.io_stat);
+ spin_lock(&dss_info->mirror->lock);
+ ff_layout_encode_io_latency(xdr,
+ &dss_info->read_stat.io_stat);
/* ff_io_latency4 write */
- ff_layout_encode_io_latency(xdr, &mirror->write_stat.io_stat);
- spin_unlock(&mirror->lock);
+ ff_layout_encode_io_latency(xdr,
+ &dss_info->write_stat.io_stat);
+ spin_unlock(&dss_info->mirror->lock);
/* nfstime4 */
- ff_layout_encode_nfstime(xdr, ktime_sub(ktime_get(), mirror->start_time));
+ ff_layout_encode_nfstime(xdr,
+ ktime_sub(ktime_get(),
+ dss_info->start_time));
/* bool */
p = xdr_reserve_space(xdr, 4);
*p = cpu_to_be32(false);
@@ -2467,7 +2873,8 @@ ff_layout_encode_layoutstats(struct xdr_stream *xdr, const void *args,
static void
ff_layout_free_layoutstats(struct nfs4_xdr_opaque_data *opaque)
{
- struct nfs4_ff_layout_mirror *mirror = opaque->data;
+ struct nfs4_ff_layout_ds_stripe *dss_info = opaque->data;
+ struct nfs4_ff_layout_mirror *mirror = dss_info->mirror;
ff_layout_put_mirror(mirror);
}
@@ -2484,44 +2891,54 @@ ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
{
struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(lo);
struct nfs4_ff_layout_mirror *mirror;
+ struct nfs4_ff_layout_ds_stripe *dss_info;
struct nfs4_deviceid_node *dev;
- int i = 0;
+ int i = 0, dss_id;
list_for_each_entry(mirror, &ff_layout->mirrors, mirrors) {
- if (i >= dev_limit)
- break;
- if (IS_ERR_OR_NULL(mirror->mirror_ds))
- continue;
- if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL,
- &mirror->flags) &&
- type != NFS4_FF_OP_LAYOUTRETURN)
- continue;
- /* mirror refcount put in cleanup_layoutstats */
- if (!refcount_inc_not_zero(&mirror->ref))
- continue;
- dev = &mirror->mirror_ds->id_node;
- memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE);
- devinfo->offset = 0;
- devinfo->length = NFS4_MAX_UINT64;
- spin_lock(&mirror->lock);
- devinfo->read_count = mirror->read_stat.io_stat.ops_completed;
- devinfo->read_bytes = mirror->read_stat.io_stat.bytes_completed;
- devinfo->write_count = mirror->write_stat.io_stat.ops_completed;
- devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed;
- spin_unlock(&mirror->lock);
- devinfo->layout_type = LAYOUT_FLEX_FILES;
- devinfo->ld_private.ops = &layoutstat_ops;
- devinfo->ld_private.data = mirror;
-
- devinfo++;
- i++;
+ for (dss_id = 0; dss_id < mirror->dss_count; ++dss_id) {
+ dss_info = &mirror->dss[dss_id];
+ if (i >= dev_limit)
+ break;
+ if (IS_ERR_OR_NULL(dss_info->mirror_ds))
+ continue;
+ if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL,
+ &mirror->flags) &&
+ type != NFS4_FF_OP_LAYOUTRETURN)
+ continue;
+ /* mirror refcount put in cleanup_layoutstats */
+ if (!refcount_inc_not_zero(&mirror->ref))
+ continue;
+ dev = &dss_info->mirror_ds->id_node;
+ memcpy(&devinfo->dev_id,
+ &dev->deviceid,
+ NFS4_DEVICEID4_SIZE);
+ devinfo->offset = 0;
+ devinfo->length = NFS4_MAX_UINT64;
+ spin_lock(&mirror->lock);
+ devinfo->read_count =
+ dss_info->read_stat.io_stat.ops_completed;
+ devinfo->read_bytes =
+ dss_info->read_stat.io_stat.bytes_completed;
+ devinfo->write_count =
+ dss_info->write_stat.io_stat.ops_completed;
+ devinfo->write_bytes =
+ dss_info->write_stat.io_stat.bytes_completed;
+ spin_unlock(&mirror->lock);
+ devinfo->layout_type = LAYOUT_FLEX_FILES;
+ devinfo->ld_private.ops = &layoutstat_ops;
+ devinfo->ld_private.data = &mirror->dss[dss_id];
+
+ devinfo++;
+ i++;
+ }
}
return i;
}
-static int
-ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)
+static int ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)
{
+ struct pnfs_layout_hdr *lo;
struct nfs4_flexfile_layout *ff_layout;
const int dev_count = PNFS_LAYOUTSTATS_MAXDEV;
@@ -2532,11 +2949,14 @@ ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)
return -ENOMEM;
spin_lock(&args->inode->i_lock);
- ff_layout = FF_LAYOUT_FROM_HDR(NFS_I(args->inode)->layout);
- args->num_dev = ff_layout_mirror_prepare_stats(&ff_layout->generic_hdr,
- &args->devinfo[0],
- dev_count,
- NFS4_FF_OP_LAYOUTSTATS);
+ lo = NFS_I(args->inode)->layout;
+ if (lo && pnfs_layout_is_valid(lo)) {
+ ff_layout = FF_LAYOUT_FROM_HDR(lo);
+ args->num_dev = ff_layout_mirror_prepare_stats(
+ &ff_layout->generic_hdr, &args->devinfo[0], dev_count,
+ NFS4_FF_OP_LAYOUTSTATS);
+ } else
+ args->num_dev = 0;
spin_unlock(&args->inode->i_lock);
if (!args->num_dev) {
kfree(args->devinfo);
@@ -2552,7 +2972,7 @@ ff_layout_set_layoutdriver(struct nfs_server *server,
const struct nfs_fh *dummy)
{
#if IS_ENABLED(CONFIG_NFS_V4_2)
- server->caps |= NFS_CAP_LAYOUTSTATS;
+ server->caps |= NFS_CAP_LAYOUTSTATS | NFS_CAP_REBOOT_LAYOUTRETURN;
#endif
return 0;
}
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index 354a031c69b1..17a008c8e97c 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -21,6 +21,8 @@
* due to network error etc. */
#define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096
+#define NFS4_FLEXFILE_LAYOUT_MAX_STRIPE_CNT 4096
+
/* LAYOUTSTATS report interval in ms */
#define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L)
#define FF_LAYOUTSTATS_MAXDEV 4
@@ -71,24 +73,32 @@ struct nfs4_ff_layoutstat {
struct nfs4_ff_busy_timer busy_timer;
};
-struct nfs4_ff_layout_mirror {
- struct pnfs_layout_hdr *layout;
- struct list_head mirrors;
- u32 ds_count;
- u32 efficiency;
+struct nfs4_ff_layout_mirror;
+
+struct nfs4_ff_layout_ds_stripe {
+ struct nfs4_ff_layout_mirror *mirror;
struct nfs4_deviceid devid;
+ u32 efficiency;
struct nfs4_ff_layout_ds *mirror_ds;
u32 fh_versions_cnt;
struct nfs_fh *fh_versions;
nfs4_stateid stateid;
const struct cred __rcu *ro_cred;
const struct cred __rcu *rw_cred;
- refcount_t ref;
- spinlock_t lock;
- unsigned long flags;
+ struct nfs_file_localio nfl;
struct nfs4_ff_layoutstat read_stat;
struct nfs4_ff_layoutstat write_stat;
ktime_t start_time;
+};
+
+struct nfs4_ff_layout_mirror {
+ struct pnfs_layout_hdr *layout;
+ struct list_head mirrors;
+ u32 dss_count;
+ struct nfs4_ff_layout_ds_stripe *dss;
+ refcount_t ref;
+ spinlock_t lock;
+ unsigned long flags;
u32 report_interval;
};
@@ -99,7 +109,7 @@ struct nfs4_ff_layout_segment {
u64 stripe_unit;
u32 flags;
u32 mirror_array_cnt;
- struct nfs4_ff_layout_mirror *mirror_array[];
+ struct nfs4_ff_layout_mirror *mirror_array[] __counted_by(mirror_array_cnt);
};
struct nfs4_flexfile_layout {
@@ -149,12 +159,12 @@ FF_LAYOUT_COMP(struct pnfs_layout_segment *lseg, u32 idx)
}
static inline struct nfs4_deviceid_node *
-FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx)
+FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx, u32 dss_id)
{
struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, idx);
if (mirror != NULL) {
- struct nfs4_ff_layout_ds *mirror_ds = mirror->mirror_ds;
+ struct nfs4_ff_layout_ds *mirror_ds = mirror->dss[dss_id].mirror_ds;
if (!IS_ERR_OR_NULL(mirror_ds))
return &mirror_ds->id_node;
@@ -181,9 +191,22 @@ ff_layout_no_read_on_rw(struct pnfs_layout_segment *lseg)
}
static inline int
-nfs4_ff_layout_ds_version(const struct nfs4_ff_layout_mirror *mirror)
+nfs4_ff_layout_ds_version(const struct nfs4_ff_layout_mirror *mirror, u32 dss_id)
+{
+ return mirror->dss[dss_id].mirror_ds->ds_versions[0].version;
+}
+
+static inline u32
+nfs4_ff_layout_calc_dss_id(const u64 stripe_unit, const u32 dss_count, const loff_t offset)
{
- return mirror->mirror_ds->ds_versions[0].version;
+ u64 tmp = offset;
+
+ if (dss_count == 1 || stripe_unit == 0)
+ return 0;
+
+ do_div(tmp, stripe_unit);
+
+ return do_div(tmp, dss_count);
}
struct nfs4_ff_layout_ds *
@@ -192,9 +215,9 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds);
void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds);
int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
- struct nfs4_ff_layout_mirror *mirror, u64 offset,
- u64 length, int status, enum nfs_opnum4 opnum,
- gfp_t gfp_flags);
+ struct nfs4_ff_layout_mirror *mirror,
+ u32 dss_id, u64 offset, u64 length, int status,
+ enum nfs_opnum4 opnum, gfp_t gfp_flags);
void ff_layout_send_layouterror(struct pnfs_layout_segment *lseg);
int ff_layout_encode_ds_ioerr(struct xdr_stream *xdr, const struct list_head *head);
void ff_layout_free_ds_ioerr(struct list_head *head);
@@ -203,23 +226,27 @@ unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo,
struct list_head *head,
unsigned int maxnum);
struct nfs_fh *
-nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror);
+nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror, u32 dss_id);
void
nfs4_ff_layout_select_ds_stateid(const struct nfs4_ff_layout_mirror *mirror,
- nfs4_stateid *stateid);
+ u32 dss_id,
+ nfs4_stateid *stateid);
struct nfs4_pnfs_ds *
nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
struct nfs4_ff_layout_mirror *mirror,
+ u32 dss_id,
bool fail_return);
struct rpc_clnt *
nfs4_ff_find_or_create_ds_client(struct nfs4_ff_layout_mirror *mirror,
struct nfs_client *ds_clp,
- struct inode *inode);
+ struct inode *inode,
+ u32 dss_id);
const struct cred *ff_layout_get_ds_cred(struct nfs4_ff_layout_mirror *mirror,
const struct pnfs_layout_range *range,
- const struct cred *mdscred);
+ const struct cred *mdscred,
+ u32 dss_id);
bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg);
bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg);
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index e028f5a0ef5f..c55ea8fa3bfa 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -44,18 +44,19 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
{
struct xdr_stream stream;
struct xdr_buf buf;
- struct page *scratch;
+ struct folio *scratch;
struct list_head dsaddrs;
struct nfs4_pnfs_ds_addr *da;
struct nfs4_ff_layout_ds *new_ds = NULL;
struct nfs4_ff_ds_version *ds_versions = NULL;
+ struct net *net = server->nfs_client->cl_net;
u32 mp_count;
u32 version_count;
__be32 *p;
int i, ret = -ENOMEM;
/* set up xdr stream */
- scratch = alloc_page(gfp_flags);
+ scratch = folio_alloc(gfp_flags, 0);
if (!scratch)
goto out_err;
@@ -69,7 +70,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
INIT_LIST_HEAD(&dsaddrs);
xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
- xdr_set_scratch_page(&stream, scratch);
+ xdr_set_scratch_folio(&stream, scratch);
/* multipath count */
p = xdr_inline_decode(&stream, 4);
@@ -80,8 +81,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
for (i = 0; i < mp_count; i++) {
/* multipath ds */
- da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net,
- &stream, gfp_flags);
+ da = nfs4_decode_mp_ds_addr(net, &stream, gfp_flags);
if (da)
list_add_tail(&da->da_node, &dsaddrs);
}
@@ -149,7 +149,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
new_ds->ds_versions = ds_versions;
new_ds->ds_versions_cnt = version_count;
- new_ds->ds = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
+ new_ds->ds = nfs4_pnfs_ds_add(net, &dsaddrs, gfp_flags);
if (!new_ds->ds)
goto out_err_drain_dsaddrs;
@@ -163,7 +163,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
kfree(da);
}
- __free_page(scratch);
+ folio_put(scratch);
return new_ds;
out_err_drain_dsaddrs:
@@ -177,7 +177,7 @@ out_err_drain_dsaddrs:
kfree(ds_versions);
out_scratch:
- __free_page(scratch);
+ folio_put(scratch);
out_err:
kfree(new_ds);
@@ -250,16 +250,16 @@ ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo,
}
int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
- struct nfs4_ff_layout_mirror *mirror, u64 offset,
- u64 length, int status, enum nfs_opnum4 opnum,
- gfp_t gfp_flags)
+ struct nfs4_ff_layout_mirror *mirror,
+ u32 dss_id, u64 offset, u64 length, int status,
+ enum nfs_opnum4 opnum, gfp_t gfp_flags)
{
struct nfs4_ff_layout_ds_err *dserr;
if (status == 0)
return 0;
- if (IS_ERR_OR_NULL(mirror->mirror_ds))
+ if (IS_ERR_OR_NULL(mirror->dss[dss_id].mirror_ds))
return -EINVAL;
dserr = kmalloc(sizeof(*dserr), gfp_flags);
@@ -271,8 +271,8 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
dserr->length = length;
dserr->status = status;
dserr->opnum = opnum;
- nfs4_stateid_copy(&dserr->stateid, &mirror->stateid);
- memcpy(&dserr->deviceid, &mirror->mirror_ds->id_node.deviceid,
+ nfs4_stateid_copy(&dserr->stateid, &mirror->dss[dss_id].stateid);
+ memcpy(&dserr->deviceid, &mirror->dss[dss_id].mirror_ds->id_node.deviceid,
NFS4_DEVICEID4_SIZE);
spin_lock(&flo->generic_hdr.plh_inode->i_lock);
@@ -282,14 +282,14 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
}
static const struct cred *
-ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode)
+ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode, u32 dss_id)
{
const struct cred *cred, __rcu **pcred;
if (iomode == IOMODE_READ)
- pcred = &mirror->ro_cred;
+ pcred = &mirror->dss[dss_id].ro_cred;
else
- pcred = &mirror->rw_cred;
+ pcred = &mirror->dss[dss_id].rw_cred;
rcu_read_lock();
do {
@@ -304,43 +304,45 @@ ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode)
}
struct nfs_fh *
-nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror)
+nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror, u32 dss_id)
{
/* FIXME: For now assume there is only 1 version available for the DS */
- return &mirror->fh_versions[0];
+ return &mirror->dss[dss_id].fh_versions[0];
}
void
nfs4_ff_layout_select_ds_stateid(const struct nfs4_ff_layout_mirror *mirror,
- nfs4_stateid *stateid)
+ u32 dss_id,
+ nfs4_stateid *stateid)
{
- if (nfs4_ff_layout_ds_version(mirror) == 4)
- nfs4_stateid_copy(stateid, &mirror->stateid);
+ if (nfs4_ff_layout_ds_version(mirror, dss_id) == 4)
+ nfs4_stateid_copy(stateid, &mirror->dss[dss_id].stateid);
}
static bool
ff_layout_init_mirror_ds(struct pnfs_layout_hdr *lo,
- struct nfs4_ff_layout_mirror *mirror)
+ struct nfs4_ff_layout_mirror *mirror,
+ u32 dss_id)
{
if (mirror == NULL)
goto outerr;
- if (mirror->mirror_ds == NULL) {
+ if (mirror->dss[dss_id].mirror_ds == NULL) {
struct nfs4_deviceid_node *node;
struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV);
node = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode),
- &mirror->devid, lo->plh_lc_cred,
+ &mirror->dss[dss_id].devid, lo->plh_lc_cred,
GFP_KERNEL);
if (node)
mirror_ds = FF_LAYOUT_MIRROR_DS(node);
/* check for race with another call to this function */
- if (cmpxchg(&mirror->mirror_ds, NULL, mirror_ds) &&
+ if (cmpxchg(&mirror->dss[dss_id].mirror_ds, NULL, mirror_ds) &&
mirror_ds != ERR_PTR(-ENODEV))
nfs4_put_deviceid_node(node);
}
- if (IS_ERR(mirror->mirror_ds))
+ if (IS_ERR(mirror->dss[dss_id].mirror_ds))
goto outerr;
return true;
@@ -352,6 +354,7 @@ outerr:
* nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call
* @lseg: the layout segment we're operating on
* @mirror: layout mirror describing the DS to use
+ * @dss_id: DS stripe id to select stripe to use
* @fail_return: return layout on connect failure?
*
* Try to prepare a DS connection to accept an RPC call. This involves
@@ -368,18 +371,19 @@ outerr:
struct nfs4_pnfs_ds *
nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
struct nfs4_ff_layout_mirror *mirror,
+ u32 dss_id,
bool fail_return)
{
- struct nfs4_pnfs_ds *ds = NULL;
+ struct nfs4_pnfs_ds *ds;
struct inode *ino = lseg->pls_layout->plh_inode;
struct nfs_server *s = NFS_SERVER(ino);
unsigned int max_payload;
- int status;
+ int status = -EAGAIN;
- if (!ff_layout_init_mirror_ds(lseg->pls_layout, mirror))
+ if (!ff_layout_init_mirror_ds(lseg->pls_layout, mirror, dss_id))
goto noconnect;
- ds = mirror->mirror_ds->ds;
+ ds = mirror->dss[dss_id].mirror_ds->ds;
if (READ_ONCE(ds->ds_clp))
goto out;
/* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
@@ -388,31 +392,37 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
/* FIXME: For now we assume the server sent only one version of NFS
* to use for the DS.
*/
- status = nfs4_pnfs_ds_connect(s, ds, &mirror->mirror_ds->id_node,
+ status = nfs4_pnfs_ds_connect(s, ds, &mirror->dss[dss_id].mirror_ds->id_node,
dataserver_timeo, dataserver_retrans,
- mirror->mirror_ds->ds_versions[0].version,
- mirror->mirror_ds->ds_versions[0].minor_version);
+ mirror->dss[dss_id].mirror_ds->ds_versions[0].version,
+ mirror->dss[dss_id].mirror_ds->ds_versions[0].minor_version);
/* connect success, check rsize/wsize limit */
if (!status) {
+ /*
+ * ds_clp is put in destroy_ds().
+ * keep ds_clp even if DS is local, so that if local IO cannot
+ * proceed somehow, we can fall back to NFS whenever we want.
+ */
+ nfs_local_probe_async(ds->ds_clp);
max_payload =
nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient),
NULL);
- if (mirror->mirror_ds->ds_versions[0].rsize > max_payload)
- mirror->mirror_ds->ds_versions[0].rsize = max_payload;
- if (mirror->mirror_ds->ds_versions[0].wsize > max_payload)
- mirror->mirror_ds->ds_versions[0].wsize = max_payload;
+ if (mirror->dss[dss_id].mirror_ds->ds_versions[0].rsize > max_payload)
+ mirror->dss[dss_id].mirror_ds->ds_versions[0].rsize = max_payload;
+ if (mirror->dss[dss_id].mirror_ds->ds_versions[0].wsize > max_payload)
+ mirror->dss[dss_id].mirror_ds->ds_versions[0].wsize = max_payload;
goto out;
}
noconnect:
ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
- mirror, lseg->pls_range.offset,
+ mirror, dss_id, lseg->pls_range.offset,
lseg->pls_range.length, NFS4ERR_NXIO,
OP_ILLEGAL, GFP_NOIO);
ff_layout_send_layouterror(lseg);
if (fail_return || !ff_layout_has_available_ds(lseg))
pnfs_error_mark_layout_for_return(ino, lseg);
- ds = NULL;
+ ds = ERR_PTR(status);
out:
return ds;
}
@@ -420,12 +430,13 @@ out:
const struct cred *
ff_layout_get_ds_cred(struct nfs4_ff_layout_mirror *mirror,
const struct pnfs_layout_range *range,
- const struct cred *mdscred)
+ const struct cred *mdscred,
+ u32 dss_id)
{
const struct cred *cred;
- if (mirror && !mirror->mirror_ds->ds_versions[0].tightly_coupled) {
- cred = ff_layout_get_mirror_cred(mirror, range->iomode);
+ if (mirror && !mirror->dss[dss_id].mirror_ds->ds_versions[0].tightly_coupled) {
+ cred = ff_layout_get_mirror_cred(mirror, range->iomode, dss_id);
if (!cred)
cred = get_cred(mdscred);
} else {
@@ -439,15 +450,17 @@ ff_layout_get_ds_cred(struct nfs4_ff_layout_mirror *mirror,
* @mirror: pointer to the mirror
* @ds_clp: nfs_client for the DS
* @inode: pointer to inode
+ * @dss_id: DS stripe id
*
* Find or create a DS rpc client with th MDS server rpc client auth flavor
* in the nfs_client cl_ds_clients list.
*/
struct rpc_clnt *
nfs4_ff_find_or_create_ds_client(struct nfs4_ff_layout_mirror *mirror,
- struct nfs_client *ds_clp, struct inode *inode)
+ struct nfs_client *ds_clp, struct inode *inode,
+ u32 dss_id)
{
- switch (mirror->mirror_ds->ds_versions[0].version) {
+ switch (mirror->dss[dss_id].mirror_ds->ds_versions[0].version) {
case 3:
/* For NFSv3 DS, flavor is set when creating DS connections */
return ds_clp->cl_rpcclient;
@@ -553,16 +566,18 @@ static bool ff_read_layout_has_available_ds(struct pnfs_layout_segment *lseg)
{
struct nfs4_ff_layout_mirror *mirror;
struct nfs4_deviceid_node *devid;
- u32 idx;
+ u32 idx, dss_id;
for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) {
mirror = FF_LAYOUT_COMP(lseg, idx);
- if (mirror) {
- if (!mirror->mirror_ds)
+ if (!mirror)
+ continue;
+ for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) {
+ if (!mirror->dss[dss_id].mirror_ds)
return true;
- if (IS_ERR(mirror->mirror_ds))
+ if (IS_ERR(mirror->dss[dss_id].mirror_ds))
continue;
- devid = &mirror->mirror_ds->id_node;
+ devid = &mirror->dss[dss_id].mirror_ds->id_node;
if (!nfs4_test_deviceid_unavailable(devid))
return true;
}
@@ -575,17 +590,21 @@ static bool ff_rw_layout_has_available_ds(struct pnfs_layout_segment *lseg)
{
struct nfs4_ff_layout_mirror *mirror;
struct nfs4_deviceid_node *devid;
- u32 idx;
+ u32 idx, dss_id;
for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) {
mirror = FF_LAYOUT_COMP(lseg, idx);
- if (!mirror || IS_ERR(mirror->mirror_ds))
- return false;
- if (!mirror->mirror_ds)
- continue;
- devid = &mirror->mirror_ds->id_node;
- if (nfs4_test_deviceid_unavailable(devid))
+ if (!mirror)
return false;
+ for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) {
+ if (IS_ERR(mirror->dss[dss_id].mirror_ds))
+ return false;
+ if (!mirror->dss[dss_id].mirror_ds)
+ continue;
+ devid = &mirror->dss[dss_id].mirror_ds->id_node;
+ if (nfs4_test_deviceid_unavailable(devid))
+ return false;
+ }
}
return FF_LAYOUT_MIRROR_COUNT(lseg) != 0;
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 9bcd53d5c7d4..b4679b7161b0 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -18,6 +18,9 @@
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs4_mount.h>
+
+#include <net/handshake.h>
+
#include "nfs.h"
#include "internal.h"
@@ -46,6 +49,8 @@ enum nfs_param {
Opt_bsize,
Opt_clientaddr,
Opt_cto,
+ Opt_alignwrite,
+ Opt_fatal_neterrors,
Opt_fg,
Opt_fscache,
Opt_fscache_flag,
@@ -68,6 +73,8 @@ enum nfs_param {
Opt_posix,
Opt_proto,
Opt_rdirplus,
+ Opt_rdirplus_none,
+ Opt_rdirplus_force,
Opt_rdma,
Opt_resvport,
Opt_retrans,
@@ -88,6 +95,23 @@ enum nfs_param {
Opt_vers,
Opt_wsize,
Opt_write,
+ Opt_xprtsec,
+ Opt_cert_serial,
+ Opt_privkey_serial,
+};
+
+enum {
+ Opt_fatal_neterrors_default,
+ Opt_fatal_neterrors_enetunreach,
+ Opt_fatal_neterrors_none,
+};
+
+static const struct constant_table nfs_param_enums_fatal_neterrors[] = {
+ { "default", Opt_fatal_neterrors_default },
+ { "ENETDOWN:ENETUNREACH", Opt_fatal_neterrors_enetunreach },
+ { "ENETUNREACH:ENETDOWN", Opt_fatal_neterrors_enetunreach },
+ { "none", Opt_fatal_neterrors_none },
+ {}
};
enum {
@@ -145,6 +169,9 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
fsparam_u32 ("bsize", Opt_bsize),
fsparam_string("clientaddr", Opt_clientaddr),
fsparam_flag_no("cto", Opt_cto),
+ fsparam_flag_no("alignwrite", Opt_alignwrite),
+ fsparam_enum("fatal_neterrors", Opt_fatal_neterrors,
+ nfs_param_enums_fatal_neterrors),
fsparam_flag ("fg", Opt_fg),
fsparam_flag_no("fsc", Opt_fscache_flag),
fsparam_string("fsc", Opt_fscache),
@@ -168,7 +195,8 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
fsparam_u32 ("port", Opt_port),
fsparam_flag_no("posix", Opt_posix),
fsparam_string("proto", Opt_proto),
- fsparam_flag_no("rdirplus", Opt_rdirplus),
+ fsparam_flag_no("rdirplus", Opt_rdirplus), // rdirplus|nordirplus
+ fsparam_string("rdirplus", Opt_rdirplus), // rdirplus=...
fsparam_flag ("rdma", Opt_rdma),
fsparam_flag_no("resvport", Opt_resvport),
fsparam_u32 ("retrans", Opt_retrans),
@@ -194,6 +222,9 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
fsparam_string("vers", Opt_vers),
fsparam_enum ("write", Opt_write, nfs_param_enums_write),
fsparam_u32 ("wsize", Opt_wsize),
+ fsparam_string("xprtsec", Opt_xprtsec),
+ fsparam_s32("cert_serial", Opt_cert_serial),
+ fsparam_s32("privkey_serial", Opt_privkey_serial),
{}
};
@@ -267,6 +298,26 @@ static const struct constant_table nfs_secflavor_tokens[] = {
{}
};
+enum {
+ Opt_xprtsec_none,
+ Opt_xprtsec_tls,
+ Opt_xprtsec_mtls,
+ nr__Opt_xprtsec
+};
+
+static const struct constant_table nfs_xprtsec_policies[] = {
+ { "none", Opt_xprtsec_none },
+ { "tls", Opt_xprtsec_tls },
+ { "mtls", Opt_xprtsec_mtls },
+ {}
+};
+
+static const struct constant_table nfs_rdirplus_tokens[] = {
+ { "none", Opt_rdirplus_none },
+ { "force", Opt_rdirplus_force },
+ {}
+};
+
/*
* Sanity-check a server address provided by the mount command.
*
@@ -320,9 +371,21 @@ static int nfs_validate_transport_protocol(struct fs_context *fc,
default:
ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
}
+
+ if (ctx->xprtsec.policy != RPC_XPRTSEC_NONE)
+ switch (ctx->nfs_server.protocol) {
+ case XPRT_TRANSPORT_TCP:
+ ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP_TLS;
+ break;
+ default:
+ goto out_invalid_xprtsec_policy;
+ }
+
return 0;
out_invalid_transport_udp:
return nfs_invalf(fc, "NFS: Unsupported transport protocol udp");
+out_invalid_xprtsec_policy:
+ return nfs_invalf(fc, "NFS: Transport does not support xprtsec");
}
/*
@@ -430,6 +493,29 @@ static int nfs_parse_security_flavors(struct fs_context *fc,
return 0;
}
+static int nfs_parse_xprtsec_policy(struct fs_context *fc,
+ struct fs_parameter *param)
+{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+
+ trace_nfs_mount_assign(param->key, param->string);
+
+ switch (lookup_constant(nfs_xprtsec_policies, param->string, -1)) {
+ case Opt_xprtsec_none:
+ ctx->xprtsec.policy = RPC_XPRTSEC_NONE;
+ break;
+ case Opt_xprtsec_tls:
+ ctx->xprtsec.policy = RPC_XPRTSEC_TLS_ANON;
+ break;
+ case Opt_xprtsec_mtls:
+ ctx->xprtsec.policy = RPC_XPRTSEC_TLS_X509;
+ break;
+ default:
+ return nfs_invalf(fc, "NFS: Unrecognized transport security policy");
+ }
+ return 0;
+}
+
static int nfs_parse_version_string(struct fs_context *fc,
const char *string)
{
@@ -469,6 +555,32 @@ static int nfs_parse_version_string(struct fs_context *fc,
return 0;
}
+#ifdef CONFIG_KEYS
+static int nfs_tls_key_verify(key_serial_t key_id)
+{
+ struct key *key = key_lookup(key_id);
+ int error = 0;
+
+ if (IS_ERR(key)) {
+ pr_err("key id %08x not found\n", key_id);
+ return PTR_ERR(key);
+ }
+ if (test_bit(KEY_FLAG_REVOKED, &key->flags) ||
+ test_bit(KEY_FLAG_INVALIDATED, &key->flags)) {
+ pr_err("key id %08x revoked\n", key_id);
+ error = -EKEYREVOKED;
+ }
+
+ key_put(key);
+ return error;
+}
+#else
+static inline int nfs_tls_key_verify(key_serial_t key_id)
+{
+ return -ENOENT;
+}
+#endif /* CONFIG_KEYS */
+
/*
* Parse a single mount parameter.
*/
@@ -538,6 +650,12 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
else
ctx->flags |= NFS_MOUNT_TRUNK_DISCOVERY;
break;
+ case Opt_alignwrite:
+ if (result.negated)
+ ctx->flags |= NFS_MOUNT_NO_ALIGNWRITE;
+ else
+ ctx->flags &= ~NFS_MOUNT_NO_ALIGNWRITE;
+ break;
case Opt_ac:
if (result.negated)
ctx->flags |= NFS_MOUNT_NOAC;
@@ -546,9 +664,11 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
break;
case Opt_lock:
if (result.negated) {
+ ctx->lock_status = NFS_LOCK_NOLOCK;
ctx->flags |= NFS_MOUNT_NONLM;
ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL);
} else {
+ ctx->lock_status = NFS_LOCK_LOCK;
ctx->flags &= ~NFS_MOUNT_NONLM;
ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL);
}
@@ -572,10 +692,25 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
ctx->flags &= ~NFS_MOUNT_NOACL;
break;
case Opt_rdirplus:
- if (result.negated)
+ if (result.negated) {
+ ctx->flags &= ~NFS_MOUNT_FORCE_RDIRPLUS;
ctx->flags |= NFS_MOUNT_NORDIRPLUS;
- else
- ctx->flags &= ~NFS_MOUNT_NORDIRPLUS;
+ } else if (!param->string) {
+ ctx->flags &= ~(NFS_MOUNT_NORDIRPLUS | NFS_MOUNT_FORCE_RDIRPLUS);
+ } else {
+ switch (lookup_constant(nfs_rdirplus_tokens, param->string, -1)) {
+ case Opt_rdirplus_none:
+ ctx->flags &= ~NFS_MOUNT_FORCE_RDIRPLUS;
+ ctx->flags |= NFS_MOUNT_NORDIRPLUS;
+ break;
+ case Opt_rdirplus_force:
+ ctx->flags &= ~NFS_MOUNT_NORDIRPLUS;
+ ctx->flags |= NFS_MOUNT_FORCE_RDIRPLUS;
+ break;
+ default:
+ goto out_invalid_value;
+ }
+ }
break;
case Opt_sharecache:
if (result.negated)
@@ -598,6 +733,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
ctx->fscache_uniq = NULL;
break;
case Opt_fscache:
+ trace_nfs_mount_assign(param->key, param->string);
ctx->options |= NFS_OPTION_FSCACHE;
kfree(ctx->fscache_uniq);
ctx->fscache_uniq = param->string;
@@ -696,6 +832,23 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
if (ret < 0)
return ret;
break;
+ case Opt_xprtsec:
+ ret = nfs_parse_xprtsec_policy(fc, param);
+ if (ret < 0)
+ return ret;
+ break;
+ case Opt_cert_serial:
+ ret = nfs_tls_key_verify(result.int_32);
+ if (ret < 0)
+ return ret;
+ ctx->xprtsec.cert_serial = result.int_32;
+ break;
+ case Opt_privkey_serial:
+ ret = nfs_tls_key_verify(result.int_32);
+ if (ret < 0)
+ return ret;
+ ctx->xprtsec.privkey_serial = result.int_32;
+ break;
case Opt_proto:
if (!param->string)
@@ -791,16 +944,38 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
ctx->mount_server.addrlen = len;
break;
case Opt_nconnect:
+ trace_nfs_mount_assign(param->key, param->string);
if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_CONNECTIONS)
goto out_of_bounds;
ctx->nfs_server.nconnect = result.uint_32;
break;
case Opt_max_connect:
+ trace_nfs_mount_assign(param->key, param->string);
if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_TRANSPORTS)
goto out_of_bounds;
ctx->nfs_server.max_connect = result.uint_32;
break;
+ case Opt_fatal_neterrors:
+ trace_nfs_mount_assign(param->key, param->string);
+ switch (result.uint_32) {
+ case Opt_fatal_neterrors_default:
+ if (fc->net_ns != &init_net)
+ ctx->flags |= NFS_MOUNT_NETUNREACH_FATAL;
+ else
+ ctx->flags &= ~NFS_MOUNT_NETUNREACH_FATAL;
+ break;
+ case Opt_fatal_neterrors_enetunreach:
+ ctx->flags |= NFS_MOUNT_NETUNREACH_FATAL;
+ break;
+ case Opt_fatal_neterrors_none:
+ ctx->flags &= ~NFS_MOUNT_NETUNREACH_FATAL;
+ break;
+ default:
+ goto out_invalid_value;
+ }
+ break;
case Opt_lookupcache:
+ trace_nfs_mount_assign(param->key, param->string);
switch (result.uint_32) {
case Opt_lookupcache_all:
ctx->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE);
@@ -817,6 +992,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
}
break;
case Opt_local_lock:
+ trace_nfs_mount_assign(param->key, param->string);
switch (result.uint_32) {
case Opt_local_lock_all:
ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK |
@@ -837,6 +1013,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
}
break;
case Opt_write:
+ trace_nfs_mount_assign(param->key, param->string);
switch (result.uint_32) {
case Opt_write_lazy:
ctx->flags &=
@@ -1047,9 +1224,12 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
ctx->acdirmax = data->acdirmax;
ctx->need_mount = false;
- memcpy(sap, &data->addr, sizeof(data->addr));
- ctx->nfs_server.addrlen = sizeof(data->addr);
- ctx->nfs_server.port = ntohs(data->addr.sin_port);
+ if (!is_remount_fc(fc)) {
+ memcpy(sap, &data->addr, sizeof(data->addr));
+ ctx->nfs_server.addrlen = sizeof(data->addr);
+ ctx->nfs_server.port = ntohs(data->addr.sin_port);
+ }
+
if (sap->ss_family != AF_INET ||
!nfs_verify_server_address(sap))
goto out_no_address;
@@ -1089,8 +1269,7 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
int ret;
data->context[NFS_MAX_CONTEXT_LEN] = '\0';
- ret = vfs_parse_fs_string(fc, "context",
- data->context, strlen(data->context));
+ ret = vfs_parse_fs_string(fc, "context", data->context);
if (ret < 0)
return ret;
#else
@@ -1389,7 +1568,7 @@ static int nfs_fs_context_validate(struct fs_context *fc)
/* Load the NFS protocol module if we haven't done so yet */
if (!ctx->nfs_mod) {
- nfs_mod = get_nfs_version(ctx->version);
+ nfs_mod = find_nfs_version(ctx->version);
if (IS_ERR(nfs_mod)) {
ret = PTR_ERR(nfs_mod);
goto out_version_unavailable;
@@ -1463,7 +1642,7 @@ static int nfs_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
}
nfs_copy_fh(ctx->mntfh, src->mntfh);
- __module_get(ctx->nfs_mod->owner);
+ get_nfs_version(ctx->nfs_mod);
ctx->client_address = NULL;
ctx->mount_server.hostname = NULL;
ctx->nfs_server.export_path = NULL;
@@ -1555,7 +1734,7 @@ static int nfs_init_fs_context(struct fs_context *fc)
}
ctx->nfs_mod = nfss->nfs_client->cl_nfs_mod;
- __module_get(ctx->nfs_mod->owner);
+ get_nfs_version(ctx->nfs_mod);
} else {
/* defaults */
ctx->timeo = NFS_UNSPEC_TIMEO;
@@ -1569,6 +1748,12 @@ static int nfs_init_fs_context(struct fs_context *fc)
ctx->selected_flavor = RPC_AUTH_MAXFLAVOR;
ctx->minorversion = 0;
ctx->need_mount = true;
+ ctx->xprtsec.policy = RPC_XPRTSEC_NONE;
+ ctx->xprtsec.cert_serial = TLS_NO_CERT;
+ ctx->xprtsec.privkey_serial = TLS_NO_PRIVKEY;
+
+ if (fc->net_ns != &init_net)
+ ctx->flags |= NFS_MOUNT_NETUNREACH_FATAL;
fc->s_iflags |= SB_I_STABLE_WRITES;
}
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index e731c00a9fcb..8b0785178731 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -15,6 +15,9 @@
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/iversion.h>
+#include <linux/xarray.h>
+#include <linux/fscache.h>
+#include <linux/netfs.h>
#include "internal.h"
#include "iostat.h"
@@ -163,19 +166,23 @@ void nfs_fscache_init_inode(struct inode *inode)
struct nfs_server *nfss = NFS_SERVER(inode);
struct nfs_inode *nfsi = NFS_I(inode);
- nfsi->fscache = NULL;
+ netfs_inode(inode)->cache = NULL;
if (!(nfss->fscache && S_ISREG(inode->i_mode)))
return;
nfs_fscache_update_auxdata(&auxdata, inode);
- nfsi->fscache = fscache_acquire_cookie(NFS_SB(inode->i_sb)->fscache,
+ netfs_inode(inode)->cache = fscache_acquire_cookie(
+ nfss->fscache,
0,
nfsi->fh.data, /* index_key */
nfsi->fh.size,
&auxdata, /* aux_data */
sizeof(auxdata),
i_size_read(inode));
+
+ if (netfs_inode(inode)->cache)
+ mapping_set_release_always(inode->i_mapping);
}
/*
@@ -183,11 +190,8 @@ void nfs_fscache_init_inode(struct inode *inode)
*/
void nfs_fscache_clear_inode(struct inode *inode)
{
- struct nfs_inode *nfsi = NFS_I(inode);
- struct fscache_cookie *cookie = nfs_i_fscache(inode);
-
- fscache_relinquish_cookie(cookie, false);
- nfsi->fscache = NULL;
+ fscache_relinquish_cookie(netfs_i_cookie(netfs_inode(inode)), false);
+ netfs_inode(inode)->cache = NULL;
}
/*
@@ -212,7 +216,7 @@ void nfs_fscache_clear_inode(struct inode *inode)
void nfs_fscache_open_file(struct inode *inode, struct file *filp)
{
struct nfs_fscache_inode_auxdata auxdata;
- struct fscache_cookie *cookie = nfs_i_fscache(inode);
+ struct fscache_cookie *cookie = netfs_i_cookie(netfs_inode(inode));
bool open_for_write = inode_is_open_for_write(inode);
if (!fscache_cookie_valid(cookie))
@@ -230,119 +234,154 @@ EXPORT_SYMBOL_GPL(nfs_fscache_open_file);
void nfs_fscache_release_file(struct inode *inode, struct file *filp)
{
struct nfs_fscache_inode_auxdata auxdata;
- struct fscache_cookie *cookie = nfs_i_fscache(inode);
+ struct fscache_cookie *cookie = netfs_i_cookie(netfs_inode(inode));
loff_t i_size = i_size_read(inode);
nfs_fscache_update_auxdata(&auxdata, inode);
fscache_unuse_cookie(cookie, &auxdata, &i_size);
}
-/*
- * Fallback page reading interface.
- */
-static int fscache_fallback_read_page(struct inode *inode, struct page *page)
+int nfs_netfs_read_folio(struct file *file, struct folio *folio)
{
- struct netfs_cache_resources cres;
- struct fscache_cookie *cookie = nfs_i_fscache(inode);
- struct iov_iter iter;
- struct bio_vec bvec[1];
- int ret;
-
- memset(&cres, 0, sizeof(cres));
- bvec[0].bv_page = page;
- bvec[0].bv_offset = 0;
- bvec[0].bv_len = PAGE_SIZE;
- iov_iter_bvec(&iter, ITER_DEST, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
-
- ret = fscache_begin_read_operation(&cres, cookie);
- if (ret < 0)
- return ret;
-
- ret = fscache_read(&cres, page_offset(page), &iter, NETFS_READ_HOLE_FAIL,
- NULL, NULL);
- fscache_end_operation(&cres);
- return ret;
+ if (!netfs_inode(folio_inode(folio))->cache)
+ return -ENOBUFS;
+
+ return netfs_read_folio(file, folio);
}
-/*
- * Fallback page writing interface.
- */
-static int fscache_fallback_write_page(struct inode *inode, struct page *page,
- bool no_space_allocated_yet)
+int nfs_netfs_readahead(struct readahead_control *ractl)
{
- struct netfs_cache_resources cres;
- struct fscache_cookie *cookie = nfs_i_fscache(inode);
- struct iov_iter iter;
- struct bio_vec bvec[1];
- loff_t start = page_offset(page);
- size_t len = PAGE_SIZE;
- int ret;
-
- memset(&cres, 0, sizeof(cres));
- bvec[0].bv_page = page;
- bvec[0].bv_offset = 0;
- bvec[0].bv_len = PAGE_SIZE;
- iov_iter_bvec(&iter, ITER_SOURCE, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
-
- ret = fscache_begin_write_operation(&cres, cookie);
- if (ret < 0)
- return ret;
-
- ret = cres.ops->prepare_write(&cres, &start, &len, i_size_read(inode),
- no_space_allocated_yet);
- if (ret == 0)
- ret = fscache_write(&cres, page_offset(page), &iter, NULL, NULL);
- fscache_end_operation(&cres);
- return ret;
+ struct inode *inode = ractl->mapping->host;
+
+ if (!netfs_inode(inode)->cache)
+ return -ENOBUFS;
+
+ netfs_readahead(ractl);
+ return 0;
}
-/*
- * Retrieve a page from fscache
- */
-int __nfs_fscache_read_page(struct inode *inode, struct page *page)
+static atomic_t nfs_netfs_debug_id;
+static int nfs_netfs_init_request(struct netfs_io_request *rreq, struct file *file)
{
- int ret;
-
- trace_nfs_fscache_read_page(inode, page);
- if (PageChecked(page)) {
- ClearPageChecked(page);
- ret = 1;
- goto out;
+ if (!file) {
+ if (WARN_ON_ONCE(rreq->origin != NETFS_PGPRIV2_COPY_TO_CACHE))
+ return -EIO;
+ return 0;
}
- ret = fscache_fallback_read_page(inode, page);
- if (ret < 0) {
- nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL);
- SetPageChecked(page);
- goto out;
+ rreq->netfs_priv = get_nfs_open_context(nfs_file_open_context(file));
+ rreq->debug_id = atomic_inc_return(&nfs_netfs_debug_id);
+ /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
+ __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
+ rreq->io_streams[0].sreq_max_len = NFS_SB(rreq->inode->i_sb)->rsize;
+
+ return 0;
+}
+
+static void nfs_netfs_free_request(struct netfs_io_request *rreq)
+{
+ if (rreq->netfs_priv)
+ put_nfs_open_context(rreq->netfs_priv);
+}
+
+static struct nfs_netfs_io_data *nfs_netfs_alloc(struct netfs_io_subrequest *sreq)
+{
+ struct nfs_netfs_io_data *netfs;
+
+ netfs = kzalloc(sizeof(*netfs), GFP_KERNEL_ACCOUNT);
+ if (!netfs)
+ return NULL;
+ netfs->sreq = sreq;
+ refcount_set(&netfs->refcount, 1);
+ return netfs;
+}
+
+static void nfs_netfs_issue_read(struct netfs_io_subrequest *sreq)
+{
+ struct nfs_netfs_io_data *netfs;
+ struct nfs_pageio_descriptor pgio;
+ struct inode *inode = sreq->rreq->inode;
+ struct nfs_open_context *ctx = sreq->rreq->netfs_priv;
+ struct page *page;
+ unsigned long idx;
+ pgoff_t start, last;
+ int err;
+
+ start = (sreq->start + sreq->transferred) >> PAGE_SHIFT;
+ last = ((sreq->start + sreq->len - sreq->transferred - 1) >> PAGE_SHIFT);
+
+ nfs_pageio_init_read(&pgio, inode, false,
+ &nfs_async_read_completion_ops);
+
+ netfs = nfs_netfs_alloc(sreq);
+ if (!netfs) {
+ sreq->error = -ENOMEM;
+ return netfs_read_subreq_terminated(sreq);
}
- /* Read completed synchronously */
- nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK);
- SetPageUptodate(page);
- ret = 0;
+ pgio.pg_netfs = netfs; /* used in completion */
+
+ xa_for_each_range(&sreq->rreq->mapping->i_pages, idx, page, start, last) {
+ /* nfs_read_add_folio() may schedule() due to pNFS layout and other RPCs */
+ err = nfs_read_add_folio(&pgio, ctx, page_folio(page));
+ if (err < 0) {
+ netfs->error = err;
+ goto out;
+ }
+ }
out:
- trace_nfs_fscache_read_page_exit(inode, page, ret);
- return ret;
+ nfs_pageio_complete_read(&pgio);
+ nfs_netfs_put(netfs);
}
-/*
- * Store a newly fetched page in fscache. We can be certain there's no page
- * stored in the cache as yet otherwise we would've read it from there.
- */
-void __nfs_fscache_write_page(struct inode *inode, struct page *page)
+void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr)
{
- int ret;
+ struct nfs_netfs_io_data *netfs = hdr->netfs;
- trace_nfs_fscache_write_page(inode, page);
+ if (!netfs)
+ return;
- ret = fscache_fallback_write_page(inode, page, true);
+ nfs_netfs_get(netfs);
+}
- if (ret != 0) {
- nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL);
- nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED);
- } else {
- nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_WRITTEN_OK);
- }
- trace_nfs_fscache_write_page_exit(inode, page, ret);
+int nfs_netfs_folio_unlock(struct folio *folio)
+{
+ struct inode *inode = folio->mapping->host;
+
+ /*
+ * If fscache is enabled, netfs will unlock pages.
+ */
+ if (netfs_inode(inode)->cache)
+ return 0;
+
+ return 1;
+}
+
+void nfs_netfs_read_completion(struct nfs_pgio_header *hdr)
+{
+ struct nfs_netfs_io_data *netfs = hdr->netfs;
+ struct netfs_io_subrequest *sreq;
+
+ if (!netfs)
+ return;
+
+ sreq = netfs->sreq;
+ if (test_bit(NFS_IOHDR_EOF, &hdr->flags) &&
+ sreq->rreq->origin != NETFS_UNBUFFERED_READ &&
+ sreq->rreq->origin != NETFS_DIO_READ)
+ __set_bit(NETFS_SREQ_CLEAR_TAIL, &sreq->flags);
+
+ if (hdr->error)
+ netfs->error = hdr->error;
+ else
+ atomic64_add(hdr->res.count, &netfs->transferred);
+
+ nfs_netfs_put(netfs);
+ hdr->netfs = NULL;
}
+
+const struct netfs_request_ops nfs_netfs_ops = {
+ .init_request = nfs_netfs_init_request,
+ .free_request = nfs_netfs_free_request,
+ .issue_read = nfs_netfs_issue_read,
+};
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index 2a37af880978..9d86868f4998 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -34,6 +34,58 @@ struct nfs_fscache_inode_auxdata {
u64 change_attr;
};
+struct nfs_netfs_io_data {
+ /*
+ * NFS may split a netfs_io_subrequest into multiple RPCs, each
+ * with their own read completion. In netfs, we can only call
+ * netfs_subreq_terminated() once for each subrequest. Use the
+ * refcount here to double as a marker of the last RPC completion,
+ * and only call netfs via netfs_subreq_terminated() once.
+ */
+ refcount_t refcount;
+ struct netfs_io_subrequest *sreq;
+
+ /*
+ * Final disposition of the netfs_io_subrequest, sent in
+ * netfs_subreq_terminated()
+ */
+ atomic64_t transferred;
+ int error;
+};
+
+static inline void nfs_netfs_get(struct nfs_netfs_io_data *netfs)
+{
+ refcount_inc(&netfs->refcount);
+}
+
+static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs)
+{
+ /* Only the last RPC completion should call netfs_subreq_terminated() */
+ if (!refcount_dec_and_test(&netfs->refcount))
+ return;
+
+ /*
+ * The NFS pageio interface may read a complete page, even when netfs
+ * only asked for a partial page. Specifically, this may be seen when
+ * one thread is truncating a file while another one is reading the last
+ * page of the file.
+ * Correct the final length here to be no larger than the netfs subrequest
+ * length, and thus avoid netfs's "Subreq overread" warning message.
+ */
+ netfs->sreq->transferred = min_t(s64, netfs->sreq->len,
+ atomic64_read(&netfs->transferred));
+ netfs->sreq->error = netfs->error;
+ netfs_read_subreq_terminated(netfs->sreq);
+ kfree(netfs);
+}
+static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi)
+{
+ netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops, false);
+}
+extern void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr);
+extern void nfs_netfs_read_completion(struct nfs_pgio_header *hdr);
+extern int nfs_netfs_folio_unlock(struct folio *folio);
+
/*
* fscache.c
*/
@@ -44,52 +96,28 @@ extern void nfs_fscache_init_inode(struct inode *);
extern void nfs_fscache_clear_inode(struct inode *);
extern void nfs_fscache_open_file(struct inode *, struct file *);
extern void nfs_fscache_release_file(struct inode *, struct file *);
-
-extern int __nfs_fscache_read_page(struct inode *, struct page *);
-extern void __nfs_fscache_write_page(struct inode *, struct page *);
+extern int nfs_netfs_readahead(struct readahead_control *ractl);
+extern int nfs_netfs_read_folio(struct file *file, struct folio *folio);
static inline bool nfs_fscache_release_folio(struct folio *folio, gfp_t gfp)
{
- if (folio_test_fscache(folio)) {
+ if (folio_test_private_2(folio)) { /* [DEPRECATED] */
if (current_is_kswapd() || !(gfp & __GFP_FS))
return false;
- folio_wait_fscache(folio);
- fscache_note_page_release(nfs_i_fscache(folio->mapping->host));
- nfs_inc_fscache_stats(folio->mapping->host,
- NFSIOS_FSCACHE_PAGES_UNCACHED);
+ folio_wait_private_2(folio);
}
+ fscache_note_page_release(netfs_i_cookie(netfs_inode(folio->mapping->host)));
return true;
}
-/*
- * Retrieve a page from an inode data storage object.
- */
-static inline int nfs_fscache_read_page(struct inode *inode, struct page *page)
-{
- if (nfs_i_fscache(inode))
- return __nfs_fscache_read_page(inode, page);
- return -ENOBUFS;
-}
-
-/*
- * Store a page newly fetched from the server in an inode data storage object
- * in the cache.
- */
-static inline void nfs_fscache_write_page(struct inode *inode,
- struct page *page)
-{
- if (nfs_i_fscache(inode))
- __nfs_fscache_write_page(inode, page);
-}
-
static inline void nfs_fscache_update_auxdata(struct nfs_fscache_inode_auxdata *auxdata,
struct inode *inode)
{
memset(auxdata, 0, sizeof(*auxdata));
- auxdata->mtime_sec = inode->i_mtime.tv_sec;
- auxdata->mtime_nsec = inode->i_mtime.tv_nsec;
- auxdata->ctime_sec = inode->i_ctime.tv_sec;
- auxdata->ctime_nsec = inode->i_ctime.tv_nsec;
+ auxdata->mtime_sec = inode_get_mtime(inode).tv_sec;
+ auxdata->mtime_nsec = inode_get_mtime(inode).tv_nsec;
+ auxdata->ctime_sec = inode_get_ctime(inode).tv_sec;
+ auxdata->ctime_nsec = inode_get_ctime(inode).tv_nsec;
if (NFS_SERVER(inode)->nfs_client->rpc_ops->version == 4)
auxdata->change_attr = inode_peek_iversion_raw(inode);
@@ -101,13 +129,10 @@ static inline void nfs_fscache_update_auxdata(struct nfs_fscache_inode_auxdata *
static inline void nfs_fscache_invalidate(struct inode *inode, int flags)
{
struct nfs_fscache_inode_auxdata auxdata;
- struct nfs_inode *nfsi = NFS_I(inode);
+ struct fscache_cookie *cookie = netfs_i_cookie(&NFS_I(inode)->netfs);
- if (nfsi->fscache) {
- nfs_fscache_update_auxdata(&auxdata, inode);
- fscache_invalidate(nfsi->fscache, &auxdata,
- i_size_read(inode), flags);
- }
+ nfs_fscache_update_auxdata(&auxdata, inode);
+ fscache_invalidate(cookie, &auxdata, i_size_read(inode), flags);
}
/*
@@ -120,7 +145,28 @@ static inline const char *nfs_server_fscache_state(struct nfs_server *server)
return "no ";
}
+static inline void nfs_netfs_set_pgio_header(struct nfs_pgio_header *hdr,
+ struct nfs_pageio_descriptor *desc)
+{
+ hdr->netfs = desc->pg_netfs;
+}
+static inline void nfs_netfs_set_pageio_descriptor(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
+{
+ desc->pg_netfs = hdr->netfs;
+}
+static inline void nfs_netfs_reset_pageio_descriptor(struct nfs_pageio_descriptor *desc)
+{
+ desc->pg_netfs = NULL;
+}
#else /* CONFIG_NFS_FSCACHE */
+static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi) {}
+static inline void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr) {}
+static inline void nfs_netfs_read_completion(struct nfs_pgio_header *hdr) {}
+static inline int nfs_netfs_folio_unlock(struct folio *folio)
+{
+ return 1;
+}
static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {}
static inline void nfs_fscache_init_inode(struct inode *inode) {}
@@ -128,22 +174,29 @@ static inline void nfs_fscache_clear_inode(struct inode *inode) {}
static inline void nfs_fscache_open_file(struct inode *inode,
struct file *filp) {}
static inline void nfs_fscache_release_file(struct inode *inode, struct file *file) {}
-
-static inline bool nfs_fscache_release_folio(struct folio *folio, gfp_t gfp)
+static inline int nfs_netfs_readahead(struct readahead_control *ractl)
{
- return true; /* may release folio */
+ return -ENOBUFS;
}
-static inline int nfs_fscache_read_page(struct inode *inode, struct page *page)
+static inline int nfs_netfs_read_folio(struct file *file, struct folio *folio)
{
return -ENOBUFS;
}
-static inline void nfs_fscache_write_page(struct inode *inode, struct page *page) {}
+
+static inline bool nfs_fscache_release_folio(struct folio *folio, gfp_t gfp)
+{
+ return true; /* may release folio */
+}
static inline void nfs_fscache_invalidate(struct inode *inode, int flags) {}
static inline const char *nfs_server_fscache_state(struct nfs_server *server)
{
return "no ";
}
-
+static inline void nfs_netfs_set_pgio_header(struct nfs_pgio_header *hdr,
+ struct nfs_pageio_descriptor *desc) {}
+static inline void nfs_netfs_set_pageio_descriptor(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr) {}
+static inline void nfs_netfs_reset_pageio_descriptor(struct nfs_pageio_descriptor *desc) {}
#endif /* CONFIG_NFS_FSCACHE */
#endif /* _NFS_FSCACHE_H */
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 11ff2b2e060f..f13d25d95b85 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -62,7 +62,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
}
/*
- * get an NFS2/NFS3 root dentry from the root filehandle
+ * get a root dentry from the root filehandle
*/
int nfs_get_root(struct super_block *s, struct fs_context *fc)
{
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e98ee7599eeb..f76fe406937a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -74,6 +74,8 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
{
+ if (unlikely(nfs_current_task_exiting()))
+ return -EINTR;
schedule();
if (signal_pending_state(mode, current))
return -ERESTARTSYS;
@@ -106,7 +108,7 @@ u64 nfs_compat_user_ino64(u64 fileid)
int nfs_drop_inode(struct inode *inode)
{
- return NFS_STALE(inode) || generic_drop_inode(inode);
+ return NFS_STALE(inode) || inode_generic_drop(inode);
}
EXPORT_SYMBOL_GPL(nfs_drop_inode);
@@ -190,12 +192,12 @@ static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi)
void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
{
struct nfs_inode *nfsi = NFS_I(inode);
- bool have_delegation = NFS_PROTO(inode)->have_delegation(inode, FMODE_READ);
- if (have_delegation) {
+ if (nfs_have_delegated_attributes(inode)) {
if (!(flags & NFS_INO_REVAL_FORCED))
flags &= ~(NFS_INO_INVALID_MODE |
NFS_INO_INVALID_OTHER |
+ NFS_INO_INVALID_BTIME |
NFS_INO_INVALID_XATTR);
flags &= ~(NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE);
}
@@ -206,13 +208,17 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
nfs_fscache_invalidate(inode, 0);
flags &= ~NFS_INO_REVAL_FORCED;
- nfsi->cache_validity |= flags;
-
+ flags |= nfsi->cache_validity;
if (inode->i_mapping->nrpages == 0)
- nfsi->cache_validity &= ~(NFS_INO_INVALID_DATA |
- NFS_INO_DATA_INVAL_DEFER);
- else if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
- nfsi->cache_validity &= ~NFS_INO_DATA_INVAL_DEFER;
+ flags &= ~NFS_INO_INVALID_DATA;
+
+ /* pairs with nfs_clear_invalid_mapping()'s smp_load_acquire() */
+ smp_store_release(&nfsi->cache_validity, flags);
+
+ if (inode->i_mapping->nrpages == 0 ||
+ nfsi->cache_validity & NFS_INO_INVALID_DATA) {
+ nfs_ooo_clear(nfsi);
+ }
trace_nfs_set_cache_invalid(inode, 0);
}
EXPORT_SYMBOL_GPL(nfs_set_cache_invalid);
@@ -275,6 +281,8 @@ EXPORT_SYMBOL_GPL(nfs_zap_acl_cache);
void nfs_invalidate_atime(struct inode *inode)
{
+ if (nfs_have_delegated_atime(inode))
+ return;
spin_lock(&inode->i_lock);
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
spin_unlock(&inode->i_lock);
@@ -467,7 +475,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
goto out_no_inode;
}
- if (inode->i_state & I_NEW) {
+ if (inode_state_read_once(inode) & I_NEW) {
struct nfs_inode *nfsi = NFS_I(inode);
unsigned long now = jiffies;
@@ -490,6 +498,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops;
inode->i_data.a_ops = &nfs_file_aops;
nfs_inode_init_regular(nfsi);
+ mapping_set_large_folios(inode->i_mapping);
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
inode->i_fop = &nfs_dir_operations;
@@ -511,9 +520,10 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
} else
init_special_inode(inode, inode->i_mode, fattr->rdev);
- memset(&inode->i_atime, 0, sizeof(inode->i_atime));
- memset(&inode->i_mtime, 0, sizeof(inode->i_mtime));
- memset(&inode->i_ctime, 0, sizeof(inode->i_ctime));
+ inode_set_atime(inode, 0, 0);
+ inode_set_mtime(inode, 0, 0);
+ inode_set_ctime(inode, 0, 0);
+ memset(&nfsi->btime, 0, sizeof(nfsi->btime));
inode_set_iversion_raw(inode, 0);
inode->i_size = 0;
clear_nlink(inode);
@@ -526,17 +536,21 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
nfsi->read_cache_jiffies = fattr->time_start;
nfsi->attr_gencount = fattr->gencount;
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
- inode->i_atime = fattr->atime;
+ inode_set_atime_to_ts(inode, fattr->atime);
else if (fattr_supported & NFS_ATTR_FATTR_ATIME)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
if (fattr->valid & NFS_ATTR_FATTR_MTIME)
- inode->i_mtime = fattr->mtime;
+ inode_set_mtime_to_ts(inode, fattr->mtime);
else if (fattr_supported & NFS_ATTR_FATTR_MTIME)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
- inode->i_ctime = fattr->ctime;
+ inode_set_ctime_to_ts(inode, fattr->ctime);
else if (fattr_supported & NFS_ATTR_FATTR_CTIME)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME);
+ if (fattr->valid & NFS_ATTR_FATTR_BTIME)
+ nfsi->btime = fattr->btime;
+ else if (fattr_supported & NFS_ATTR_FATTR_BTIME)
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_BTIME);
if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
inode_set_iversion_raw(inode, fattr->change_attr);
else
@@ -549,6 +563,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
set_nlink(inode, fattr->nlink);
else if (fattr_supported & NFS_ATTR_FATTR_NLINK)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_NLINK);
+ else
+ set_nlink(inode, 1);
if (fattr->valid & NFS_ATTR_FATTR_OWNER)
inode->i_uid = fattr->uid;
else if (fattr_supported & NFS_ATTR_FATTR_OWNER)
@@ -592,7 +608,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(inode),
nfs_display_fhandle_hash(fh),
- atomic_read(&inode->i_count));
+ icount_read(inode));
out:
return inode;
@@ -603,15 +619,107 @@ out_no_inode:
}
EXPORT_SYMBOL_GPL(nfs_fhget);
+static void
+nfs_fattr_fixup_delegated(struct inode *inode, struct nfs_fattr *fattr)
+{
+ unsigned long cache_validity = NFS_I(inode)->cache_validity;
+
+ if (nfs_have_delegated_mtime(inode)) {
+ if (!(cache_validity & NFS_INO_INVALID_CTIME))
+ fattr->valid &= ~(NFS_ATTR_FATTR_PRECTIME |
+ NFS_ATTR_FATTR_CTIME);
+
+ if (!(cache_validity & NFS_INO_INVALID_MTIME))
+ fattr->valid &= ~(NFS_ATTR_FATTR_PREMTIME |
+ NFS_ATTR_FATTR_MTIME);
+
+ if (!(cache_validity & NFS_INO_INVALID_ATIME))
+ fattr->valid &= ~NFS_ATTR_FATTR_ATIME;
+ } else if (nfs_have_delegated_atime(inode)) {
+ if (!(cache_validity & NFS_INO_INVALID_ATIME))
+ fattr->valid &= ~NFS_ATTR_FATTR_ATIME;
+ }
+}
+
+static void nfs_set_timestamps_to_ts(struct inode *inode, struct iattr *attr)
+{
+ unsigned int cache_flags = 0;
+
+ if (attr->ia_valid & ATTR_MTIME_SET) {
+ struct timespec64 ctime = inode_get_ctime(inode);
+ struct timespec64 mtime = inode_get_mtime(inode);
+ struct timespec64 now;
+ int updated = 0;
+
+ now = inode_set_ctime_current(inode);
+ if (!timespec64_equal(&now, &ctime))
+ updated |= S_CTIME;
+
+ inode_set_mtime_to_ts(inode, attr->ia_mtime);
+ if (!timespec64_equal(&now, &mtime))
+ updated |= S_MTIME;
+
+ inode_maybe_inc_iversion(inode, updated);
+ cache_flags |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;
+ }
+ if (attr->ia_valid & ATTR_ATIME_SET) {
+ inode_set_atime_to_ts(inode, attr->ia_atime);
+ cache_flags |= NFS_INO_INVALID_ATIME;
+ }
+ NFS_I(inode)->cache_validity &= ~cache_flags;
+}
+
+static void nfs_update_timestamps(struct inode *inode, unsigned int ia_valid)
+{
+ enum file_time_flags time_flags = 0;
+ unsigned int cache_flags = 0;
+
+ if (ia_valid & ATTR_MTIME) {
+ time_flags |= S_MTIME | S_CTIME;
+ cache_flags |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;
+ }
+ if (ia_valid & ATTR_ATIME) {
+ time_flags |= S_ATIME;
+ cache_flags |= NFS_INO_INVALID_ATIME;
+ }
+ inode_update_timestamps(inode, time_flags);
+ NFS_I(inode)->cache_validity &= ~cache_flags;
+}
+
+void nfs_update_delegated_atime(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ if (nfs_have_delegated_atime(inode))
+ nfs_update_timestamps(inode, ATTR_ATIME);
+ spin_unlock(&inode->i_lock);
+}
+
+void nfs_update_delegated_mtime_locked(struct inode *inode)
+{
+ if (nfs_have_delegated_mtime(inode))
+ nfs_update_timestamps(inode, ATTR_MTIME);
+}
+
+void nfs_update_delegated_mtime(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ nfs_update_delegated_mtime_locked(inode);
+ spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL_GPL(nfs_update_delegated_mtime);
+
#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN)
int
-nfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
struct nfs_fattr *fattr;
+ loff_t oldsize = i_size_read(inode);
int error = 0;
+ kuid_t task_uid = current_fsuid();
+ kuid_t owner_uid = inode->i_uid;
nfs_inc_stats(inode, NFSIOS_VFSSETATTR);
@@ -626,10 +734,39 @@ nfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
if (error)
return error;
- if (attr->ia_size == i_size_read(inode))
+ if (attr->ia_size == oldsize)
attr->ia_valid &= ~ATTR_SIZE;
}
+ if (nfs_have_delegated_mtime(inode) && attr->ia_valid & ATTR_MTIME) {
+ spin_lock(&inode->i_lock);
+ if (attr->ia_valid & ATTR_MTIME_SET) {
+ if (uid_eq(task_uid, owner_uid)) {
+ nfs_set_timestamps_to_ts(inode, attr);
+ attr->ia_valid &= ~(ATTR_MTIME|ATTR_MTIME_SET|
+ ATTR_ATIME|ATTR_ATIME_SET);
+ }
+ } else {
+ nfs_update_timestamps(inode, attr->ia_valid);
+ attr->ia_valid &= ~(ATTR_MTIME|ATTR_ATIME);
+ }
+ spin_unlock(&inode->i_lock);
+ } else if (nfs_have_delegated_atime(inode) &&
+ attr->ia_valid & ATTR_ATIME &&
+ !(attr->ia_valid & ATTR_MTIME)) {
+ if (attr->ia_valid & ATTR_ATIME_SET) {
+ if (uid_eq(task_uid, owner_uid)) {
+ spin_lock(&inode->i_lock);
+ nfs_set_timestamps_to_ts(inode, attr);
+ spin_unlock(&inode->i_lock);
+ attr->ia_valid &= ~(ATTR_ATIME|ATTR_ATIME_SET);
+ }
+ } else {
+ nfs_update_delegated_atime(inode);
+ attr->ia_valid &= ~ATTR_ATIME;
+ }
+ }
+
/* Optimization: if the end result is no change, don't RPC */
if (((attr->ia_valid & NFS_VALID_ATTRS) & ~(ATTR_FILE|ATTR_OPEN)) == 0)
return 0;
@@ -637,8 +774,10 @@ nfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
trace_nfs_setattr_enter(inode);
/* Write all dirty data */
- if (S_ISREG(inode->i_mode))
+ if (S_ISREG(inode->i_mode)) {
+ nfs_file_block_o_direct(NFS_I(inode));
nfs_sync_inode(inode);
+ }
fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode));
if (fattr == NULL) {
@@ -647,8 +786,12 @@ nfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
}
error = NFS_PROTO(inode)->setattr(dentry, fattr, attr);
- if (error == 0)
+ if (error == 0) {
+ if (attr->ia_valid & ATTR_SIZE)
+ nfs_truncate_last_folio(inode->i_mapping, oldsize,
+ attr->ia_size);
error = nfs_refresh_inode(inode, fattr);
+ }
nfs_free_fattr(fattr);
out:
trace_nfs_setattr_exit(inode, error);
@@ -677,13 +820,15 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset)
trace_nfs_size_truncate(inode, offset);
i_size_write(inode, offset);
/* Optimisation */
- if (offset == 0)
- NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_DATA |
- NFS_INO_DATA_INVAL_DEFER);
+ if (offset == 0) {
+ NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA;
+ nfs_ooo_clear(NFS_I(inode));
+ }
NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE;
spin_unlock(&inode->i_lock);
truncate_pagecache(inode, offset);
+ nfs_update_delegated_mtime_locked(inode);
spin_lock(&inode->i_lock);
out:
return err;
@@ -707,8 +852,9 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
spin_lock(&inode->i_lock);
NFS_I(inode)->attr_gencount = fattr->gencount;
if ((attr->ia_valid & ATTR_SIZE) != 0) {
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME |
- NFS_INO_INVALID_BLOCKS);
+ if (!nfs_have_delegated_mtime(inode))
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
nfs_vmtruncate(inode, attr->ia_size);
}
@@ -717,9 +863,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
if ((attr->ia_valid & ATTR_KILL_SUID) != 0 &&
inode->i_mode & S_ISUID)
inode->i_mode &= ~S_ISUID;
- if ((attr->ia_valid & ATTR_KILL_SGID) != 0 &&
- (inode->i_mode & (S_ISGID | S_IXGRP)) ==
- (S_ISGID | S_IXGRP))
+ if (setattr_should_drop_sgid(&nop_mnt_idmap, inode))
inode->i_mode &= ~S_ISGID;
if ((attr->ia_valid & ATTR_MODE) != 0) {
int mode = attr->ia_mode & S_IALLUGO;
@@ -731,7 +875,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
if ((attr->ia_valid & ATTR_GID) != 0)
inode->i_gid = attr->ia_gid;
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
- inode->i_ctime = fattr->ctime;
+ inode_set_ctime_to_ts(inode, fattr->ctime);
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME);
@@ -742,14 +886,14 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_ATIME
| NFS_INO_INVALID_CTIME);
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
- inode->i_atime = fattr->atime;
+ inode_set_atime_to_ts(inode, fattr->atime);
else if (attr->ia_valid & ATTR_ATIME_SET)
- inode->i_atime = attr->ia_atime;
+ inode_set_atime_to_ts(inode, attr->ia_atime);
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
- inode->i_ctime = fattr->ctime;
+ inode_set_ctime_to_ts(inode, fattr->ctime);
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME);
@@ -758,14 +902,14 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_MTIME
| NFS_INO_INVALID_CTIME);
if (fattr->valid & NFS_ATTR_FATTR_MTIME)
- inode->i_mtime = fattr->mtime;
+ inode_set_mtime_to_ts(inode, fattr->mtime);
else if (attr->ia_valid & ATTR_MTIME_SET)
- inode->i_mtime = attr->ia_mtime;
+ inode_set_mtime_to_ts(inode, attr->ia_mtime);
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
- inode->i_ctime = fattr->ctime;
+ inode_set_ctime_to_ts(inode, fattr->ctime);
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME);
@@ -806,6 +950,7 @@ static void nfs_readdirplus_parent_cache_hit(struct dentry *dentry)
static u32 nfs_get_valid_attrmask(struct inode *inode)
{
+ u64 fattr_valid = NFS_SERVER(inode)->fattr_valid;
unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
u32 reply_mask = STATX_INO | STATX_TYPE;
@@ -825,14 +970,20 @@ static u32 nfs_get_valid_attrmask(struct inode *inode)
reply_mask |= STATX_UID | STATX_GID;
if (!(cache_validity & NFS_INO_INVALID_BLOCKS))
reply_mask |= STATX_BLOCKS;
+ if (!(cache_validity & NFS_INO_INVALID_BTIME) &&
+ (fattr_valid & NFS_ATTR_FATTR_BTIME))
+ reply_mask |= STATX_BTIME;
+ if (!(cache_validity & NFS_INO_INVALID_CHANGE))
+ reply_mask |= STATX_CHANGE_COOKIE;
return reply_mask;
}
-int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int nfs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
struct nfs_server *server = NFS_SERVER(inode);
+ u64 fattr_valid = server->fattr_valid;
unsigned long cache_validity;
int err = 0;
bool force_sync = query_flags & AT_STATX_FORCE_SYNC;
@@ -843,7 +994,11 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
request_mask &= STATX_TYPE | STATX_MODE | STATX_NLINK | STATX_UID |
STATX_GID | STATX_ATIME | STATX_MTIME | STATX_CTIME |
- STATX_INO | STATX_SIZE | STATX_BLOCKS;
+ STATX_INO | STATX_SIZE | STATX_BLOCKS | STATX_BTIME |
+ STATX_CHANGE_COOKIE;
+
+ if (!(fattr_valid & NFS_ATTR_FATTR_BTIME))
+ request_mask &= ~STATX_BTIME;
if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) {
if (readdirplus_enabled)
@@ -851,10 +1006,14 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
goto out_no_revalidate;
}
- /* Flush out writes to the server in order to update c/mtime. */
- if ((request_mask & (STATX_CTIME | STATX_MTIME)) &&
- S_ISREG(inode->i_mode))
- filemap_write_and_wait(inode->i_mapping);
+ /* Flush out writes to the server in order to update c/mtime/version. */
+ if ((request_mask & (STATX_CTIME | STATX_MTIME | STATX_CHANGE_COOKIE)) &&
+ S_ISREG(inode->i_mode)) {
+ if (nfs_have_delegated_mtime(inode))
+ filemap_fdatawrite(inode->i_mapping);
+ else
+ filemap_write_and_wait(inode->i_mapping);
+ }
/*
* We may force a getattr if the user cares about atime.
@@ -872,7 +1031,8 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
/* Is the user requesting attributes that might need revalidation? */
if (!(request_mask & (STATX_MODE|STATX_NLINK|STATX_ATIME|STATX_CTIME|
STATX_MTIME|STATX_UID|STATX_GID|
- STATX_SIZE|STATX_BLOCKS)))
+ STATX_SIZE|STATX_BLOCKS|STATX_BTIME|
+ STATX_CHANGE_COOKIE)))
goto out_no_revalidate;
/* Check whether the cached attributes are stale */
@@ -895,6 +1055,8 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
do_update |= cache_validity & NFS_INO_INVALID_OTHER;
if (request_mask & STATX_BLOCKS)
do_update |= cache_validity & NFS_INO_INVALID_BLOCKS;
+ if (request_mask & STATX_BTIME)
+ do_update |= cache_validity & NFS_INO_INVALID_BTIME;
if (do_update) {
if (readdirplus_enabled)
@@ -908,10 +1070,30 @@ out_no_revalidate:
/* Only return attributes that were revalidated. */
stat->result_mask = nfs_get_valid_attrmask(inode) | request_mask;
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
+ stat->change_cookie = inode_peek_iversion_raw(inode);
+ stat->attributes_mask |= STATX_ATTR_CHANGE_MONOTONIC;
+ if (server->change_attr_type != NFS4_CHANGE_TYPE_IS_UNDEFINED)
+ stat->attributes |= STATX_ATTR_CHANGE_MONOTONIC;
if (S_ISDIR(inode->i_mode))
stat->blksize = NFS_SERVER(inode)->dtsize;
+ stat->btime = NFS_I(inode)->btime;
+
+ /* Special handling for STATX_DIOALIGN and STATX_DIO_READ_ALIGN
+ * - NFS doesn't have DIO alignment constraints, avoid getting
+ * these DIO attrs from remote and just respond with most
+ * accommodating limits (so client will issue supported DIO).
+ * - this is unintuitive, but the most coarse-grained
+ * dio_offset_align is the most accommodating.
+ */
+ if ((request_mask & (STATX_DIOALIGN | STATX_DIO_READ_ALIGN)) &&
+ S_ISREG(inode->i_mode)) {
+ stat->result_mask |= STATX_DIOALIGN | STATX_DIO_READ_ALIGN;
+ stat->dio_mem_align = 4; /* 4-byte alignment */
+ stat->dio_offset_align = PAGE_SIZE;
+ stat->dio_read_offset_align = stat->dio_offset_align;
+ }
out:
trace_nfs_getattr_exit(inode, err);
return err;
@@ -1004,7 +1186,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
if (!is_sync)
return;
inode = d_inode(ctx->dentry);
- if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+ if (nfs_have_read_or_write_delegation(inode))
return;
nfsi = NFS_I(inode);
if (inode->i_mapping->nrpages == 0)
@@ -1045,6 +1227,8 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry,
ctx->lock_context.open_context = ctx;
INIT_LIST_HEAD(&ctx->list);
ctx->mdsthreshold = NULL;
+ nfs_localio_file_init(&ctx->nfl);
+
return ctx;
}
EXPORT_SYMBOL_GPL(alloc_nfs_open_context);
@@ -1076,6 +1260,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
nfs_sb_deactive(sb);
put_rpccred(rcu_dereference_protected(ctx->ll_cred, 1));
kfree(ctx->mdsthreshold);
+ nfs_close_local_fh(&ctx->nfl);
kfree_rcu(ctx, rcu_head);
}
@@ -1101,7 +1286,7 @@ void nfs_inode_attach_open_context(struct nfs_open_context *ctx)
spin_lock(&inode->i_lock);
if (list_empty(&nfsi->open_files) &&
- (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
+ nfs_ooo_test(nfsi))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA |
NFS_INO_REVAL_FORCED);
list_add_tail_rcu(&ctx->list, &nfsi->open_files);
@@ -1332,6 +1517,13 @@ int nfs_clear_invalid_mapping(struct address_space *mapping)
TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
if (ret)
goto out;
+ smp_rmb(); /* pairs with smp_wmb() below */
+ if (test_bit(NFS_INO_INVALIDATING, bitlock))
+ continue;
+ /* pairs with nfs_set_cache_invalid()'s smp_store_release() */
+ if (!(smp_load_acquire(&nfsi->cache_validity) & NFS_INO_INVALID_DATA))
+ goto out;
+ /* Slow-path that double-checks with spinlock held */
spin_lock(&inode->i_lock);
if (test_bit(NFS_INO_INVALIDATING, bitlock)) {
spin_unlock(&inode->i_lock);
@@ -1345,8 +1537,8 @@ int nfs_clear_invalid_mapping(struct address_space *mapping)
set_bit(NFS_INO_INVALIDATING, bitlock);
smp_wmb();
- nfsi->cache_validity &=
- ~(NFS_INO_INVALID_DATA | NFS_INO_DATA_INVAL_DEFER);
+ nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
+ nfs_ooo_clear(nfsi);
spin_unlock(&inode->i_lock);
trace_nfs_invalidate_mapping_enter(inode);
ret = nfs_invalidate_mapping(inode, mapping);
@@ -1436,18 +1628,18 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR);
}
/* If we have atomic WCC data, we may update some attributes */
- ts = inode->i_ctime;
+ ts = inode_get_ctime(inode);
if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME)
&& (fattr->valid & NFS_ATTR_FATTR_CTIME)
&& timespec64_equal(&ts, &fattr->pre_ctime)) {
- inode->i_ctime = fattr->ctime;
+ inode_set_ctime_to_ts(inode, fattr->ctime);
}
- ts = inode->i_mtime;
+ ts = inode_get_mtime(inode);
if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME)
&& (fattr->valid & NFS_ATTR_FATTR_MTIME)
&& timespec64_equal(&ts, &fattr->pre_mtime)) {
- inode->i_mtime = fattr->mtime;
+ inode_set_mtime_to_ts(inode, fattr->mtime);
}
if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
&& (fattr->valid & NFS_ATTR_FATTR_SIZE)
@@ -1474,7 +1666,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
unsigned long invalid = 0;
struct timespec64 ts;
- if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+ if (nfs_have_delegated_attributes(inode))
return 0;
if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) {
@@ -1498,11 +1690,11 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && !inode_eq_iversion_raw(inode, fattr->change_attr))
invalid |= NFS_INO_INVALID_CHANGE;
- ts = inode->i_mtime;
+ ts = inode_get_mtime(inode);
if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec64_equal(&ts, &fattr->mtime))
invalid |= NFS_INO_INVALID_MTIME;
- ts = inode->i_ctime;
+ ts = inode_get_ctime(inode);
if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec64_equal(&ts, &fattr->ctime))
invalid |= NFS_INO_INVALID_CTIME;
@@ -1526,7 +1718,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
invalid |= NFS_INO_INVALID_NLINK;
- ts = inode->i_atime;
+ ts = inode_get_atime(inode);
if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec64_equal(&ts, &fattr->atime))
invalid |= NFS_INO_INVALID_ATIME;
@@ -1557,6 +1749,7 @@ void nfs_fattr_init(struct nfs_fattr *fattr)
fattr->gencount = nfs_inc_attr_generation_counter();
fattr->owner_name = NULL;
fattr->group_name = NULL;
+ fattr->mdsthreshold = NULL;
}
EXPORT_SYMBOL_GPL(nfs_fattr_init);
@@ -1795,7 +1988,7 @@ static int nfs_inode_finish_partial_attr_update(const struct nfs_fattr *fattr,
NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME |
NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE |
NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_OTHER |
- NFS_INO_INVALID_NLINK;
+ NFS_INO_INVALID_NLINK | NFS_INO_INVALID_BTIME;
unsigned long cache_validity = NFS_I(inode)->cache_validity;
enum nfs4_change_attr_type ctype = NFS_SERVER(inode)->change_attr_type;
@@ -1808,6 +2001,66 @@ static int nfs_inode_finish_partial_attr_update(const struct nfs_fattr *fattr,
return 0;
}
+static void nfs_ooo_merge(struct nfs_inode *nfsi,
+ u64 start, u64 end)
+{
+ int i, cnt;
+
+ if (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER)
+ /* No point merging anything */
+ return;
+
+ if (!nfsi->ooo) {
+ nfsi->ooo = kmalloc(sizeof(*nfsi->ooo), GFP_ATOMIC);
+ if (!nfsi->ooo) {
+ nfsi->cache_validity |= NFS_INO_DATA_INVAL_DEFER;
+ return;
+ }
+ nfsi->ooo->cnt = 0;
+ }
+
+ /* add this range, merging if possible */
+ cnt = nfsi->ooo->cnt;
+ for (i = 0; i < cnt; i++) {
+ if (end == nfsi->ooo->gap[i].start)
+ end = nfsi->ooo->gap[i].end;
+ else if (start == nfsi->ooo->gap[i].end)
+ start = nfsi->ooo->gap[i].start;
+ else
+ continue;
+ /* Remove 'i' from table and loop to insert the new range */
+ cnt -= 1;
+ nfsi->ooo->gap[i] = nfsi->ooo->gap[cnt];
+ i = -1;
+ }
+ if (start != end) {
+ if (cnt >= ARRAY_SIZE(nfsi->ooo->gap)) {
+ nfsi->cache_validity |= NFS_INO_DATA_INVAL_DEFER;
+ kfree(nfsi->ooo);
+ nfsi->ooo = NULL;
+ return;
+ }
+ nfsi->ooo->gap[cnt].start = start;
+ nfsi->ooo->gap[cnt].end = end;
+ cnt += 1;
+ }
+ nfsi->ooo->cnt = cnt;
+}
+
+static void nfs_ooo_record(struct nfs_inode *nfsi,
+ struct nfs_fattr *fattr)
+{
+ /* This reply was out-of-order, so record in the
+ * pre/post change id, possibly cancelling
+ * gaps created when iversion was jumpped forward.
+ */
+ if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) &&
+ (fattr->valid & NFS_ATTR_FATTR_PRECHANGE))
+ nfs_ooo_merge(nfsi,
+ fattr->change_attr,
+ fattr->pre_change_attr);
+}
+
static int nfs_refresh_inode_locked(struct inode *inode,
struct nfs_fattr *fattr)
{
@@ -1818,8 +2071,12 @@ static int nfs_refresh_inode_locked(struct inode *inode,
if (attr_cmp > 0 || nfs_inode_finish_partial_attr_update(fattr, inode))
ret = nfs_update_inode(inode, fattr);
- else if (attr_cmp == 0)
- ret = nfs_check_inode_attributes(inode, fattr);
+ else {
+ nfs_ooo_record(NFS_I(inode), fattr);
+
+ if (attr_cmp == 0)
+ ret = nfs_check_inode_attributes(inode, fattr);
+ }
trace_nfs_refresh_inode_exit(inode, ret);
return ret;
@@ -1910,6 +2167,8 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa
if (attr_cmp < 0)
return 0;
if ((fattr->valid & NFS_ATTR_FATTR) == 0 || !attr_cmp) {
+ /* Record the pre/post change info before clearing PRECHANGE */
+ nfs_ooo_record(NFS_I(inode), fattr);
fattr->valid &= ~(NFS_ATTR_FATTR_PRECHANGE
| NFS_ATTR_FATTR_PRESIZE
| NFS_ATTR_FATTR_PREMTIME
@@ -1923,12 +2182,12 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa
}
if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 &&
(fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) {
- fattr->pre_ctime = inode->i_ctime;
+ fattr->pre_ctime = inode_get_ctime(inode);
fattr->valid |= NFS_ATTR_FATTR_PRECTIME;
}
if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 &&
(fattr->valid & NFS_ATTR_FATTR_PREMTIME) == 0) {
- fattr->pre_mtime = inode->i_mtime;
+ fattr->pre_mtime = inode_get_mtime(inode);
fattr->valid |= NFS_ATTR_FATTR_PREMTIME;
}
if ((fattr->valid & NFS_ATTR_FATTR_SIZE) != 0 &&
@@ -1995,10 +2254,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
bool attr_changed = false;
bool have_delegation;
- dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n",
+ dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%llx)\n",
__func__, inode->i_sb->s_id, inode->i_ino,
nfs_display_fhandle_hash(NFS_FH(inode)),
- atomic_read(&inode->i_count), fattr->valid);
+ icount_read(inode), fattr->valid);
if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) {
/* Only a mounted-on-fileid? Just exit */
@@ -2044,6 +2303,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
*/
nfsi->read_cache_jiffies = fattr->time_start;
+ /* Fix up any delegated attributes in the struct nfs_fattr */
+ nfs_fattr_fixup_delegated(inode, fattr);
+
save_cache_validity = nfsi->cache_validity;
nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ATIME
@@ -2064,6 +2326,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
/* More cache consistency checks */
if (fattr->valid & NFS_ATTR_FATTR_CHANGE) {
+ if (!have_writers && nfsi->ooo && nfsi->ooo->cnt == 1 &&
+ nfsi->ooo->gap[0].end == inode_peek_iversion_raw(inode)) {
+ /* There is one remaining gap that hasn't been
+ * merged into iversion - do that now.
+ */
+ inode_set_iversion_raw(inode, nfsi->ooo->gap[0].start);
+ kfree(nfsi->ooo);
+ nfsi->ooo = NULL;
+ }
if (!inode_eq_iversion_raw(inode, fattr->change_attr)) {
/* Could it be a race with writeback? */
if (!(have_writers || have_delegation)) {
@@ -2078,15 +2349,19 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
| NFS_INO_INVALID_BLOCKS
| NFS_INO_INVALID_NLINK
| NFS_INO_INVALID_MODE
- | NFS_INO_INVALID_OTHER;
+ | NFS_INO_INVALID_OTHER
+ | NFS_INO_INVALID_BTIME;
if (S_ISDIR(inode->i_mode))
nfs_force_lookup_revalidate(inode);
attr_changed = true;
dprintk("NFS: change_attr change on server for file %s/%ld\n",
inode->i_sb->s_id,
inode->i_ino);
- } else if (!have_delegation)
- nfsi->cache_validity |= NFS_INO_DATA_INVAL_DEFER;
+ } else if (!have_delegation) {
+ nfs_ooo_record(nfsi, fattr);
+ nfs_ooo_merge(nfsi, inode_peek_iversion_raw(inode),
+ fattr->change_attr);
+ }
inode_set_iversion_raw(inode, fattr->change_attr);
}
} else {
@@ -2098,17 +2373,23 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
}
if (fattr->valid & NFS_ATTR_FATTR_MTIME)
- inode->i_mtime = fattr->mtime;
+ inode_set_mtime_to_ts(inode, fattr->mtime);
else if (fattr_supported & NFS_ATTR_FATTR_MTIME)
nfsi->cache_validity |=
save_cache_validity & NFS_INO_INVALID_MTIME;
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
- inode->i_ctime = fattr->ctime;
+ inode_set_ctime_to_ts(inode, fattr->ctime);
else if (fattr_supported & NFS_ATTR_FATTR_CTIME)
nfsi->cache_validity |=
save_cache_validity & NFS_INO_INVALID_CTIME;
+ if (fattr->valid & NFS_ATTR_FATTR_BTIME)
+ nfsi->btime = fattr->btime;
+ else if (fattr_supported & NFS_ATTR_FATTR_BTIME)
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_BTIME;
+
/* Check if our cached file size is stale */
if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
new_isize = nfs_size_to_loff_t(fattr->size);
@@ -2134,7 +2415,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
save_cache_validity & NFS_INO_INVALID_SIZE;
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
- inode->i_atime = fattr->atime;
+ inode_set_atime_to_ts(inode, fattr->atime);
else if (fattr_supported & NFS_ATTR_FATTR_ATIME)
nfsi->cache_validity |=
save_cache_validity & NFS_INO_INVALID_ATIME;
@@ -2240,18 +2521,22 @@ struct inode *nfs_alloc_inode(struct super_block *sb)
return NULL;
nfsi->flags = 0UL;
nfsi->cache_validity = 0UL;
+ nfsi->ooo = NULL;
#if IS_ENABLED(CONFIG_NFS_V4)
nfsi->nfs4_acl = NULL;
#endif /* CONFIG_NFS_V4 */
#ifdef CONFIG_NFS_V4_2
nfsi->xattr_cache = NULL;
#endif
+ nfs_netfs_inode_init(nfsi);
+
return &nfsi->vfs_inode;
}
EXPORT_SYMBOL_GPL(nfs_alloc_inode);
void nfs_free_inode(struct inode *inode)
{
+ kfree(NFS_I(inode)->ooo);
kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
}
EXPORT_SYMBOL_GPL(nfs_free_inode);
@@ -2282,7 +2567,7 @@ static int __init nfs_init_inodecache(void)
nfs_inode_cachep = kmem_cache_create("nfs_inode_cache",
sizeof(struct nfs_inode),
0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
init_once);
if (nfs_inode_cachep == NULL)
return -ENOMEM;
@@ -2300,35 +2585,54 @@ static void nfs_destroy_inodecache(void)
kmem_cache_destroy(nfs_inode_cachep);
}
+struct workqueue_struct *nfslocaliod_workqueue;
struct workqueue_struct *nfsiod_workqueue;
EXPORT_SYMBOL_GPL(nfsiod_workqueue);
/*
- * start up the nfsiod workqueue
+ * Destroy the nfsiod workqueues
*/
-static int nfsiod_start(void)
+static void nfsiod_stop(void)
{
struct workqueue_struct *wq;
- dprintk("RPC: creating workqueue nfsiod\n");
- wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
- if (wq == NULL)
- return -ENOMEM;
- nfsiod_workqueue = wq;
- return 0;
+
+ wq = nfsiod_workqueue;
+ if (wq != NULL) {
+ nfsiod_workqueue = NULL;
+ destroy_workqueue(wq);
+ }
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ wq = nfslocaliod_workqueue;
+ if (wq != NULL) {
+ nfslocaliod_workqueue = NULL;
+ destroy_workqueue(wq);
+ }
+#endif /* CONFIG_NFS_LOCALIO */
}
/*
- * Destroy the nfsiod workqueue
+ * Start the nfsiod workqueues
*/
-static void nfsiod_stop(void)
+static int nfsiod_start(void)
{
- struct workqueue_struct *wq;
-
- wq = nfsiod_workqueue;
- if (wq == NULL)
- return;
- nfsiod_workqueue = NULL;
- destroy_workqueue(wq);
+ dprintk("RPC: creating workqueue nfsiod\n");
+ nfsiod_workqueue = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
+ if (nfsiod_workqueue == NULL)
+ return -ENOMEM;
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ /*
+ * localio writes need to use a normal (non-memreclaim) workqueue.
+ * When we start getting low on space, XFS goes and calls flush_work() on
+ * a non-memreclaim work queue, which causes a priority inversion problem.
+ */
+ dprintk("RPC: creating workqueue nfslocaliod\n");
+ nfslocaliod_workqueue = alloc_workqueue("nfslocaliod", WQ_UNBOUND, 0);
+ if (unlikely(nfslocaliod_workqueue == NULL)) {
+ nfsiod_stop();
+ return -ENOMEM;
+ }
+#endif /* CONFIG_NFS_LOCALIO */
+ return 0;
}
unsigned int nfs_net_id;
@@ -2336,12 +2640,32 @@ EXPORT_SYMBOL_GPL(nfs_net_id);
static int nfs_net_init(struct net *net)
{
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
+ int err;
+
nfs_clients_init(net);
- return nfs_fs_proc_net_init(net);
+
+ if (!rpc_proc_register(net, &nn->rpcstats)) {
+ err = -ENOMEM;
+ goto err_proc_rpc;
+ }
+
+ err = nfs_fs_proc_net_init(net);
+ if (err)
+ goto err_proc_nfs;
+
+ return 0;
+
+err_proc_nfs:
+ rpc_proc_unregister(net, "nfs");
+err_proc_rpc:
+ nfs_clients_exit(net);
+ return err;
}
static void nfs_net_exit(struct net *net)
{
+ rpc_proc_unregister(net, "nfs");
nfs_fs_proc_net_exit(net);
nfs_clients_exit(net);
}
@@ -2353,6 +2677,35 @@ static struct pernet_operations nfs_net_ops = {
.size = sizeof(struct nfs_net),
};
+#ifdef CONFIG_KEYS
+static struct key *nfs_keyring;
+
+static int __init nfs_init_keyring(void)
+{
+ nfs_keyring = keyring_alloc(".nfs",
+ GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+ current_cred(),
+ (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ (KEY_USR_ALL & ~KEY_USR_SETATTR),
+ KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
+ return PTR_ERR_OR_ZERO(nfs_keyring);
+}
+
+static void nfs_exit_keyring(void)
+{
+ key_put(nfs_keyring);
+}
+#else
+static inline int nfs_init_keyring(void)
+{
+ return 0;
+}
+
+static inline void nfs_exit_keyring(void)
+{
+}
+#endif /* CONFIG_KEYS */
+
/*
* Initialize NFS
*/
@@ -2360,6 +2713,10 @@ static int __init init_nfs_fs(void)
{
int err;
+ err = nfs_init_keyring();
+ if (err)
+ return err;
+
err = nfs_sysfs_init();
if (err < 0)
goto out10;
@@ -2396,15 +2753,12 @@ static int __init init_nfs_fs(void)
if (err)
goto out1;
- rpc_proc_register(&init_net, &nfs_rpcstat);
-
err = register_nfs_fs();
if (err)
goto out0;
return 0;
out0:
- rpc_proc_unregister(&init_net, "nfs");
nfs_destroy_directcache();
out1:
nfs_destroy_writepagecache();
@@ -2423,6 +2777,7 @@ out7:
out9:
nfs_sysfs_exit();
out10:
+ nfs_exit_keyring();
return err;
}
@@ -2434,15 +2789,16 @@ static void __exit exit_nfs_fs(void)
nfs_destroy_inodecache();
nfs_destroy_nfspagecache();
unregister_pernet_subsys(&nfs_net_ops);
- rpc_proc_unregister(&init_net, "nfs");
unregister_nfs_fs();
nfs_fs_proc_exit();
nfsiod_stop();
nfs_sysfs_exit();
+ nfs_exit_keyring();
}
/* Not quite true; I just maintain it */
MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
+MODULE_DESCRIPTION("NFS client support");
MODULE_LICENSE("GPL");
module_param(enable_ino64, bool, 0644);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ae7d4a8c728c..2ecd38e1d17a 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -6,12 +6,14 @@
#include "nfs4_fs.h"
#include <linux/fs_context.h>
#include <linux/security.h>
+#include <linux/compiler_attributes.h>
#include <linux/crc32.h>
#include <linux/sunrpc/addr.h>
#include <linux/nfs_page.h>
+#include <linux/nfslocalio.h>
#include <linux/wait_bit.h>
-#define NFS_SB_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS)
+#define NFS_SB_MASK (SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS)
extern const struct export_operations nfs_export_ops;
@@ -81,6 +83,9 @@ struct nfs_client_initdata {
struct net *net;
const struct rpc_timeout *timeparms;
const struct cred *cred;
+ struct xprtsec_parms xprtsec;
+ unsigned long connect_timeout;
+ unsigned long reconnect_timeout;
};
/*
@@ -101,6 +106,7 @@ struct nfs_fs_context {
unsigned int bsize;
struct nfs_auth_info auth_info;
rpc_authflavor_t selected_flavor;
+ struct xprtsec_parms xprtsec;
char *client_address;
unsigned int version;
unsigned int minorversion;
@@ -108,6 +114,7 @@ struct nfs_fs_context {
unsigned short protofamily;
unsigned short mountfamily;
bool has_sec_mnt_opts;
+ int lock_status;
struct {
union {
@@ -149,6 +156,12 @@ struct nfs_fs_context {
} clone_data;
};
+enum nfs_lock_status {
+ NFS_LOCK_NOT_SET = 0,
+ NFS_LOCK_LOCK = 1,
+ NFS_LOCK_NOLOCK = 2,
+};
+
#define nfs_errorf(fc, fmt, ...) ((fc)->log.log ? \
errorf(fc, fmt, ## __VA_ARGS__) : \
({ dprintk(fmt "\n", ## __VA_ARGS__); }))
@@ -194,7 +207,6 @@ struct nfs_mount_request {
};
extern int nfs_mount(struct nfs_mount_request *info, int timeo, int retrans);
-extern void nfs_umount(const struct nfs_mount_request *info);
/* client.c */
extern const struct rpc_program nfs_program;
@@ -219,7 +231,7 @@ extern struct nfs_client *
nfs4_find_client_sessionid(struct net *, const struct sockaddr *,
struct nfs4_sessionid *, u32);
extern struct nfs_server *nfs_create_server(struct fs_context *);
-extern void nfs4_server_set_init_caps(struct nfs_server *);
+extern void nfs_server_set_init_caps(struct nfs_server *);
extern struct nfs_server *nfs4_create_server(struct fs_context *);
extern struct nfs_server *nfs4_create_referral_server(struct fs_context *);
extern int nfs4_update_server(struct nfs_server *server, const char *hostname,
@@ -297,7 +309,8 @@ void nfs_pgio_header_free(struct nfs_pgio_header *);
int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
const struct cred *cred, const struct nfs_rpc_ops *rpc_ops,
- const struct rpc_call_ops *call_ops, int how, int flags);
+ const struct rpc_call_ops *call_ops, int how, int flags,
+ struct nfsd_file *localio);
void nfs_free_request(struct nfs_page *req);
struct nfs_pgio_mirror *
nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc);
@@ -384,18 +397,18 @@ extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
struct shrink_control *sc);
struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int);
void nfs_d_prune_case_insensitive_aliases(struct inode *inode);
-int nfs_create(struct user_namespace *, struct inode *, struct dentry *,
+int nfs_create(struct mnt_idmap *, struct inode *, struct dentry *,
umode_t, bool);
-int nfs_mkdir(struct user_namespace *, struct inode *, struct dentry *,
- umode_t);
+struct dentry *nfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *,
+ umode_t);
int nfs_rmdir(struct inode *, struct dentry *);
int nfs_unlink(struct inode *, struct dentry *);
-int nfs_symlink(struct user_namespace *, struct inode *, struct dentry *,
+int nfs_symlink(struct mnt_idmap *, struct inode *, struct dentry *,
const char *);
int nfs_link(struct dentry *, struct inode *, struct dentry *);
-int nfs_mknod(struct user_namespace *, struct inode *, struct dentry *, umode_t,
+int nfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t,
dev_t);
-int nfs_rename(struct user_namespace *, struct inode *, struct dentry *,
+int nfs_rename(struct mnt_idmap *, struct inode *, struct dentry *,
struct inode *, struct dentry *, unsigned int);
#ifdef CONFIG_NFS_V4_2
@@ -416,15 +429,20 @@ static inline __u32 nfs_access_xattr_mask(const struct nfs_server *server)
int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync);
loff_t nfs_file_llseek(struct file *, loff_t, int);
ssize_t nfs_file_read(struct kiocb *, struct iov_iter *);
-int nfs_file_mmap(struct file *, struct vm_area_struct *);
+ssize_t nfs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags);
+int nfs_file_mmap_prepare(struct vm_area_desc *);
ssize_t nfs_file_write(struct kiocb *, struct iov_iter *);
int nfs_file_release(struct inode *, struct file *);
int nfs_lock(struct file *, int, struct file_lock *);
int nfs_flock(struct file *, int, struct file_lock *);
int nfs_check_flags(int);
+void nfs_truncate_last_folio(struct address_space *mapping, loff_t from,
+ loff_t to);
/* inode.c */
extern struct workqueue_struct *nfsiod_workqueue;
+extern struct workqueue_struct *nfslocaliod_workqueue;
extern struct inode *nfs_alloc_inode(struct super_block *sb);
extern void nfs_free_inode(struct inode *);
extern int nfs_write_inode(struct inode *, struct writeback_control *);
@@ -436,6 +454,63 @@ extern void nfs_set_cache_invalid(struct inode *inode, unsigned long flags);
extern bool nfs_check_cache_invalid(struct inode *, unsigned long);
extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode);
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+/* localio.c */
+struct nfs_local_dio {
+ u32 mem_align;
+ u32 offset_align;
+ loff_t middle_offset;
+ loff_t end_offset;
+ ssize_t start_len; /* Length for misaligned first extent */
+ ssize_t middle_len; /* Length for DIO-aligned middle extent */
+ ssize_t end_len; /* Length for misaligned last extent */
+};
+
+extern void nfs_local_probe_async(struct nfs_client *);
+extern void nfs_local_probe_async_work(struct work_struct *);
+extern struct nfsd_file *nfs_local_open_fh(struct nfs_client *,
+ const struct cred *,
+ struct nfs_fh *,
+ struct nfs_file_localio *,
+ const fmode_t);
+extern int nfs_local_doio(struct nfs_client *,
+ struct nfsd_file *,
+ struct nfs_pgio_header *,
+ const struct rpc_call_ops *);
+extern int nfs_local_commit(struct nfsd_file *,
+ struct nfs_commit_data *,
+ const struct rpc_call_ops *, int);
+extern bool nfs_server_is_local(const struct nfs_client *clp);
+
+#else /* CONFIG_NFS_LOCALIO */
+static inline void nfs_local_probe(struct nfs_client *clp) {}
+static inline void nfs_local_probe_async(struct nfs_client *clp) {}
+static inline struct nfsd_file *
+nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
+ struct nfs_fh *fh, struct nfs_file_localio *nfl,
+ const fmode_t mode)
+{
+ return NULL;
+}
+static inline int nfs_local_doio(struct nfs_client *clp,
+ struct nfsd_file *localio,
+ struct nfs_pgio_header *hdr,
+ const struct rpc_call_ops *call_ops)
+{
+ return -EINVAL;
+}
+static inline int nfs_local_commit(struct nfsd_file *localio,
+ struct nfs_commit_data *data,
+ const struct rpc_call_ops *call_ops, int how)
+{
+ return -EINVAL;
+}
+static inline bool nfs_server_is_local(const struct nfs_client *clp)
+{
+ return false;
+}
+#endif /* CONFIG_NFS_LOCALIO */
+
/* super.c */
extern const struct super_operations nfs_sops;
bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t);
@@ -443,8 +518,6 @@ int nfs_try_get_tree(struct fs_context *);
int nfs_get_tree_common(struct fs_context *);
void nfs_kill_super(struct super_block *);
-extern struct rpc_stat nfs_rpcstat;
-
extern int __init register_nfs_fs(void);
extern void __exit unregister_nfs_fs(void);
extern bool nfs_sb_active(struct super_block *sb);
@@ -452,12 +525,16 @@ extern void nfs_sb_deactive(struct super_block *sb);
extern int nfs_client_for_each_server(struct nfs_client *clp,
int (*fn)(struct nfs_server *, void *),
void *data);
+#ifdef CONFIG_NFS_FSCACHE
+extern const struct netfs_request_ops nfs_netfs_ops;
+#endif
+
/* io.c */
-extern void nfs_start_io_read(struct inode *inode);
+extern __must_check int nfs_start_io_read(struct inode *inode);
extern void nfs_end_io_read(struct inode *inode);
-extern void nfs_start_io_write(struct inode *inode);
+extern __must_check int nfs_start_io_write(struct inode *inode);
extern void nfs_end_io_write(struct inode *inode);
-extern void nfs_start_io_direct(struct inode *inode);
+extern __must_check int nfs_start_io_direct(struct inode *inode);
extern void nfs_end_io_direct(struct inode *inode);
static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi)
@@ -465,6 +542,16 @@ static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi)
return test_bit(NFS_INO_ODIRECT, &nfsi->flags) == 0;
}
+/* Must be called with exclusively locked inode->i_rwsem */
+static inline void nfs_file_block_o_direct(struct nfs_inode *nfsi)
+{
+ if (test_bit(NFS_INO_ODIRECT, &nfsi->flags)) {
+ clear_bit(NFS_INO_ODIRECT, &nfsi->flags);
+ inode_dio_wait(&nfsi->vfs_inode);
+ }
+}
+
+
/* namespace.c */
#define NFS_PATH_CANONICAL 1
extern char *nfs_path(char **p, struct dentry *dentry,
@@ -481,10 +568,15 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool
struct nfs_pgio_completion_ops;
/* read.c */
+extern const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
struct inode *inode, bool force_mds,
const struct nfs_pgio_completion_ops *compl_ops);
-extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
+extern bool nfs_read_alloc_scratch(struct nfs_pgio_header *hdr, size_t size);
+extern int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio,
+ struct nfs_open_context *ctx,
+ struct folio *folio);
+extern void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio);
extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
/* super.c */
@@ -507,7 +599,8 @@ extern int nfs_initiate_commit(struct rpc_clnt *clnt,
struct nfs_commit_data *data,
const struct nfs_rpc_ops *nfs_ops,
const struct rpc_call_ops *call_ops,
- int how, int flags);
+ int how, int flags,
+ struct nfsd_file *localio);
extern void nfs_init_commit(struct nfs_commit_data *data,
struct list_head *head,
struct pnfs_layout_segment *lseg,
@@ -599,9 +692,12 @@ nfs_write_match_verf(const struct nfs_writeverf *verf,
static inline gfp_t nfs_io_gfp_mask(void)
{
- if (current->flags & PF_WQ_WORKER)
- return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
- return GFP_KERNEL;
+ gfp_t ret = current_gfp_context(GFP_KERNEL);
+
+ /* For workers __GFP_NORETRY only with __GFP_IO or __GFP_FS */
+ if ((current->flags & PF_WQ_WORKER) && ret == GFP_KERNEL)
+ ret |= __GFP_NORETRY | __GFP_NOWARN;
+ return ret;
}
/*
@@ -639,7 +735,7 @@ extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry);
/* direct.c */
void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
struct nfs_direct_req *dreq);
-extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
+extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq, loff_t offset);
/* nfs4proc.c */
extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
@@ -696,9 +792,9 @@ unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp)
if ((bsize & (bsize - 1)) || nrbitsp) {
unsigned char nrbits;
- for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--)
+ for (nrbits = 31; nrbits && !(bsize & (1UL << nrbits)); nrbits--)
;
- bsize = 1 << nrbits;
+ bsize = 1UL << nrbits;
if (nrbitsp)
*nrbitsp = nrbits;
}
@@ -760,17 +856,18 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize)
* Record the page as unstable (an extra writeback period) and mark its
* inode as dirty.
*/
-static inline
-void nfs_mark_page_unstable(struct page *page, struct nfs_commit_info *cinfo)
+static inline void nfs_folio_mark_unstable(struct folio *folio,
+ struct nfs_commit_info *cinfo)
{
- if (!cinfo->dreq) {
- struct inode *inode = page_file_mapping(page)->host;
+ if (folio && !cinfo->dreq) {
+ struct inode *inode = folio->mapping->host;
+ long nr = folio_nr_pages(folio);
/* This page is really still in write-back - just that the
* writeback is happening on the server now.
*/
- inc_node_page_state(page, NR_WRITEBACK);
- inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
+ node_stat_mod_folio(folio, NR_WRITEBACK, nr);
+ wb_stat_mod(&inode_to_bdi(inode)->wb, WB_WRITEBACK, nr);
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
}
}
@@ -778,18 +875,17 @@ void nfs_mark_page_unstable(struct page *page, struct nfs_commit_info *cinfo)
/*
* Determine the number of bytes of data the page contains
*/
-static inline
-unsigned int nfs_page_length(struct page *page)
+static inline size_t nfs_folio_length(struct folio *folio)
{
- loff_t i_size = i_size_read(page_file_mapping(page)->host);
+ loff_t i_size = i_size_read(folio->mapping->host);
if (i_size > 0) {
- pgoff_t index = page_index(page);
- pgoff_t end_index = (i_size - 1) >> PAGE_SHIFT;
+ pgoff_t index = folio->index >> folio_order(folio);
+ pgoff_t end_index = (i_size - 1) >> folio_shift(folio);
if (index < end_index)
- return PAGE_SIZE;
+ return folio_size(folio);
if (index == end_index)
- return ((i_size - 1) & ~PAGE_MASK) + 1;
+ return offset_in_folio(folio, i_size - 1) + 1;
}
return 0;
}
@@ -807,11 +903,10 @@ unsigned char nfs_umode_to_dtype(umode_t mode)
* Determine the number of pages in an array of length 'len' and
* with a base offset of 'base'
*/
-static inline
-unsigned int nfs_page_array_len(unsigned int base, size_t len)
+static inline unsigned int nfs_page_array_len(unsigned int base, size_t len)
{
- return ((unsigned long)len + (unsigned long)base +
- PAGE_SIZE - 1) >> PAGE_SHIFT;
+ return ((unsigned long)len + (unsigned long)base + PAGE_SIZE - 1) >>
+ PAGE_SHIFT;
}
/*
@@ -827,33 +922,16 @@ u64 nfs_timespec_to_change_attr(const struct timespec64 *ts)
return ((u64)ts->tv_sec << 30) + ts->tv_nsec;
}
-#ifdef CONFIG_CRC32
-/**
- * nfs_fhandle_hash - calculate the crc32 hash for the filehandle
- * @fh - pointer to filehandle
- *
- * returns a crc32 hash for the filehandle that is compatible with
- * the one displayed by "wireshark".
- */
-static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh)
-{
- return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size);
-}
static inline u32 nfs_stateid_hash(const nfs4_stateid *stateid)
{
return ~crc32_le(0xFFFFFFFF, &stateid->other[0],
NFS4_STATEID_OTHER_SIZE);
}
-#else
-static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh)
-{
- return 0;
-}
-static inline u32 nfs_stateid_hash(nfs4_stateid *stateid)
+
+static inline bool nfs_current_task_exiting(void)
{
- return 0;
+ return (current->flags & PF_EXITING) != 0;
}
-#endif
static inline bool nfs_error_is_fatal(int err)
{
@@ -917,7 +995,6 @@ struct nfs_direct_req {
loff_t io_start; /* Start offset for I/O */
ssize_t count, /* bytes actually processed */
max_count, /* max expected count */
- bytes_left, /* bytes left to be sent */
error; /* any reported error */
struct completion completion; /* wait for i/o completion */
diff --git a/fs/nfs/io.c b/fs/nfs/io.c
index b5551ed8f648..d275b0a250bf 100644
--- a/fs/nfs/io.c
+++ b/fs/nfs/io.c
@@ -14,15 +14,6 @@
#include "internal.h"
-/* Call with exclusively locked inode->i_rwsem */
-static void nfs_block_o_direct(struct nfs_inode *nfsi, struct inode *inode)
-{
- if (test_bit(NFS_INO_ODIRECT, &nfsi->flags)) {
- clear_bit(NFS_INO_ODIRECT, &nfsi->flags);
- inode_dio_wait(inode);
- }
-}
-
/**
* nfs_start_io_read - declare the file is being used for buffered reads
* @inode: file inode
@@ -39,19 +30,28 @@ static void nfs_block_o_direct(struct nfs_inode *nfsi, struct inode *inode)
* Note that buffered writes and truncates both take a write lock on
* inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
*/
-void
+int
nfs_start_io_read(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ int err;
+
/* Be an optimist! */
- down_read(&inode->i_rwsem);
+ err = down_read_killable(&inode->i_rwsem);
+ if (err)
+ return err;
if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) == 0)
- return;
+ return 0;
up_read(&inode->i_rwsem);
+
/* Slow path.... */
- down_write(&inode->i_rwsem);
- nfs_block_o_direct(nfsi, inode);
+ err = down_write_killable(&inode->i_rwsem);
+ if (err)
+ return err;
+ nfs_file_block_o_direct(nfsi);
downgrade_write(&inode->i_rwsem);
+
+ return 0;
}
/**
@@ -74,11 +74,15 @@ nfs_end_io_read(struct inode *inode)
* Declare that a buffered read operation is about to start, and ensure
* that we block all direct I/O.
*/
-void
+int
nfs_start_io_write(struct inode *inode)
{
- down_write(&inode->i_rwsem);
- nfs_block_o_direct(NFS_I(inode), inode);
+ int err;
+
+ err = down_write_killable(&inode->i_rwsem);
+ if (!err)
+ nfs_file_block_o_direct(NFS_I(inode));
+ return err;
}
/**
@@ -119,19 +123,28 @@ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode)
* Note that buffered writes and truncates both take a write lock on
* inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
*/
-void
+int
nfs_start_io_direct(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ int err;
+
/* Be an optimist! */
- down_read(&inode->i_rwsem);
+ err = down_read_killable(&inode->i_rwsem);
+ if (err)
+ return err;
if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) != 0)
- return;
+ return 0;
up_read(&inode->i_rwsem);
+
/* Slow path.... */
- down_write(&inode->i_rwsem);
+ err = down_write_killable(&inode->i_rwsem);
+ if (err)
+ return err;
nfs_block_buffered(nfsi, inode);
downgrade_write(&inode->i_rwsem);
+
+ return 0;
}
/**
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
index 2ddaab1ac653..49862c95b224 100644
--- a/fs/nfs/iostat.h
+++ b/fs/nfs/iostat.h
@@ -17,9 +17,6 @@
struct nfs_iostats {
unsigned long long bytes[__NFSIOS_BYTESMAX];
-#ifdef CONFIG_NFS_FSCACHE
- unsigned long long fscache[__NFSIOS_FSCACHEMAX];
-#endif
unsigned long events[__NFSIOS_COUNTSMAX];
} ____cacheline_aligned;
@@ -49,24 +46,11 @@ static inline void nfs_add_stats(const struct inode *inode,
nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
}
-#ifdef CONFIG_NFS_FSCACHE
-static inline void nfs_add_fscache_stats(struct inode *inode,
- enum nfs_stat_fscachecounters stat,
- long addend)
-{
- this_cpu_add(NFS_SERVER(inode)->io_stats->fscache[stat], addend);
-}
-static inline void nfs_inc_fscache_stats(struct inode *inode,
- enum nfs_stat_fscachecounters stat)
-{
- this_cpu_inc(NFS_SERVER(inode)->io_stats->fscache[stat]);
-}
-#endif
-
-static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void)
-{
- return alloc_percpu(struct nfs_iostats);
-}
+/*
+ * This specialized allocator has to be a macro for its allocations to be
+ * accounted separately (to have a separate alloc_tag).
+ */
+#define nfs_alloc_iostats() alloc_percpu(struct nfs_iostats)
static inline void nfs_free_iostats(struct nfs_iostats __percpu *stats)
{
diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
new file mode 100644
index 000000000000..f33bfa7b58e6
--- /dev/null
+++ b/fs/nfs/localio.c
@@ -0,0 +1,1072 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * NFS client support for local clients to bypass network stack
+ *
+ * Copyright (C) 2014 Weston Andros Adamson <dros@primarydata.com>
+ * Copyright (C) 2019 Trond Myklebust <trond.myklebust@hammerspace.com>
+ * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com>
+ * Copyright (C) 2024 NeilBrown <neilb@suse.de>
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/vfs.h>
+#include <linux/file.h>
+#include <linux/inet.h>
+#include <linux/sunrpc/addr.h>
+#include <linux/inetdevice.h>
+#include <net/addrconf.h>
+#include <linux/nfs_common.h>
+#include <linux/nfslocalio.h>
+#include <linux/bvec.h>
+
+#include <linux/nfs.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_xdr.h>
+
+#include "internal.h"
+#include "pnfs.h"
+#include "nfstrace.h"
+
+#define NFSDBG_FACILITY NFSDBG_VFS
+
+#define NFSLOCAL_MAX_IOS 3
+
+struct nfs_local_kiocb {
+ struct kiocb kiocb;
+ struct bio_vec *bvec;
+ struct nfs_pgio_header *hdr;
+ struct work_struct work;
+ void (*aio_complete_work)(struct work_struct *);
+ struct nfsd_file *localio;
+ /* Begin mostly DIO-specific members */
+ size_t end_len;
+ short int end_iter_index;
+ atomic_t n_iters;
+ bool iter_is_dio_aligned[NFSLOCAL_MAX_IOS];
+ struct iov_iter iters[NFSLOCAL_MAX_IOS] ____cacheline_aligned;
+ /* End mostly DIO-specific members */
+};
+
+struct nfs_local_fsync_ctx {
+ struct nfsd_file *localio;
+ struct nfs_commit_data *data;
+ struct work_struct work;
+ struct completion *done;
+};
+
+static bool localio_enabled __read_mostly = true;
+module_param(localio_enabled, bool, 0644);
+
+static inline bool nfs_client_is_local(const struct nfs_client *clp)
+{
+ return !!rcu_access_pointer(clp->cl_uuid.net);
+}
+
+bool nfs_server_is_local(const struct nfs_client *clp)
+{
+ return nfs_client_is_local(clp) && localio_enabled;
+}
+EXPORT_SYMBOL_GPL(nfs_server_is_local);
+
+/*
+ * UUID_IS_LOCAL XDR functions
+ */
+
+static void localio_xdr_enc_uuidargs(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const void *data)
+{
+ const u8 *uuid = data;
+
+ encode_opaque_fixed(xdr, uuid, UUID_SIZE);
+}
+
+static int localio_xdr_dec_uuidres(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ void *result)
+{
+ /* void return */
+ return 0;
+}
+
+static const struct rpc_procinfo nfs_localio_procedures[] = {
+ [LOCALIOPROC_UUID_IS_LOCAL] = {
+ .p_proc = LOCALIOPROC_UUID_IS_LOCAL,
+ .p_encode = localio_xdr_enc_uuidargs,
+ .p_decode = localio_xdr_dec_uuidres,
+ .p_arglen = XDR_QUADLEN(UUID_SIZE),
+ .p_replen = 0,
+ .p_statidx = LOCALIOPROC_UUID_IS_LOCAL,
+ .p_name = "UUID_IS_LOCAL",
+ },
+};
+
+static unsigned int nfs_localio_counts[ARRAY_SIZE(nfs_localio_procedures)];
+static const struct rpc_version nfslocalio_version1 = {
+ .number = 1,
+ .nrprocs = ARRAY_SIZE(nfs_localio_procedures),
+ .procs = nfs_localio_procedures,
+ .counts = nfs_localio_counts,
+};
+
+static const struct rpc_version *nfslocalio_version[] = {
+ [1] = &nfslocalio_version1,
+};
+
+extern const struct rpc_program nfslocalio_program;
+static struct rpc_stat nfslocalio_rpcstat = { &nfslocalio_program };
+
+const struct rpc_program nfslocalio_program = {
+ .name = "nfslocalio",
+ .number = NFS_LOCALIO_PROGRAM,
+ .nrvers = ARRAY_SIZE(nfslocalio_version),
+ .version = nfslocalio_version,
+ .stats = &nfslocalio_rpcstat,
+};
+
+/*
+ * nfs_init_localioclient - Initialise an NFS localio client connection
+ */
+static struct rpc_clnt *nfs_init_localioclient(struct nfs_client *clp)
+{
+ struct rpc_clnt *rpcclient_localio;
+
+ rpcclient_localio = rpc_bind_new_program(clp->cl_rpcclient,
+ &nfslocalio_program, 1);
+
+ dprintk_rcu("%s: server (%s) %s NFS LOCALIO.\n",
+ __func__, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
+ (IS_ERR(rpcclient_localio) ? "does not support" : "supports"));
+
+ return rpcclient_localio;
+}
+
+static bool nfs_server_uuid_is_local(struct nfs_client *clp)
+{
+ u8 uuid[UUID_SIZE];
+ struct rpc_message msg = {
+ .rpc_argp = &uuid,
+ };
+ struct rpc_clnt *rpcclient_localio;
+ int status;
+
+ rpcclient_localio = nfs_init_localioclient(clp);
+ if (IS_ERR(rpcclient_localio))
+ return false;
+
+ export_uuid(uuid, &clp->cl_uuid.uuid);
+
+ msg.rpc_proc = &nfs_localio_procedures[LOCALIOPROC_UUID_IS_LOCAL];
+ status = rpc_call_sync(rpcclient_localio, &msg, 0);
+ dprintk("%s: NFS reply UUID_IS_LOCAL: status=%d\n",
+ __func__, status);
+ rpc_shutdown_client(rpcclient_localio);
+
+ /* Server is only local if it initialized required struct members */
+ if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom)
+ return false;
+
+ return true;
+}
+
+/*
+ * nfs_local_probe - probe local i/o support for an nfs_server and nfs_client
+ * - called after alloc_client and init_client (so cl_rpcclient exists)
+ * - this function is idempotent, it can be called for old or new clients
+ */
+static void nfs_local_probe(struct nfs_client *clp)
+{
+ /* Disallow localio if disabled via sysfs or AUTH_SYS isn't used */
+ if (!localio_enabled ||
+ clp->cl_rpcclient->cl_auth->au_flavor != RPC_AUTH_UNIX) {
+ nfs_localio_disable_client(clp);
+ return;
+ }
+
+ if (nfs_client_is_local(clp))
+ return;
+
+ if (!nfs_uuid_begin(&clp->cl_uuid))
+ return;
+ if (nfs_server_uuid_is_local(clp))
+ nfs_localio_enable_client(clp);
+ nfs_uuid_end(&clp->cl_uuid);
+}
+
+void nfs_local_probe_async_work(struct work_struct *work)
+{
+ struct nfs_client *clp =
+ container_of(work, struct nfs_client, cl_local_probe_work);
+
+ if (!refcount_inc_not_zero(&clp->cl_count))
+ return;
+ nfs_local_probe(clp);
+ nfs_put_client(clp);
+}
+
+void nfs_local_probe_async(struct nfs_client *clp)
+{
+ queue_work(nfsiod_workqueue, &clp->cl_local_probe_work);
+}
+EXPORT_SYMBOL_GPL(nfs_local_probe_async);
+
+static inline void nfs_local_file_put(struct nfsd_file *localio)
+{
+ /* nfs_to_nfsd_file_put_local() expects an __rcu pointer
+ * but we have a __kernel pointer. It is always safe
+ * to cast a __kernel pointer to an __rcu pointer
+ * because the cast only weakens what is known about the pointer.
+ */
+ struct nfsd_file __rcu *nf = (struct nfsd_file __rcu*) localio;
+
+ nfs_to_nfsd_file_put_local(&nf);
+}
+
+/*
+ * __nfs_local_open_fh - open a local filehandle in terms of nfsd_file.
+ *
+ * Returns a pointer to a struct nfsd_file or ERR_PTR.
+ * Caller must release returned nfsd_file with nfs_to_nfsd_file_put_local().
+ */
+static struct nfsd_file *
+__nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
+ struct nfs_fh *fh, struct nfs_file_localio *nfl,
+ struct nfsd_file __rcu **pnf,
+ const fmode_t mode)
+{
+ int status = 0;
+ struct nfsd_file *localio;
+
+ localio = nfs_open_local_fh(&clp->cl_uuid, clp->cl_rpcclient,
+ cred, fh, nfl, pnf, mode);
+ if (IS_ERR(localio)) {
+ status = PTR_ERR(localio);
+ switch (status) {
+ case -ENOMEM:
+ case -ENXIO:
+ case -ENOENT:
+ /* Revalidate localio */
+ nfs_localio_disable_client(clp);
+ nfs_local_probe(clp);
+ }
+ }
+ trace_nfs_local_open_fh(fh, mode, status);
+ return localio;
+}
+
+/*
+ * nfs_local_open_fh - open a local filehandle in terms of nfsd_file.
+ * First checking if the open nfsd_file is already cached, otherwise
+ * must __nfs_local_open_fh and insert the nfsd_file in nfs_file_localio.
+ *
+ * Returns a pointer to a struct nfsd_file or NULL.
+ */
+struct nfsd_file *
+nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
+ struct nfs_fh *fh, struct nfs_file_localio *nfl,
+ const fmode_t mode)
+{
+ struct nfsd_file *nf, __rcu **pnf;
+
+ if (!nfs_server_is_local(clp))
+ return NULL;
+ if (mode & ~(FMODE_READ | FMODE_WRITE))
+ return NULL;
+
+ if (mode & FMODE_WRITE)
+ pnf = &nfl->rw_file;
+ else
+ pnf = &nfl->ro_file;
+
+ nf = __nfs_local_open_fh(clp, cred, fh, nfl, pnf, mode);
+ if (IS_ERR(nf))
+ return NULL;
+ return nf;
+}
+EXPORT_SYMBOL_GPL(nfs_local_open_fh);
+
+static void
+nfs_local_iocb_free(struct nfs_local_kiocb *iocb)
+{
+ kfree(iocb->bvec);
+ kfree(iocb);
+}
+
+static struct nfs_local_kiocb *
+nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
+ struct file *file, gfp_t flags)
+{
+ struct nfs_local_kiocb *iocb;
+
+ iocb = kzalloc(sizeof(*iocb), flags);
+ if (iocb == NULL)
+ return NULL;
+
+ iocb->bvec = kmalloc_array(hdr->page_array.npages,
+ sizeof(struct bio_vec), flags);
+ if (iocb->bvec == NULL) {
+ kfree(iocb);
+ return NULL;
+ }
+
+ init_sync_kiocb(&iocb->kiocb, file);
+
+ iocb->hdr = hdr;
+ iocb->kiocb.ki_pos = hdr->args.offset;
+ iocb->kiocb.ki_flags &= ~IOCB_APPEND;
+ iocb->kiocb.ki_complete = NULL;
+ iocb->aio_complete_work = NULL;
+
+ iocb->end_iter_index = -1;
+
+ return iocb;
+}
+
+static bool
+nfs_is_local_dio_possible(struct nfs_local_kiocb *iocb, int rw,
+ size_t len, struct nfs_local_dio *local_dio)
+{
+ struct nfs_pgio_header *hdr = iocb->hdr;
+ loff_t offset = hdr->args.offset;
+ u32 nf_dio_mem_align, nf_dio_offset_align, nf_dio_read_offset_align;
+ loff_t start_end, orig_end, middle_end;
+
+ nfs_to->nfsd_file_dio_alignment(iocb->localio, &nf_dio_mem_align,
+ &nf_dio_offset_align, &nf_dio_read_offset_align);
+ if (rw == ITER_DEST)
+ nf_dio_offset_align = nf_dio_read_offset_align;
+
+ if (unlikely(!nf_dio_mem_align || !nf_dio_offset_align))
+ return false;
+ if (unlikely(nf_dio_offset_align > PAGE_SIZE))
+ return false;
+ if (unlikely(len < nf_dio_offset_align))
+ return false;
+
+ local_dio->mem_align = nf_dio_mem_align;
+ local_dio->offset_align = nf_dio_offset_align;
+
+ start_end = round_up(offset, nf_dio_offset_align);
+ orig_end = offset + len;
+ middle_end = round_down(orig_end, nf_dio_offset_align);
+
+ local_dio->middle_offset = start_end;
+ local_dio->end_offset = middle_end;
+
+ local_dio->start_len = start_end - offset;
+ local_dio->middle_len = middle_end - start_end;
+ local_dio->end_len = orig_end - middle_end;
+
+ if (rw == ITER_DEST)
+ trace_nfs_local_dio_read(hdr->inode, offset, len, local_dio);
+ else
+ trace_nfs_local_dio_write(hdr->inode, offset, len, local_dio);
+ return true;
+}
+
+static bool nfs_iov_iter_aligned_bvec(const struct iov_iter *i,
+ unsigned int addr_mask, unsigned int len_mask)
+{
+ const struct bio_vec *bvec = i->bvec;
+ size_t skip = i->iov_offset;
+ size_t size = i->count;
+
+ if (size & len_mask)
+ return false;
+ do {
+ size_t len = bvec->bv_len;
+
+ if (len > size)
+ len = size;
+ if ((unsigned long)(bvec->bv_offset + skip) & addr_mask)
+ return false;
+ bvec++;
+ size -= len;
+ skip = 0;
+ } while (size);
+
+ return true;
+}
+
+static void
+nfs_local_iter_setup(struct iov_iter *iter, int rw, struct bio_vec *bvec,
+ unsigned int nvecs, unsigned long total,
+ size_t start, size_t len)
+{
+ iov_iter_bvec(iter, rw, bvec, nvecs, total);
+ if (start)
+ iov_iter_advance(iter, start);
+ iov_iter_truncate(iter, len);
+}
+
+/*
+ * Setup as many as 3 iov_iter based on extents described by @local_dio.
+ * Returns the number of iov_iter that were setup.
+ */
+static int
+nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw,
+ unsigned int nvecs, unsigned long total,
+ struct nfs_local_dio *local_dio)
+{
+ int n_iters = 0;
+ struct iov_iter *iters = iocb->iters;
+
+ /* Setup misaligned start? */
+ if (local_dio->start_len) {
+ nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec,
+ nvecs, total, 0, local_dio->start_len);
+ ++n_iters;
+ }
+
+ /*
+ * Setup DIO-aligned middle, if there is no misaligned end (below)
+ * then AIO completion is used, see nfs_local_call_{read,write}
+ */
+ nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec, nvecs,
+ total, local_dio->start_len, local_dio->middle_len);
+
+ iocb->iter_is_dio_aligned[n_iters] =
+ nfs_iov_iter_aligned_bvec(&iters[n_iters],
+ local_dio->mem_align-1, local_dio->offset_align-1);
+
+ if (unlikely(!iocb->iter_is_dio_aligned[n_iters])) {
+ trace_nfs_local_dio_misaligned(iocb->hdr->inode,
+ local_dio->start_len, local_dio->middle_len, local_dio);
+ return 0; /* no DIO-aligned IO possible */
+ }
+ iocb->end_iter_index = n_iters;
+ ++n_iters;
+
+ /* Setup misaligned end? */
+ if (local_dio->end_len) {
+ nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec,
+ nvecs, total, local_dio->start_len +
+ local_dio->middle_len, local_dio->end_len);
+ iocb->end_iter_index = n_iters;
+ ++n_iters;
+ }
+
+ atomic_set(&iocb->n_iters, n_iters);
+ return n_iters;
+}
+
+static noinline_for_stack void
+nfs_local_iters_init(struct nfs_local_kiocb *iocb, int rw)
+{
+ struct nfs_pgio_header *hdr = iocb->hdr;
+ struct page **pagevec = hdr->page_array.pagevec;
+ unsigned long v, total;
+ unsigned int base;
+ size_t len;
+
+ v = 0;
+ total = hdr->args.count;
+ base = hdr->args.pgbase;
+ while (total && v < hdr->page_array.npages) {
+ len = min_t(size_t, total, PAGE_SIZE - base);
+ bvec_set_page(&iocb->bvec[v], *pagevec, len, base);
+ total -= len;
+ ++pagevec;
+ ++v;
+ base = 0;
+ }
+ len = hdr->args.count - total;
+
+ /*
+ * For each iocb, iocb->n_iters is always at least 1 and we always
+ * end io after first nfs_local_pgio_done call unless misaligned DIO.
+ */
+ atomic_set(&iocb->n_iters, 1);
+
+ if (test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) {
+ struct nfs_local_dio local_dio;
+
+ if (nfs_is_local_dio_possible(iocb, rw, len, &local_dio) &&
+ nfs_local_iters_setup_dio(iocb, rw, v, len, &local_dio) != 0) {
+ /* Ensure DIO WRITE's IO on stable storage upon completion */
+ if (rw == ITER_SOURCE)
+ iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC;
+ return; /* is DIO-aligned */
+ }
+ }
+
+ /* Use buffered IO */
+ iov_iter_bvec(&iocb->iters[0], rw, iocb->bvec, v, len);
+}
+
+static void
+nfs_local_hdr_release(struct nfs_pgio_header *hdr,
+ const struct rpc_call_ops *call_ops)
+{
+ call_ops->rpc_call_done(&hdr->task, hdr);
+ call_ops->rpc_release(hdr);
+}
+
+static void
+nfs_local_pgio_init(struct nfs_pgio_header *hdr,
+ const struct rpc_call_ops *call_ops)
+{
+ hdr->task.tk_ops = call_ops;
+ if (!hdr->task.tk_start)
+ hdr->task.tk_start = ktime_get();
+}
+
+static bool
+nfs_local_pgio_done(struct nfs_local_kiocb *iocb, long status, bool force)
+{
+ struct nfs_pgio_header *hdr = iocb->hdr;
+
+ /* Must handle partial completions */
+ if (status >= 0) {
+ hdr->res.count += status;
+ /* @hdr was initialized to 0 (zeroed during allocation) */
+ if (hdr->task.tk_status == 0)
+ hdr->res.op_status = NFS4_OK;
+ } else {
+ hdr->res.op_status = nfs_localio_errno_to_nfs4_stat(status);
+ hdr->task.tk_status = status;
+ }
+
+ if (force)
+ return true;
+
+ BUG_ON(atomic_read(&iocb->n_iters) <= 0);
+ return atomic_dec_and_test(&iocb->n_iters);
+}
+
+static void
+nfs_local_iocb_release(struct nfs_local_kiocb *iocb)
+{
+ nfs_local_file_put(iocb->localio);
+ nfs_local_iocb_free(iocb);
+}
+
+static void
+nfs_local_pgio_release(struct nfs_local_kiocb *iocb)
+{
+ struct nfs_pgio_header *hdr = iocb->hdr;
+
+ nfs_local_iocb_release(iocb);
+ nfs_local_hdr_release(hdr, hdr->task.tk_ops);
+}
+
+/*
+ * Complete the I/O from iocb->kiocb.ki_complete()
+ *
+ * Note that this function can be called from a bottom half context,
+ * hence we need to queue the rpc_call_done() etc to a workqueue
+ */
+static inline void nfs_local_pgio_aio_complete(struct nfs_local_kiocb *iocb)
+{
+ INIT_WORK(&iocb->work, iocb->aio_complete_work);
+ queue_work(nfsiod_workqueue, &iocb->work);
+}
+
+static void nfs_local_read_done(struct nfs_local_kiocb *iocb)
+{
+ struct nfs_pgio_header *hdr = iocb->hdr;
+ struct file *filp = iocb->kiocb.ki_filp;
+ long status = hdr->task.tk_status;
+
+ if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) {
+ /* Underlying FS will return -EINVAL if misaligned DIO is attempted. */
+ pr_info_ratelimited("nfs: Unexpected direct I/O read alignment failure\n");
+ }
+
+ /*
+ * Must clear replen otherwise NFSv3 data corruption will occur
+ * if/when switching from LOCALIO back to using normal RPC.
+ */
+ hdr->res.replen = 0;
+
+ /* nfs_readpage_result() handles short read */
+
+ if (hdr->args.offset + hdr->res.count >= i_size_read(file_inode(filp)))
+ hdr->res.eof = true;
+
+ dprintk("%s: read %ld bytes eof %d.\n", __func__,
+ status > 0 ? status : 0, hdr->res.eof);
+}
+
+static inline void nfs_local_read_iocb_done(struct nfs_local_kiocb *iocb)
+{
+ nfs_local_read_done(iocb);
+ nfs_local_pgio_release(iocb);
+}
+
+static void nfs_local_read_aio_complete_work(struct work_struct *work)
+{
+ struct nfs_local_kiocb *iocb =
+ container_of(work, struct nfs_local_kiocb, work);
+
+ nfs_local_read_iocb_done(iocb);
+}
+
+static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret)
+{
+ struct nfs_local_kiocb *iocb =
+ container_of(kiocb, struct nfs_local_kiocb, kiocb);
+
+ /* AIO completion of DIO read should always be last to complete */
+ if (unlikely(!nfs_local_pgio_done(iocb, ret, false)))
+ return;
+
+ nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */
+}
+
+static void nfs_local_call_read(struct work_struct *work)
+{
+ struct nfs_local_kiocb *iocb =
+ container_of(work, struct nfs_local_kiocb, work);
+ struct file *filp = iocb->kiocb.ki_filp;
+ bool force_done = false;
+ ssize_t status;
+ int n_iters;
+
+ n_iters = atomic_read(&iocb->n_iters);
+ for (int i = 0; i < n_iters ; i++) {
+ if (iocb->iter_is_dio_aligned[i]) {
+ iocb->kiocb.ki_flags |= IOCB_DIRECT;
+ /* Only use AIO completion if DIO-aligned segment is last */
+ if (i == iocb->end_iter_index) {
+ iocb->kiocb.ki_complete = nfs_local_read_aio_complete;
+ iocb->aio_complete_work = nfs_local_read_aio_complete_work;
+ }
+ } else
+ iocb->kiocb.ki_flags &= ~IOCB_DIRECT;
+
+ scoped_with_creds(filp->f_cred)
+ status = filp->f_op->read_iter(&iocb->kiocb, &iocb->iters[i]);
+
+ if (status != -EIOCBQUEUED) {
+ if (unlikely(status >= 0 && status < iocb->iters[i].count))
+ force_done = true; /* Partial read */
+ if (nfs_local_pgio_done(iocb, status, force_done)) {
+ nfs_local_read_iocb_done(iocb);
+ break;
+ }
+ }
+ }
+}
+
+static int
+nfs_local_do_read(struct nfs_local_kiocb *iocb,
+ const struct rpc_call_ops *call_ops)
+{
+ struct nfs_pgio_header *hdr = iocb->hdr;
+
+ dprintk("%s: vfs_read count=%u pos=%llu\n",
+ __func__, hdr->args.count, hdr->args.offset);
+
+ nfs_local_pgio_init(hdr, call_ops);
+ hdr->res.eof = false;
+
+ INIT_WORK(&iocb->work, nfs_local_call_read);
+ queue_work(nfslocaliod_workqueue, &iocb->work);
+
+ return 0;
+}
+
+static void
+nfs_copy_boot_verifier(struct nfs_write_verifier *verifier, struct inode *inode)
+{
+ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ u32 *verf = (u32 *)verifier->data;
+ unsigned int seq;
+
+ do {
+ seq = read_seqbegin(&clp->cl_boot_lock);
+ verf[0] = (u32)clp->cl_nfssvc_boot.tv_sec;
+ verf[1] = (u32)clp->cl_nfssvc_boot.tv_nsec;
+ } while (read_seqretry(&clp->cl_boot_lock, seq));
+}
+
+static void
+nfs_reset_boot_verifier(struct inode *inode)
+{
+ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+
+ write_seqlock(&clp->cl_boot_lock);
+ ktime_get_real_ts64(&clp->cl_nfssvc_boot);
+ write_sequnlock(&clp->cl_boot_lock);
+}
+
+static void
+nfs_set_local_verifier(struct inode *inode,
+ struct nfs_writeverf *verf,
+ enum nfs3_stable_how how)
+{
+ nfs_copy_boot_verifier(&verf->verifier, inode);
+ verf->committed = how;
+}
+
+/* Factored out from fs/nfsd/vfs.h:fh_getattr() */
+static int __vfs_getattr(const struct path *p, struct kstat *stat, int version)
+{
+ u32 request_mask = STATX_BASIC_STATS;
+
+ if (version == 4)
+ request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE);
+ return vfs_getattr(p, stat, request_mask, AT_STATX_SYNC_AS_STAT);
+}
+
+/* Copied from fs/nfsd/nfsfh.c:nfsd4_change_attribute() */
+static u64 __nfsd4_change_attribute(const struct kstat *stat,
+ const struct inode *inode)
+{
+ u64 chattr;
+
+ if (stat->result_mask & STATX_CHANGE_COOKIE) {
+ chattr = stat->change_cookie;
+ if (S_ISREG(inode->i_mode) &&
+ !(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) {
+ chattr += (u64)stat->ctime.tv_sec << 30;
+ chattr += stat->ctime.tv_nsec;
+ }
+ } else {
+ chattr = time_to_chattr(&stat->ctime);
+ }
+ return chattr;
+}
+
+static void nfs_local_vfs_getattr(struct nfs_local_kiocb *iocb)
+{
+ struct kstat stat;
+ struct file *filp = iocb->kiocb.ki_filp;
+ struct nfs_pgio_header *hdr = iocb->hdr;
+ struct nfs_fattr *fattr = hdr->res.fattr;
+ int version = NFS_PROTO(hdr->inode)->version;
+
+ if (unlikely(!fattr) || __vfs_getattr(&filp->f_path, &stat, version))
+ return;
+
+ fattr->valid = (NFS_ATTR_FATTR_FILEID |
+ NFS_ATTR_FATTR_CHANGE |
+ NFS_ATTR_FATTR_SIZE |
+ NFS_ATTR_FATTR_ATIME |
+ NFS_ATTR_FATTR_MTIME |
+ NFS_ATTR_FATTR_CTIME |
+ NFS_ATTR_FATTR_SPACE_USED);
+
+ fattr->fileid = stat.ino;
+ fattr->size = stat.size;
+ fattr->atime = stat.atime;
+ fattr->mtime = stat.mtime;
+ fattr->ctime = stat.ctime;
+ if (version == 4) {
+ fattr->change_attr =
+ __nfsd4_change_attribute(&stat, file_inode(filp));
+ } else
+ fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
+ fattr->du.nfs3.used = stat.blocks << 9;
+}
+
+static void nfs_local_write_done(struct nfs_local_kiocb *iocb)
+{
+ struct nfs_pgio_header *hdr = iocb->hdr;
+ long status = hdr->task.tk_status;
+
+ dprintk("%s: wrote %ld bytes.\n", __func__, status > 0 ? status : 0);
+
+ if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) {
+ /* Underlying FS will return -EINVAL if misaligned DIO is attempted. */
+ pr_info_ratelimited("nfs: Unexpected direct I/O write alignment failure\n");
+ }
+
+ /* Handle short writes as if they are ENOSPC */
+ status = hdr->res.count;
+ if (status > 0 && status < hdr->args.count) {
+ hdr->mds_offset += status;
+ hdr->args.offset += status;
+ hdr->args.pgbase += status;
+ hdr->args.count -= status;
+ nfs_set_pgio_error(hdr, -ENOSPC, hdr->args.offset);
+ status = -ENOSPC;
+ /* record -ENOSPC in terms of nfs_local_pgio_done */
+ (void) nfs_local_pgio_done(iocb, status, true);
+ }
+ if (hdr->task.tk_status < 0)
+ nfs_reset_boot_verifier(hdr->inode);
+}
+
+static inline void nfs_local_write_iocb_done(struct nfs_local_kiocb *iocb)
+{
+ nfs_local_write_done(iocb);
+ nfs_local_vfs_getattr(iocb);
+ nfs_local_pgio_release(iocb);
+}
+
+static void nfs_local_write_aio_complete_work(struct work_struct *work)
+{
+ struct nfs_local_kiocb *iocb =
+ container_of(work, struct nfs_local_kiocb, work);
+
+ nfs_local_write_iocb_done(iocb);
+}
+
+static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret)
+{
+ struct nfs_local_kiocb *iocb =
+ container_of(kiocb, struct nfs_local_kiocb, kiocb);
+
+ /* AIO completion of DIO write should always be last to complete */
+ if (unlikely(!nfs_local_pgio_done(iocb, ret, false)))
+ return;
+
+ nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */
+}
+
+static void nfs_local_call_write(struct work_struct *work)
+{
+ struct nfs_local_kiocb *iocb =
+ container_of(work, struct nfs_local_kiocb, work);
+ struct file *filp = iocb->kiocb.ki_filp;
+ unsigned long old_flags = current->flags;
+ bool force_done = false;
+ ssize_t status;
+ int n_iters;
+
+ current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
+
+ file_start_write(filp);
+ n_iters = atomic_read(&iocb->n_iters);
+ for (int i = 0; i < n_iters ; i++) {
+ if (iocb->iter_is_dio_aligned[i]) {
+ iocb->kiocb.ki_flags |= IOCB_DIRECT;
+ /* Only use AIO completion if DIO-aligned segment is last */
+ if (i == iocb->end_iter_index) {
+ iocb->kiocb.ki_complete = nfs_local_write_aio_complete;
+ iocb->aio_complete_work = nfs_local_write_aio_complete_work;
+ }
+ } else
+ iocb->kiocb.ki_flags &= ~IOCB_DIRECT;
+
+ scoped_with_creds(filp->f_cred)
+ status = filp->f_op->write_iter(&iocb->kiocb, &iocb->iters[i]);
+
+ if (status != -EIOCBQUEUED) {
+ if (unlikely(status >= 0 && status < iocb->iters[i].count))
+ force_done = true; /* Partial write */
+ if (nfs_local_pgio_done(iocb, status, force_done)) {
+ nfs_local_write_iocb_done(iocb);
+ break;
+ }
+ }
+ }
+ file_end_write(filp);
+
+ current->flags = old_flags;
+}
+
+static int
+nfs_local_do_write(struct nfs_local_kiocb *iocb,
+ const struct rpc_call_ops *call_ops)
+{
+ struct nfs_pgio_header *hdr = iocb->hdr;
+
+ dprintk("%s: vfs_write count=%u pos=%llu %s\n",
+ __func__, hdr->args.count, hdr->args.offset,
+ (hdr->args.stable == NFS_UNSTABLE) ? "unstable" : "stable");
+
+ switch (hdr->args.stable) {
+ default:
+ break;
+ case NFS_DATA_SYNC:
+ iocb->kiocb.ki_flags |= IOCB_DSYNC;
+ break;
+ case NFS_FILE_SYNC:
+ iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC;
+ }
+
+ nfs_local_pgio_init(hdr, call_ops);
+
+ nfs_set_local_verifier(hdr->inode, hdr->res.verf, hdr->args.stable);
+
+ INIT_WORK(&iocb->work, nfs_local_call_write);
+ queue_work(nfslocaliod_workqueue, &iocb->work);
+
+ return 0;
+}
+
+static struct nfs_local_kiocb *
+nfs_local_iocb_init(struct nfs_pgio_header *hdr, struct nfsd_file *localio)
+{
+ struct file *file = nfs_to->nfsd_file_file(localio);
+ struct nfs_local_kiocb *iocb;
+ gfp_t gfp_mask;
+ int rw;
+
+ if (hdr->rw_mode & FMODE_READ) {
+ if (!file->f_op->read_iter)
+ return ERR_PTR(-EOPNOTSUPP);
+ gfp_mask = GFP_KERNEL;
+ rw = ITER_DEST;
+ } else {
+ if (!file->f_op->write_iter)
+ return ERR_PTR(-EOPNOTSUPP);
+ gfp_mask = GFP_NOIO;
+ rw = ITER_SOURCE;
+ }
+
+ iocb = nfs_local_iocb_alloc(hdr, file, gfp_mask);
+ if (iocb == NULL)
+ return ERR_PTR(-ENOMEM);
+ iocb->hdr = hdr;
+ iocb->localio = localio;
+
+ nfs_local_iters_init(iocb, rw);
+
+ return iocb;
+}
+
+int nfs_local_doio(struct nfs_client *clp, struct nfsd_file *localio,
+ struct nfs_pgio_header *hdr,
+ const struct rpc_call_ops *call_ops)
+{
+ struct nfs_local_kiocb *iocb;
+ int status = 0;
+
+ if (!hdr->args.count)
+ return 0;
+
+ iocb = nfs_local_iocb_init(hdr, localio);
+ if (IS_ERR(iocb))
+ return PTR_ERR(iocb);
+
+ switch (hdr->rw_mode) {
+ case FMODE_READ:
+ status = nfs_local_do_read(iocb, call_ops);
+ break;
+ case FMODE_WRITE:
+ status = nfs_local_do_write(iocb, call_ops);
+ break;
+ default:
+ dprintk("%s: invalid mode: %d\n", __func__,
+ hdr->rw_mode);
+ status = -EOPNOTSUPP;
+ }
+
+ if (status != 0) {
+ if (status == -EAGAIN)
+ nfs_localio_disable_client(clp);
+ nfs_local_iocb_release(iocb);
+ hdr->task.tk_status = status;
+ nfs_local_hdr_release(hdr, call_ops);
+ }
+ return status;
+}
+
+static void
+nfs_local_init_commit(struct nfs_commit_data *data,
+ const struct rpc_call_ops *call_ops)
+{
+ data->task.tk_ops = call_ops;
+}
+
+static int
+nfs_local_run_commit(struct file *filp, struct nfs_commit_data *data)
+{
+ loff_t start = data->args.offset;
+ loff_t end = LLONG_MAX;
+
+ if (data->args.count > 0) {
+ end = start + data->args.count - 1;
+ if (end < start)
+ end = LLONG_MAX;
+ }
+
+ dprintk("%s: commit %llu - %llu\n", __func__, start, end);
+ return vfs_fsync_range(filp, start, end, 0);
+}
+
+static void
+nfs_local_commit_done(struct nfs_commit_data *data, int status)
+{
+ if (status >= 0) {
+ nfs_set_local_verifier(data->inode,
+ data->res.verf,
+ NFS_FILE_SYNC);
+ data->res.op_status = NFS4_OK;
+ data->task.tk_status = 0;
+ } else {
+ nfs_reset_boot_verifier(data->inode);
+ data->res.op_status = nfs_localio_errno_to_nfs4_stat(status);
+ data->task.tk_status = status;
+ }
+}
+
+static void
+nfs_local_release_commit_data(struct nfsd_file *localio,
+ struct nfs_commit_data *data,
+ const struct rpc_call_ops *call_ops)
+{
+ nfs_local_file_put(localio);
+ call_ops->rpc_call_done(&data->task, data);
+ call_ops->rpc_release(data);
+}
+
+static void
+nfs_local_fsync_ctx_free(struct nfs_local_fsync_ctx *ctx)
+{
+ nfs_local_release_commit_data(ctx->localio, ctx->data,
+ ctx->data->task.tk_ops);
+ kfree(ctx);
+}
+
+static void
+nfs_local_fsync_work(struct work_struct *work)
+{
+ struct nfs_local_fsync_ctx *ctx;
+ int status;
+
+ ctx = container_of(work, struct nfs_local_fsync_ctx, work);
+
+ status = nfs_local_run_commit(nfs_to->nfsd_file_file(ctx->localio),
+ ctx->data);
+ nfs_local_commit_done(ctx->data, status);
+ if (ctx->done != NULL)
+ complete(ctx->done);
+ nfs_local_fsync_ctx_free(ctx);
+}
+
+static struct nfs_local_fsync_ctx *
+nfs_local_fsync_ctx_alloc(struct nfs_commit_data *data,
+ struct nfsd_file *localio, gfp_t flags)
+{
+ struct nfs_local_fsync_ctx *ctx = kmalloc(sizeof(*ctx), flags);
+
+ if (ctx != NULL) {
+ ctx->localio = localio;
+ ctx->data = data;
+ INIT_WORK(&ctx->work, nfs_local_fsync_work);
+ ctx->done = NULL;
+ }
+ return ctx;
+}
+
+int nfs_local_commit(struct nfsd_file *localio,
+ struct nfs_commit_data *data,
+ const struct rpc_call_ops *call_ops, int how)
+{
+ struct nfs_local_fsync_ctx *ctx;
+
+ ctx = nfs_local_fsync_ctx_alloc(data, localio, GFP_KERNEL);
+ if (!ctx) {
+ nfs_local_commit_done(data, -ENOMEM);
+ nfs_local_release_commit_data(localio, data, call_ops);
+ return -ENOMEM;
+ }
+
+ nfs_local_init_commit(data, call_ops);
+
+ if (how & FLUSH_SYNC) {
+ DECLARE_COMPLETION_ONSTACK(done);
+ ctx->done = &done;
+ queue_work(nfsiod_workqueue, &ctx->work);
+ wait_for_completion(&done);
+ } else
+ queue_work(nfsiod_workqueue, &ctx->work);
+
+ return 0;
+}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 68e76b626371..db8dfb920394 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -128,11 +128,6 @@ struct mountres {
rpc_authflavor_t *auth_flavors;
};
-struct mnt_fhstatus {
- u32 status;
- struct nfs_fh *fh;
-};
-
/**
* nfs_mount - Obtain an NFS file handle for the given host and path
* @info: pointer to mount request arguments
@@ -228,74 +223,6 @@ out_mnt_err:
goto out;
}
-/**
- * nfs_umount - Notify a server that we have unmounted this export
- * @info: pointer to umount request arguments
- *
- * MOUNTPROC_UMNT is advisory, so we set a short timeout, and always
- * use UDP.
- */
-void nfs_umount(const struct nfs_mount_request *info)
-{
- static const struct rpc_timeout nfs_umnt_timeout = {
- .to_initval = 1 * HZ,
- .to_maxval = 3 * HZ,
- .to_retries = 2,
- };
- struct rpc_create_args args = {
- .net = info->net,
- .protocol = IPPROTO_UDP,
- .address = (struct sockaddr *)info->sap,
- .addrsize = info->salen,
- .timeout = &nfs_umnt_timeout,
- .servername = info->hostname,
- .program = &mnt_program,
- .version = info->version,
- .authflavor = RPC_AUTH_UNIX,
- .flags = RPC_CLNT_CREATE_NOPING,
- .cred = current_cred(),
- };
- struct rpc_message msg = {
- .rpc_argp = info->dirpath,
- };
- struct rpc_clnt *clnt;
- int status;
-
- if (strlen(info->dirpath) > MNTPATHLEN)
- return;
-
- if (info->noresvport)
- args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
-
- clnt = rpc_create(&args);
- if (IS_ERR(clnt))
- goto out_clnt_err;
-
- dprintk("NFS: sending UMNT request for %s:%s\n",
- (info->hostname ? info->hostname : "server"), info->dirpath);
-
- if (info->version == NFS_MNT3_VERSION)
- msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC3_UMNT];
- else
- msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC_UMNT];
-
- status = rpc_call_sync(clnt, &msg, 0);
- rpc_shutdown_client(clnt);
-
- if (unlikely(status < 0))
- goto out_call_err;
-
- return;
-
-out_clnt_err:
- dprintk("NFS: failed to create UMNT RPC client, status=%ld\n",
- PTR_ERR(clnt));
- return;
-
-out_call_err:
- dprintk("NFS: UMNT request failed, status=%d\n", status);
-}
-
/*
* XDR encode/decode functions for MOUNT
*/
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index b0ef7e7ddb30..5a4d193da1a9 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -182,7 +182,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
ctx->version = client->rpc_ops->version;
ctx->minorversion = client->cl_minorversion;
ctx->nfs_mod = client->cl_nfs_mod;
- __module_get(ctx->nfs_mod->owner);
+ get_nfs_version(ctx->nfs_mod);
ret = client->rpc_ops->submount(fc, server);
if (ret < 0) {
@@ -195,7 +195,6 @@ struct vfsmount *nfs_d_automount(struct path *path)
if (IS_ERR(mnt))
goto out_fc;
- mntget(mnt); /* prevent immediate expiration */
if (timeout <= 0)
goto out_fc;
@@ -208,23 +207,24 @@ out_fc:
}
static int
-nfs_namespace_getattr(struct user_namespace *mnt_userns,
+nfs_namespace_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
if (NFS_FH(d_inode(path->dentry))->size != 0)
- return nfs_getattr(mnt_userns, path, stat, request_mask,
+ return nfs_getattr(idmap, path, stat, request_mask,
query_flags);
- generic_fillattr(&init_user_ns, d_inode(path->dentry), stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(path->dentry),
+ stat);
return 0;
}
static int
-nfs_namespace_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+nfs_namespace_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
if (NFS_FH(d_inode(dentry))->size != 0)
- return nfs_setattr(mnt_userns, dentry, attr);
+ return nfs_setattr(idmap, dentry, attr);
return -EACCES;
}
@@ -290,7 +290,8 @@ int nfs_do_submount(struct fs_context *fc)
nfs_errorf(fc, "NFS: Couldn't determine submount pathname");
ret = PTR_ERR(p);
} else {
- ret = vfs_parse_fs_string(fc, "source", p, buffer + 4096 - p);
+ ret = vfs_parse_fs_qstr(fc, "source",
+ &QSTR_LEN(p, buffer + 4096 - p));
if (!ret)
ret = vfs_get_tree(fc);
}
@@ -307,7 +308,7 @@ int nfs_submount(struct fs_context *fc, struct nfs_server *server)
int err;
/* Look it up again to get its attributes */
- err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry,
+ err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry, &dentry->d_name,
ctx->mntfh, ctx->clone_data.fattr);
dput(parent);
if (err != 0)
@@ -335,7 +336,7 @@ static int param_set_nfs_timeout(const char *val, const struct kernel_param *kp)
num *= HZ;
*((int *)kp->arg) = num;
if (!list_empty(&nfs_automount_list))
- mod_delayed_work(system_wq, &nfs_automount_task, num);
+ mod_delayed_work(system_percpu_wq, &nfs_automount_task, num);
} else {
*((int *)kp->arg) = -1*HZ;
cancel_delayed_work(&nfs_automount_task);
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
index c8374f74dce1..6ba3ea39e928 100644
--- a/fs/nfs/netns.h
+++ b/fs/nfs/netns.h
@@ -9,6 +9,7 @@
#include <linux/nfs4.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <linux/sunrpc/stats.h>
struct bl_dev_msg {
int32_t status;
@@ -30,10 +31,15 @@ struct nfs_net {
unsigned short nfs_callback_tcpport;
unsigned short nfs_callback_tcpport6;
int cb_users[NFS4_MAX_MINOR_VERSION + 1];
-#endif
+#endif /* CONFIG_NFS_V4 */
+#if IS_ENABLED(CONFIG_NFS_V4_1)
+ struct list_head nfs4_data_server_cache;
+ spinlock_t nfs4_data_server_lock;
+#endif /* CONFIG_NFS_V4_1 */
struct nfs_netns_client *nfs_client;
spinlock_t nfs_client_lock;
ktime_t boot_time;
+ struct rpc_stat rpcstats;
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *proc_nfsfs;
#endif
diff --git a/fs/nfs/nfs.h b/fs/nfs/nfs.h
index 5ba00610aede..8a5f51be013a 100644
--- a/fs/nfs/nfs.h
+++ b/fs/nfs/nfs.h
@@ -18,11 +18,11 @@ struct nfs_subversion {
const struct rpc_version *rpc_vers; /* NFS version information */
const struct nfs_rpc_ops *rpc_ops; /* NFS operations */
const struct super_operations *sops; /* NFS Super operations */
- const struct xattr_handler **xattr; /* NFS xattr handlers */
- struct list_head list; /* List of NFS versions */
+ const struct xattr_handler * const *xattr; /* NFS xattr handlers */
};
-struct nfs_subversion *get_nfs_version(unsigned int);
+struct nfs_subversion *find_nfs_version(unsigned int);
+int get_nfs_version(struct nfs_subversion *);
void put_nfs_version(struct nfs_subversion *);
void register_nfs_version(struct nfs_subversion *);
void unregister_nfs_version(struct nfs_subversion *);
diff --git a/fs/nfs/nfs2super.c b/fs/nfs/nfs2super.c
index 467f21ee6a35..b1badc70bd71 100644
--- a/fs/nfs/nfs2super.c
+++ b/fs/nfs/nfs2super.c
@@ -26,6 +26,7 @@ static void __exit exit_nfs_v2(void)
unregister_nfs_version(&nfs_v2);
}
+MODULE_DESCRIPTION("NFSv2 client support");
MODULE_LICENSE("GPL");
module_init(init_nfs_v2);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 05c3b4b2b3dd..9eff09158518 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -22,14 +22,12 @@
#include <linux/nfs.h>
#include <linux/nfs2.h>
#include <linux/nfs_fs.h>
-#include "nfstrace.h"
+#include <linux/nfs_common.h>
#include "internal.h"
+#include "nfstrace.h"
#define NFSDBG_FACILITY NFSDBG_XDR
-/* Mapping from NFS error code to "errno" error code. */
-#define errno_NFSERR_IO EIO
-
/*
* Declare the space requirements for NFS arguments and replies as
* number of 32bit-words
@@ -64,8 +62,6 @@
#define NFS_readdirres_sz (1+NFS_pagepad_sz)
#define NFS_statfsres_sz (1+NFS_info_sz)
-static int nfs_stat_to_errno(enum nfs_stat);
-
/*
* Encode/decode NFSv2 basic data types
*
@@ -949,7 +945,7 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
error = decode_filename_inline(xdr, &entry->name, &entry->len);
if (unlikely(error))
- return -EAGAIN;
+ return error == -ENAMETOOLONG ? -ENAMETOOLONG : -EAGAIN;
/*
* The type (size and byte order) of nfscookie isn't defined in
@@ -1054,70 +1050,6 @@ out_default:
return nfs_stat_to_errno(status);
}
-
-/*
- * We need to translate between nfs status return values and
- * the local errno values which may not be the same.
- */
-static const struct {
- int stat;
- int errno;
-} nfs_errtbl[] = {
- { NFS_OK, 0 },
- { NFSERR_PERM, -EPERM },
- { NFSERR_NOENT, -ENOENT },
- { NFSERR_IO, -errno_NFSERR_IO},
- { NFSERR_NXIO, -ENXIO },
-/* { NFSERR_EAGAIN, -EAGAIN }, */
- { NFSERR_ACCES, -EACCES },
- { NFSERR_EXIST, -EEXIST },
- { NFSERR_XDEV, -EXDEV },
- { NFSERR_NODEV, -ENODEV },
- { NFSERR_NOTDIR, -ENOTDIR },
- { NFSERR_ISDIR, -EISDIR },
- { NFSERR_INVAL, -EINVAL },
- { NFSERR_FBIG, -EFBIG },
- { NFSERR_NOSPC, -ENOSPC },
- { NFSERR_ROFS, -EROFS },
- { NFSERR_MLINK, -EMLINK },
- { NFSERR_NAMETOOLONG, -ENAMETOOLONG },
- { NFSERR_NOTEMPTY, -ENOTEMPTY },
- { NFSERR_DQUOT, -EDQUOT },
- { NFSERR_STALE, -ESTALE },
- { NFSERR_REMOTE, -EREMOTE },
-#ifdef EWFLUSH
- { NFSERR_WFLUSH, -EWFLUSH },
-#endif
- { NFSERR_BADHANDLE, -EBADHANDLE },
- { NFSERR_NOT_SYNC, -ENOTSYNC },
- { NFSERR_BAD_COOKIE, -EBADCOOKIE },
- { NFSERR_NOTSUPP, -ENOTSUPP },
- { NFSERR_TOOSMALL, -ETOOSMALL },
- { NFSERR_SERVERFAULT, -EREMOTEIO },
- { NFSERR_BADTYPE, -EBADTYPE },
- { NFSERR_JUKEBOX, -EJUKEBOX },
- { -1, -EIO }
-};
-
-/**
- * nfs_stat_to_errno - convert an NFS status code to a local errno
- * @status: NFS status code to convert
- *
- * Returns a local errno value, or -EIO if the NFS status code is
- * not recognized. This function is used jointly by NFSv2 and NFSv3.
- */
-static int nfs_stat_to_errno(enum nfs_stat status)
-{
- int i;
-
- for (i = 0; nfs_errtbl[i].stat != -1; i++) {
- if (nfs_errtbl[i].stat == (int)status)
- return nfs_errtbl[i].errno;
- }
- dprintk("NFS: Unrecognized nfs status value: %u\n", status);
- return nfs_errtbl[i].errno;
-}
-
#define PROC(proc, argtype, restype, timer) \
[NFSPROC_##proc] = { \
.p_proc = NFSPROC_##proc, \
diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h
index df9ca56db347..b333ea119ef5 100644
--- a/fs/nfs/nfs3_fs.h
+++ b/fs/nfs/nfs3_fs.h
@@ -12,12 +12,11 @@
*/
#ifdef CONFIG_NFS_V3_ACL
extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type, bool rcu);
-extern int nfs3_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+extern int nfs3_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type);
extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
struct posix_acl *dfacl);
extern ssize_t nfs3_listxattr(struct dentry *, char *, size_t);
-extern const struct xattr_handler *nfs3_xattr_handlers[];
#else
static inline int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
struct posix_acl *dfacl)
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 74d11e3c4205..a126eb31f62f 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -21,9 +21,8 @@ static void nfs3_prepare_get_acl(struct posix_acl **p)
{
struct posix_acl *sentinel = uncached_acl_sentinel(current);
- if (cmpxchg(p, ACL_NOT_CACHED, sentinel) != ACL_NOT_CACHED) {
- /* Not the first reader or sentinel already in place. */
- }
+ /* If the ACL isn't being read yet, set our sentinel. */
+ cmpxchg(p, ACL_NOT_CACHED, sentinel);
}
static void nfs3_complete_get_acl(struct posix_acl **p, struct posix_acl *acl)
@@ -105,7 +104,7 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type, bool rcu)
switch (status) {
case 0:
- status = nfs_refresh_inode(inode, res.fattr);
+ nfs_refresh_inode(inode, res.fattr);
break;
case -EPFNOSUPPORT:
case -EPROTONOSUPPORT:
@@ -255,7 +254,7 @@ int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
}
-int nfs3_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int nfs3_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
struct posix_acl *orig = acl, *dfacl = NULL, *alloc;
@@ -300,12 +299,6 @@ fail:
goto out;
}
-const struct xattr_handler *nfs3_xattr_handlers[] = {
- &posix_acl_access_xattr_handler,
- &posix_acl_default_xattr_handler,
- NULL,
-};
-
static int
nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data,
size_t size, ssize_t *result)
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
index 669cda757a5c..5d97c1d38bb6 100644
--- a/fs/nfs/nfs3client.c
+++ b/fs/nfs/nfs3client.c
@@ -2,8 +2,11 @@
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/sunrpc/addr.h>
+#include <net/handshake.h>
#include "internal.h"
#include "nfs3_fs.h"
+#include "netns.h"
+#include "sysfs.h"
#ifdef CONFIG_NFS_V3_ACL
static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program };
@@ -31,6 +34,8 @@ static void nfs_init_server_aclclient(struct nfs_server *server)
if (IS_ERR(server->client_acl))
goto out_noacl;
+ nfs_sysfs_link_rpc_client(server, server->client_acl, NULL);
+
/* No errors! Assume that Sun nfsacls are supported */
server->caps |= NFS_CAP_ACLS;
return;
@@ -82,6 +87,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans)
{
struct rpc_timeout ds_timeout;
+ unsigned long connect_timeout = ds_timeo * (ds_retrans + 1) * HZ / 10;
struct nfs_client *mds_clp = mds_srv->nfs_client;
struct nfs_client_initdata cl_init = {
.addr = ds_addr,
@@ -93,6 +99,13 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
.net = mds_clp->cl_net,
.timeparms = &ds_timeout,
.cred = mds_srv->cred,
+ .xprtsec = {
+ .policy = RPC_XPRTSEC_NONE,
+ .cert_serial = TLS_NO_CERT,
+ .privkey_serial = TLS_NO_PRIVKEY,
+ },
+ .connect_timeout = connect_timeout,
+ .reconnect_timeout = connect_timeout,
};
struct nfs_client *clp;
char buf[INET6_ADDRSTRLEN + 1];
@@ -102,11 +115,23 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
return ERR_PTR(-EINVAL);
cl_init.hostname = buf;
- if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP)
- cl_init.nconnect = mds_clp->cl_nconnect;
+ switch (ds_proto) {
+ case XPRT_TRANSPORT_TCP_TLS:
+ if (mds_clp->cl_xprtsec.policy != RPC_XPRTSEC_NONE)
+ cl_init.xprtsec = mds_clp->cl_xprtsec;
+ else
+ ds_proto = XPRT_TRANSPORT_TCP;
+ fallthrough;
+ case XPRT_TRANSPORT_RDMA:
+ case XPRT_TRANSPORT_TCP:
+ if (mds_clp->cl_nconnect > 1)
+ cl_init.nconnect = mds_clp->cl_nconnect;
+ }
if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+ if (test_bit(NFS_CS_NETUNREACH_FATAL, &mds_clp->cl_flags))
+ __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags);
__set_bit(NFS_CS_DS, &cl_init.init_flags);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 4bf208a0a8e9..a4cb67573aa7 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -39,7 +39,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
__set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
schedule_timeout(NFS_JUKEBOX_RETRY_TIME);
res = -ERESTARTSYS;
- } while (!fatal_signal_pending(current));
+ } while (!fatal_signal_pending(current) && !nfs_current_task_exiting());
return res;
}
@@ -192,7 +192,7 @@ __nfs3_proc_lookup(struct inode *dir, const char *name, size_t len,
}
static int
-nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
+nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
unsigned short task_flags = 0;
@@ -202,8 +202,7 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
task_flags |= RPC_TASK_TIMEOUT;
dprintk("NFS call lookup %pd2\n", dentry);
- return __nfs3_proc_lookup(dir, dentry->d_name.name,
- dentry->d_name.len, fhandle, fattr,
+ return __nfs3_proc_lookup(dir, name->name, name->len, fhandle, fattr,
task_flags);
}
@@ -543,9 +542,10 @@ out:
}
static int
-nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
+nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct folio *folio,
unsigned int len, struct iattr *sattr)
{
+ struct page *page = &folio->page;
struct nfs3_createdata *data;
struct dentry *d_alias;
int status = -ENOMEM;
@@ -578,13 +578,13 @@ out:
return status;
}
-static int
+static struct dentry *
nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
{
struct posix_acl *default_acl, *acl;
struct nfs3_createdata *data;
- struct dentry *d_alias;
- int status = -ENOMEM;
+ struct dentry *ret = ERR_PTR(-ENOMEM);
+ int status;
dprintk("NFS call mkdir %pd\n", dentry);
@@ -592,8 +592,9 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
if (data == NULL)
goto out;
- status = posix_acl_create(dir, &sattr->ia_mode, &default_acl, &acl);
- if (status)
+ ret = ERR_PTR(posix_acl_create(dir, &sattr->ia_mode,
+ &default_acl, &acl));
+ if (IS_ERR(ret))
goto out;
data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR];
@@ -602,25 +603,27 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
data->arg.mkdir.len = dentry->d_name.len;
data->arg.mkdir.sattr = sattr;
- d_alias = nfs3_do_create(dir, dentry, data);
- status = PTR_ERR_OR_ZERO(d_alias);
+ ret = nfs3_do_create(dir, dentry, data);
- if (status != 0)
+ if (IS_ERR(ret))
goto out_release_acls;
- if (d_alias)
- dentry = d_alias;
+ if (ret)
+ dentry = ret;
status = nfs3_proc_setacls(d_inode(dentry), acl, default_acl);
+ if (status) {
+ dput(ret);
+ ret = ERR_PTR(status);
+ }
- dput(d_alias);
out_release_acls:
posix_acl_release(acl);
posix_acl_release(default_acl);
out:
nfs3_free_createdata(data);
- dprintk("NFS reply mkdir: %d\n", status);
- return status;
+ dprintk("NFS reply mkdir: %d\n", PTR_ERR_OR_ZERO(ret));
+ return ret;
}
static int
@@ -843,6 +846,41 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return status;
}
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+
+static unsigned nfs3_localio_probe_throttle __read_mostly = 0;
+module_param(nfs3_localio_probe_throttle, uint, 0644);
+MODULE_PARM_DESC(nfs3_localio_probe_throttle,
+ "Probe for NFSv3 LOCALIO every N IO requests. Must be power-of-2, defaults to 0 (probing disabled).");
+
+static void nfs3_localio_probe(struct nfs_server *server)
+{
+ struct nfs_client *clp = server->nfs_client;
+
+ /* Throttled to reduce nfs_local_probe_async() frequency */
+ if (!nfs3_localio_probe_throttle || nfs_server_is_local(clp))
+ return;
+
+ /*
+ * Try (re)enabling LOCALIO if isn't enabled -- admin deems
+ * it worthwhile to periodically check if LOCALIO possible by
+ * setting the 'nfs3_localio_probe_throttle' module parameter.
+ *
+ * This is useful if LOCALIO was previously enabled, but was
+ * disabled due to server restart, and IO has successfully
+ * completed in terms of normal RPC.
+ */
+ if ((clp->cl_uuid.nfs3_localio_probe_count++ &
+ (nfs3_localio_probe_throttle - 1)) == 0) {
+ if (!nfs_server_is_local(clp))
+ nfs_local_probe_async(clp);
+ }
+}
+
+#else
+static void nfs3_localio_probe(struct nfs_server *server) {}
+#endif
+
static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
struct inode *inode = hdr->inode;
@@ -854,8 +892,11 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN;
- if (task->tk_status >= 0 && !server->read_hdrsize)
- cmpxchg(&server->read_hdrsize, 0, hdr->res.replen);
+ if (task->tk_status >= 0) {
+ if (!server->read_hdrsize)
+ cmpxchg(&server->read_hdrsize, 0, hdr->res.replen);
+ nfs3_localio_probe(server);
+ }
nfs_invalidate_atime(inode);
nfs_refresh_inode(inode, &hdr->fattr);
@@ -885,8 +926,10 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN;
- if (task->tk_status >= 0)
+ if (task->tk_status >= 0) {
nfs_writeback_update_inode(hdr);
+ nfs3_localio_probe(NFS_SERVER(inode));
+ }
return 0;
}
@@ -962,7 +1005,7 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
struct nfs_open_context *ctx = nfs_file_open_context(filp);
int status;
- if (fl->fl_flags & FL_CLOSE) {
+ if (fl->c.flc_flags & FL_CLOSE) {
l_ctx = nfs_get_lock_context(ctx);
if (IS_ERR(l_ctx))
l_ctx = NULL;
@@ -978,13 +1021,21 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
return status;
}
-static int nfs3_have_delegation(struct inode *inode, fmode_t flags)
+static int nfs3_have_delegation(struct inode *inode, fmode_t type, int flags)
+{
+ return 0;
+}
+
+static int nfs3_return_delegation(struct inode *inode)
{
+ if (S_ISREG(inode->i_mode))
+ nfs_wb_all(inode);
return 0;
}
static const struct inode_operations nfs3_dir_inode_operations = {
.create = nfs_create,
+ .atomic_open = nfs_atomic_open_v23,
.lookup = nfs_lookup,
.link = nfs_link,
.unlink = nfs_unlink,
@@ -1060,6 +1111,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.clear_acl_cache = forget_all_cached_acls,
.close_context = nfs_close_context,
.have_delegation = nfs3_have_delegation,
+ .return_delegation = nfs3_return_delegation,
.alloc_client = nfs_alloc_client,
.init_client = nfs_init_client,
.free_client = nfs_free_client,
diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c
index 7c5809431e61..20a80478449e 100644
--- a/fs/nfs/nfs3super.c
+++ b/fs/nfs/nfs3super.c
@@ -14,9 +14,6 @@ struct nfs_subversion nfs_v3 = {
.rpc_vers = &nfs_version3,
.rpc_ops = &nfs_v3_clientops,
.sops = &nfs_sops,
-#ifdef CONFIG_NFS_V3_ACL
- .xattr = nfs3_xattr_handlers,
-#endif
};
static int __init init_nfs_v3(void)
@@ -30,6 +27,7 @@ static void __exit exit_nfs_v3(void)
unregister_nfs_version(&nfs_v3);
}
+MODULE_DESCRIPTION("NFSv3 client support");
MODULE_LICENSE("GPL");
module_init(init_nfs_v3);
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 3b0b650c9c5a..e17d72908412 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -21,14 +21,13 @@
#include <linux/nfs3.h>
#include <linux/nfs_fs.h>
#include <linux/nfsacl.h>
-#include "nfstrace.h"
+#include <linux/nfs_common.h>
+
#include "internal.h"
+#include "nfstrace.h"
#define NFSDBG_FACILITY NFSDBG_XDR
-/* Mapping from NFS error code to "errno" error code. */
-#define errno_NFSERR_IO EIO
-
/*
* Declare the space requirements for NFS arguments and replies as
* number of 32bit-words
@@ -91,8 +90,6 @@
NFS3_pagepad_sz)
#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
-static int nfs3_stat_to_errno(enum nfs_stat);
-
/*
* Map file type to S_IFMT bits
*/
@@ -1406,7 +1403,7 @@ static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1445,7 +1442,7 @@ static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1495,7 +1492,7 @@ out_default:
error = decode_post_op_attr(xdr, result->dir_attr, userns);
if (unlikely(error))
goto out;
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1537,7 +1534,7 @@ static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1578,7 +1575,7 @@ static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1658,7 +1655,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1728,7 +1725,7 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1795,7 +1792,7 @@ out_default:
error = decode_wcc_data(xdr, result->dir_attr, userns);
if (unlikely(error))
goto out;
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1835,7 +1832,7 @@ static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1881,7 +1878,7 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -1926,7 +1923,7 @@ static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/**
@@ -1991,7 +1988,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
error = decode_inline_filename3(xdr, &entry->name, &entry->len);
if (unlikely(error))
- return -EAGAIN;
+ return error == -ENAMETOOLONG ? -ENAMETOOLONG : -EAGAIN;
error = decode_cookie3(xdr, &new_cookie);
if (unlikely(error))
@@ -2101,7 +2098,7 @@ out_default:
error = decode_post_op_attr(xdr, result->dir_attr, rpc_rqst_userns(req));
if (unlikely(error))
goto out;
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -2167,7 +2164,7 @@ static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -2243,7 +2240,7 @@ static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -2304,7 +2301,7 @@ static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
/*
@@ -2350,7 +2347,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
#ifdef CONFIG_NFS_V3_ACL
@@ -2416,7 +2413,7 @@ static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
@@ -2435,76 +2432,11 @@ static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs3_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
}
#endif /* CONFIG_NFS_V3_ACL */
-
-/*
- * We need to translate between nfs status return values and
- * the local errno values which may not be the same.
- */
-static const struct {
- int stat;
- int errno;
-} nfs_errtbl[] = {
- { NFS_OK, 0 },
- { NFSERR_PERM, -EPERM },
- { NFSERR_NOENT, -ENOENT },
- { NFSERR_IO, -errno_NFSERR_IO},
- { NFSERR_NXIO, -ENXIO },
-/* { NFSERR_EAGAIN, -EAGAIN }, */
- { NFSERR_ACCES, -EACCES },
- { NFSERR_EXIST, -EEXIST },
- { NFSERR_XDEV, -EXDEV },
- { NFSERR_NODEV, -ENODEV },
- { NFSERR_NOTDIR, -ENOTDIR },
- { NFSERR_ISDIR, -EISDIR },
- { NFSERR_INVAL, -EINVAL },
- { NFSERR_FBIG, -EFBIG },
- { NFSERR_NOSPC, -ENOSPC },
- { NFSERR_ROFS, -EROFS },
- { NFSERR_MLINK, -EMLINK },
- { NFSERR_NAMETOOLONG, -ENAMETOOLONG },
- { NFSERR_NOTEMPTY, -ENOTEMPTY },
- { NFSERR_DQUOT, -EDQUOT },
- { NFSERR_STALE, -ESTALE },
- { NFSERR_REMOTE, -EREMOTE },
-#ifdef EWFLUSH
- { NFSERR_WFLUSH, -EWFLUSH },
-#endif
- { NFSERR_BADHANDLE, -EBADHANDLE },
- { NFSERR_NOT_SYNC, -ENOTSYNC },
- { NFSERR_BAD_COOKIE, -EBADCOOKIE },
- { NFSERR_NOTSUPP, -ENOTSUPP },
- { NFSERR_TOOSMALL, -ETOOSMALL },
- { NFSERR_SERVERFAULT, -EREMOTEIO },
- { NFSERR_BADTYPE, -EBADTYPE },
- { NFSERR_JUKEBOX, -EJUKEBOX },
- { -1, -EIO }
-};
-
-/**
- * nfs3_stat_to_errno - convert an NFS status code to a local errno
- * @status: NFS status code to convert
- *
- * Returns a local errno value, or -EIO if the NFS status code is
- * not recognized. This function is used jointly by NFSv2 and NFSv3.
- */
-static int nfs3_stat_to_errno(enum nfs_stat status)
-{
- int i;
-
- for (i = 0; nfs_errtbl[i].stat != -1; i++) {
- if (nfs_errtbl[i].stat == (int)status)
- return nfs_errtbl[i].errno;
- }
- dprintk("NFS: Unrecognized nfs status value: %u\n", status);
- return nfs_errtbl[i].errno;
-}
-
-
#define PROC(proc, argtype, restype, timer) \
[NFS3PROC_##proc] = { \
.p_proc = NFS3PROC_##proc, \
diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h
index 0fe5aacbcfdf..aafd15a4afce 100644
--- a/fs/nfs/nfs42.h
+++ b/fs/nfs/nfs42.h
@@ -13,6 +13,7 @@
* more? Need to consider not to pre-alloc too much for a compound.
*/
#define PNFS_LAYOUTSTATS_MAXDEV (4)
+#define READ_PLUS_SCRATCH_SIZE (16)
/* nfs4.2proc.c */
#ifdef CONFIG_NFS_V4_2
@@ -20,6 +21,7 @@ int nfs42_proc_allocate(struct file *, loff_t, loff_t);
ssize_t nfs42_proc_copy(struct file *, loff_t, struct file *, loff_t, size_t,
struct nl4_server *, nfs4_stateid *, bool);
int nfs42_proc_deallocate(struct file *, loff_t, loff_t);
+int nfs42_proc_zero_range(struct file *, loff_t, loff_t);
loff_t nfs42_proc_llseek(struct file *, loff_t, int);
int nfs42_proc_layoutstats_generic(struct nfs_server *,
struct nfs42_layoutstat_data *);
@@ -54,11 +56,14 @@ int nfs42_proc_removexattr(struct inode *inode, const char *name);
* They would be 7 bytes long in the eventual buffer ("user.x\0"), and
* 8 bytes long XDR-encoded.
*
- * Include the trailing eof word as well.
+ * Include the trailing eof word as well and make the result a multiple
+ * of 4 bytes.
*/
static inline u32 nfs42_listxattr_xdrsize(u32 buflen)
{
- return ((buflen / (XATTR_USER_PREFIX_LEN + 2)) * 8) + 4;
+ u32 size = 8 * buflen / (XATTR_USER_PREFIX_LEN + 2) + 4;
+
+ return (size + 3) & ~3;
}
#endif /* CONFIG_NFS_V4_2 */
#endif /* __LINUX_FS_NFS_NFS4_2_H */
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index ecb428512fe1..d537fb0c230e 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -21,6 +21,8 @@
#define NFSDBG_FACILITY NFSDBG_PROC
static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *std);
+static int nfs42_proc_offload_status(struct file *file, nfs4_stateid *stateid,
+ u64 *copied);
static void nfs42_set_netaddr(struct file *filep, struct nfs42_netaddr *naddr)
{
@@ -81,7 +83,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
if (status == 0) {
if (nfs_should_remove_suid(inode)) {
spin_lock(&inode->i_lock);
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE);
+ nfs_set_cache_invalid(inode,
+ NFS_INO_REVAL_FORCED | NFS_INO_INVALID_MODE);
spin_unlock(&inode->i_lock);
}
status = nfs_post_op_update_inode_force_wcc(inode,
@@ -111,6 +114,7 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
exception.inode = inode;
exception.state = lock->open_context->state;
+ nfs_file_block_o_direct(NFS_I(inode));
err = nfs_sync_inode(inode);
if (err)
goto out;
@@ -134,6 +138,7 @@ int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len)
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ALLOCATE],
};
struct inode *inode = file_inode(filep);
+ loff_t oldsize = i_size_read(inode);
int err;
if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE))
@@ -142,8 +147,13 @@ int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len)
inode_lock(inode);
err = nfs42_proc_fallocate(&msg, filep, offset, len);
- if (err == -EOPNOTSUPP)
- NFS_SERVER(inode)->caps &= ~NFS_CAP_ALLOCATE;
+
+ if (err == 0)
+ nfs_truncate_last_folio(inode->i_mapping, oldsize,
+ offset + len);
+ else if (err == -EOPNOTSUPP)
+ NFS_SERVER(inode)->caps &= ~(NFS_CAP_ALLOCATE |
+ NFS_CAP_ZERO_RANGE);
inode_unlock(inode);
return err;
@@ -166,12 +176,53 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
if (err == 0)
truncate_pagecache_range(inode, offset, (offset + len) -1);
if (err == -EOPNOTSUPP)
- NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE;
+ NFS_SERVER(inode)->caps &= ~(NFS_CAP_DEALLOCATE |
+ NFS_CAP_ZERO_RANGE);
inode_unlock(inode);
return err;
}
+int nfs42_proc_zero_range(struct file *filep, loff_t offset, loff_t len)
+{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ZERO_RANGE],
+ };
+ struct inode *inode = file_inode(filep);
+ loff_t oldsize = i_size_read(inode);
+ int err;
+
+ if (!nfs_server_capable(inode, NFS_CAP_ZERO_RANGE))
+ return -EOPNOTSUPP;
+
+ inode_lock(inode);
+
+ err = nfs42_proc_fallocate(&msg, filep, offset, len);
+ if (err == 0) {
+ nfs_truncate_last_folio(inode->i_mapping, oldsize,
+ offset + len);
+ truncate_pagecache_range(inode, offset, (offset + len) -1);
+ } else if (err == -EOPNOTSUPP)
+ NFS_SERVER(inode)->caps &= ~NFS_CAP_ZERO_RANGE;
+
+ inode_unlock(inode);
+ return err;
+}
+
+static void nfs4_copy_dequeue_callback(struct nfs_server *dst_server,
+ struct nfs_server *src_server,
+ struct nfs4_copy_state *copy)
+{
+ spin_lock(&dst_server->nfs_client->cl_lock);
+ list_del_init(&copy->copies);
+ spin_unlock(&dst_server->nfs_client->cl_lock);
+ if (dst_server != src_server) {
+ spin_lock(&src_server->nfs_client->cl_lock);
+ list_del_init(&copy->src_copies);
+ spin_unlock(&src_server->nfs_client->cl_lock);
+ }
+}
+
static int handle_async_copy(struct nfs42_copy_res *res,
struct nfs_server *dst_server,
struct nfs_server *src_server,
@@ -181,9 +232,12 @@ static int handle_async_copy(struct nfs42_copy_res *res,
bool *restart)
{
struct nfs4_copy_state *copy, *tmp_copy = NULL, *iter;
- int status = NFS4_OK;
struct nfs_open_context *dst_ctx = nfs_file_open_context(dst);
struct nfs_open_context *src_ctx = nfs_file_open_context(src);
+ struct nfs_client *clp = dst_server->nfs_client;
+ unsigned long timeout = 3 * HZ;
+ int status = NFS4_OK;
+ u64 copied;
copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_KERNEL);
if (!copy)
@@ -217,19 +271,16 @@ static int handle_async_copy(struct nfs42_copy_res *res,
if (dst_server != src_server) {
spin_lock(&src_server->nfs_client->cl_lock);
- list_add_tail(&copy->src_copies, &src_server->ss_copies);
+ list_add_tail(&copy->src_copies, &src_server->ss_src_copies);
spin_unlock(&src_server->nfs_client->cl_lock);
}
- status = wait_for_completion_interruptible(&copy->completion);
- spin_lock(&dst_server->nfs_client->cl_lock);
- list_del_init(&copy->copies);
- spin_unlock(&dst_server->nfs_client->cl_lock);
- if (dst_server != src_server) {
- spin_lock(&src_server->nfs_client->cl_lock);
- list_del_init(&copy->src_copies);
- spin_unlock(&src_server->nfs_client->cl_lock);
- }
+wait:
+ status = wait_for_completion_interruptible_timeout(&copy->completion,
+ timeout);
+ if (!status)
+ goto timeout;
+ nfs4_copy_dequeue_callback(dst_server, src_server, copy);
if (status == -ERESTARTSYS) {
goto out_cancel;
} else if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) {
@@ -239,6 +290,7 @@ static int handle_async_copy(struct nfs42_copy_res *res,
}
out:
res->write_res.count = copy->count;
+ /* Copy out the updated write verifier provided by CB_OFFLOAD. */
memcpy(&res->write_res.verifier, &copy->verf, sizeof(copy->verf));
status = -copy->error;
@@ -250,6 +302,39 @@ out_cancel:
if (!nfs42_files_from_same_server(src, dst))
nfs42_do_offload_cancel_async(src, src_stateid);
goto out_free;
+timeout:
+ timeout <<= 1;
+ if (timeout > (clp->cl_lease_time >> 1))
+ timeout = clp->cl_lease_time >> 1;
+ status = nfs42_proc_offload_status(dst, &copy->stateid, &copied);
+ if (status == -EINPROGRESS)
+ goto wait;
+ nfs4_copy_dequeue_callback(dst_server, src_server, copy);
+ switch (status) {
+ case 0:
+ /* The server recognized the copy stateid, so it hasn't
+ * rebooted. Don't overwrite the verifier returned in the
+ * COPY result. */
+ res->write_res.count = copied;
+ goto out_free;
+ case -EREMOTEIO:
+ /* COPY operation failed on the server. */
+ status = -EOPNOTSUPP;
+ res->write_res.count = copied;
+ goto out_free;
+ case -EBADF:
+ /* Server did not recognize the copy stateid. It has
+ * probably restarted and lost the plot. */
+ res->write_res.count = 0;
+ status = -EOPNOTSUPP;
+ break;
+ case -EOPNOTSUPP:
+ /* RFC 7862 REQUIREs server to support OFFLOAD_STATUS when
+ * it has signed up for an async COPY, so server is not
+ * spec-compliant. */
+ res->write_res.count = 0;
+ }
+ goto out_free;
}
static int process_copy_commit(struct file *dst, loff_t pos_dst,
@@ -278,22 +363,27 @@ out:
/**
* nfs42_copy_dest_done - perform inode cache updates after clone/copy offload
- * @inode: pointer to destination inode
+ * @file: pointer to destination file
* @pos: destination offset
* @len: copy length
+ * @oldsize: length of the file prior to clone/copy
*
* Punch a hole in the inode page cache, so that the NFS client will
* know to retrieve new data.
* Update the file size if necessary, and then mark the inode as having
* invalid cached values for change attribute, ctime, mtime and space used.
*/
-static void nfs42_copy_dest_done(struct inode *inode, loff_t pos, loff_t len)
+static void nfs42_copy_dest_done(struct file *file, loff_t pos, loff_t len,
+ loff_t oldsize)
{
+ struct inode *inode = file_inode(file);
+ struct address_space *mapping = file->f_mapping;
loff_t newsize = pos + len;
loff_t end = newsize - 1;
- WARN_ON_ONCE(invalidate_inode_pages2_range(inode->i_mapping,
- pos >> PAGE_SHIFT, end >> PAGE_SHIFT));
+ nfs_truncate_last_folio(mapping, oldsize, pos);
+ WARN_ON_ONCE(invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT,
+ end >> PAGE_SHIFT));
spin_lock(&inode->i_lock);
if (newsize > i_size_read(inode))
@@ -326,6 +416,7 @@ static ssize_t _nfs42_proc_copy(struct file *src,
struct nfs_server *src_server = NFS_SERVER(src_inode);
loff_t pos_src = args->src_pos;
loff_t pos_dst = args->dst_pos;
+ loff_t oldsize_dst = i_size_read(dst_inode);
size_t count = args->count;
ssize_t status;
@@ -354,6 +445,7 @@ static ssize_t _nfs42_proc_copy(struct file *src,
return status;
}
+ nfs_file_block_o_direct(NFS_I(dst_inode));
status = nfs_sync_inode(dst_inode);
if (status)
return status;
@@ -399,7 +491,7 @@ static ssize_t _nfs42_proc_copy(struct file *src,
goto out;
}
- nfs42_copy_dest_done(dst_inode, pos_dst, res->write_res.count);
+ nfs42_copy_dest_done(dst, pos_dst, res->write_res.count, oldsize_dst);
nfs_invalidate_atime(src_inode);
status = res->write_res.count;
out:
@@ -460,7 +552,8 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
if (err >= 0)
break;
- if (err == -ENOTSUPP &&
+ if ((err == -ENOTSUPP ||
+ err == -NFS4ERR_OFFLOAD_DENIED) &&
nfs42_files_from_same_server(src, dst)) {
err = -EOPNOTSUPP;
break;
@@ -470,8 +563,9 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
continue;
}
break;
- } else if (err == -NFS4ERR_OFFLOAD_NO_REQS && !args.sync) {
- args.sync = true;
+ } else if (err == -NFS4ERR_OFFLOAD_NO_REQS &&
+ args.sync != res.synchronous) {
+ args.sync = res.synchronous;
dst_exception.retry = 1;
continue;
} else if ((err == -ESTALE ||
@@ -495,15 +589,15 @@ out_put_src_lock:
return err;
}
-struct nfs42_offloadcancel_data {
+struct nfs42_offload_data {
struct nfs_server *seq_server;
struct nfs42_offload_status_args args;
struct nfs42_offload_status_res res;
};
-static void nfs42_offload_cancel_prepare(struct rpc_task *task, void *calldata)
+static void nfs42_offload_prepare(struct rpc_task *task, void *calldata)
{
- struct nfs42_offloadcancel_data *data = calldata;
+ struct nfs42_offload_data *data = calldata;
nfs4_setup_sequence(data->seq_server->nfs_client,
&data->args.osa_seq_args,
@@ -512,7 +606,7 @@ static void nfs42_offload_cancel_prepare(struct rpc_task *task, void *calldata)
static void nfs42_offload_cancel_done(struct rpc_task *task, void *calldata)
{
- struct nfs42_offloadcancel_data *data = calldata;
+ struct nfs42_offload_data *data = calldata;
trace_nfs4_offload_cancel(&data->args, task->tk_status);
nfs41_sequence_done(task, &data->res.osr_seq_res);
@@ -522,22 +616,22 @@ static void nfs42_offload_cancel_done(struct rpc_task *task, void *calldata)
rpc_restart_call_prepare(task);
}
-static void nfs42_free_offloadcancel_data(void *data)
+static void nfs42_offload_release(void *data)
{
kfree(data);
}
static const struct rpc_call_ops nfs42_offload_cancel_ops = {
- .rpc_call_prepare = nfs42_offload_cancel_prepare,
+ .rpc_call_prepare = nfs42_offload_prepare,
.rpc_call_done = nfs42_offload_cancel_done,
- .rpc_release = nfs42_free_offloadcancel_data,
+ .rpc_release = nfs42_offload_release,
};
static int nfs42_do_offload_cancel_async(struct file *dst,
nfs4_stateid *stateid)
{
struct nfs_server *dst_server = NFS_SERVER(file_inode(dst));
- struct nfs42_offloadcancel_data *data = NULL;
+ struct nfs42_offload_data *data = NULL;
struct nfs_open_context *ctx = nfs_file_open_context(dst);
struct rpc_task *task;
struct rpc_message msg = {
@@ -549,14 +643,14 @@ static int nfs42_do_offload_cancel_async(struct file *dst,
.rpc_message = &msg,
.callback_ops = &nfs42_offload_cancel_ops,
.workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE,
};
int status;
if (!(dst_server->caps & NFS_CAP_OFFLOAD_CANCEL))
return -EOPNOTSUPP;
- data = kzalloc(sizeof(struct nfs42_offloadcancel_data), GFP_KERNEL);
+ data = kzalloc(sizeof(struct nfs42_offload_data), GFP_KERNEL);
if (data == NULL)
return -ENOMEM;
@@ -579,6 +673,108 @@ static int nfs42_do_offload_cancel_async(struct file *dst,
return status;
}
+static int
+_nfs42_proc_offload_status(struct nfs_server *server, struct file *file,
+ struct nfs42_offload_data *data)
+{
+ struct nfs_open_context *ctx = nfs_file_open_context(file);
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OFFLOAD_STATUS],
+ .rpc_argp = &data->args,
+ .rpc_resp = &data->res,
+ .rpc_cred = ctx->cred,
+ };
+ int status;
+
+ status = nfs4_call_sync(server->client, server, &msg,
+ &data->args.osa_seq_args,
+ &data->res.osr_seq_res, 1);
+ trace_nfs4_offload_status(&data->args, status);
+ switch (status) {
+ case 0:
+ break;
+
+ case -NFS4ERR_ADMIN_REVOKED:
+ case -NFS4ERR_BAD_STATEID:
+ case -NFS4ERR_OLD_STATEID:
+ /*
+ * Server does not recognize the COPY stateid. CB_OFFLOAD
+ * could have purged it, or server might have rebooted.
+ * Since COPY stateids don't have an associated inode,
+ * avoid triggering state recovery.
+ */
+ status = -EBADF;
+ break;
+ case -NFS4ERR_NOTSUPP:
+ case -ENOTSUPP:
+ case -EOPNOTSUPP:
+ server->caps &= ~NFS_CAP_OFFLOAD_STATUS;
+ status = -EOPNOTSUPP;
+ break;
+ }
+
+ return status;
+}
+
+/**
+ * nfs42_proc_offload_status - Poll completion status of an async copy operation
+ * @dst: handle of file being copied into
+ * @stateid: copy stateid (from async COPY result)
+ * @copied: OUT: number of bytes copied so far
+ *
+ * Return values:
+ * %0: Server returned an NFS4_OK completion status
+ * %-EINPROGRESS: Server returned no completion status
+ * %-EREMOTEIO: Server returned an error completion status
+ * %-EBADF: Server did not recognize the copy stateid
+ * %-EOPNOTSUPP: Server does not support OFFLOAD_STATUS
+ * %-ERESTARTSYS: Wait interrupted by signal
+ *
+ * Other negative errnos indicate the client could not complete the
+ * request.
+ */
+static int
+nfs42_proc_offload_status(struct file *dst, nfs4_stateid *stateid, u64 *copied)
+{
+ struct inode *inode = file_inode(dst);
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs4_exception exception = {
+ .inode = inode,
+ };
+ struct nfs42_offload_data *data;
+ int status;
+
+ if (!(server->caps & NFS_CAP_OFFLOAD_STATUS))
+ return -EOPNOTSUPP;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ data->seq_server = server;
+ data->args.osa_src_fh = NFS_FH(inode);
+ memcpy(&data->args.osa_stateid, stateid,
+ sizeof(data->args.osa_stateid));
+ exception.stateid = &data->args.osa_stateid;
+ do {
+ status = _nfs42_proc_offload_status(server, dst, data);
+ if (status == -EOPNOTSUPP)
+ goto out;
+ status = nfs4_handle_exception(server, status, &exception);
+ } while (exception.retry);
+ if (status)
+ goto out;
+
+ *copied = data->res.osr_count;
+ if (!data->res.complete_count)
+ status = -EINPROGRESS;
+ else if (data->res.osr_complete != NFS_OK)
+ status = -EREMOTEIO;
+
+out:
+ kfree(data);
+ return status;
+}
+
static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
struct nfs42_copy_notify_args *args,
struct nfs42_copy_notify_res *res)
@@ -858,7 +1054,7 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server,
.rpc_message = &msg,
.callback_ops = &nfs42_layoutstat_ops,
.callback_data = data,
- .flags = RPC_TASK_ASYNC,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE,
};
struct rpc_task *task;
@@ -1013,7 +1209,7 @@ int nfs42_proc_layouterror(struct pnfs_layout_segment *lseg,
struct rpc_task_setup task_setup = {
.rpc_message = &msg,
.callback_ops = &nfs42_layouterror_ops,
- .flags = RPC_TASK_ASYNC,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE,
};
unsigned int i;
@@ -1062,6 +1258,7 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
struct nfs42_clone_res res = {
.server = server,
};
+ loff_t oldsize_dst = i_size_read(dst_inode);
int status;
msg->rpc_argp = &args;
@@ -1096,7 +1293,7 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
/* a zero-length count means clone to EOF in src */
if (count == 0 && res.dst_fattr->valid & NFS_ATTR_FATTR_SIZE)
count = nfs_size_to_loff_t(res.dst_fattr->size) - dst_offset;
- nfs42_copy_dest_done(dst_inode, dst_offset, count);
+ nfs42_copy_dest_done(dst_f, dst_offset, count, oldsize_dst);
status = nfs_post_op_update_inode(dst_inode, res.dst_fattr);
}
@@ -1189,15 +1386,19 @@ static int _nfs42_proc_setxattr(struct inode *inode, const char *name,
const void *buf, size_t buflen, int flags)
{
struct nfs_server *server = NFS_SERVER(inode);
+ __u32 bitmask[NFS_BITMASK_SZ];
struct page *pages[NFS4XATTR_MAXPAGES];
struct nfs42_setxattrargs arg = {
.fh = NFS_FH(inode),
+ .bitmask = bitmask,
.xattr_pages = pages,
.xattr_len = buflen,
.xattr_name = name,
.xattr_flags = flags,
};
- struct nfs42_setxattrres res;
+ struct nfs42_setxattrres res = {
+ .server = server,
+ };
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETXATTR],
.rpc_argp = &arg,
@@ -1209,13 +1410,22 @@ static int _nfs42_proc_setxattr(struct inode *inode, const char *name,
if (buflen > server->sxasize)
return -ERANGE;
+ res.fattr = nfs_alloc_fattr();
+ if (!res.fattr)
+ return -ENOMEM;
+
if (buflen > 0) {
np = nfs4_buf_to_pages_noslab(buf, buflen, arg.xattr_pages);
- if (np < 0)
- return np;
+ if (np < 0) {
+ ret = np;
+ goto out;
+ }
} else
np = 0;
+ nfs4_bitmask_set(bitmask, server->cache_consistency_bitmask,
+ inode, NFS_INO_INVALID_CHANGE);
+
ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args,
&res.seq_res, 1);
trace_nfs4_setxattr(inode, name, ret);
@@ -1223,9 +1433,13 @@ static int _nfs42_proc_setxattr(struct inode *inode, const char *name,
for (; np > 0; np--)
put_page(pages[np - 1]);
- if (!ret)
+ if (!ret) {
nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
+ ret = nfs_post_op_update_inode(inode, res.fattr);
+ }
+out:
+ kfree(res.fattr);
return ret;
}
@@ -1300,7 +1514,7 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf,
ret = -ENOMEM;
- res.scratch = alloc_page(GFP_KERNEL);
+ res.scratch = folio_alloc(GFP_KERNEL, 0);
if (!res.scratch)
goto out;
@@ -1338,7 +1552,7 @@ out_free_pages:
}
kfree(pages);
out_free_scratch:
- __free_page(res.scratch);
+ folio_put(res.scratch);
out:
return ret;
@@ -1359,7 +1573,6 @@ ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name,
for (i = 0; i < np; i++) {
pages[i] = alloc_page(GFP_KERNEL);
if (!pages[i]) {
- np = i + 1;
err = -ENOMEM;
goto out;
}
@@ -1383,8 +1596,8 @@ ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name,
} while (exception.retry);
out:
- while (--np >= 0)
- __free_page(pages[np]);
+ while (--i >= 0)
+ __free_page(pages[i]);
kfree(pages);
return err;
diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c
index 76ae11834206..37d79400e5f4 100644
--- a/fs/nfs/nfs42xattr.c
+++ b/fs/nfs/nfs42xattr.c
@@ -132,7 +132,7 @@ nfs4_xattr_entry_lru_add(struct nfs4_xattr_entry *entry)
lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ?
&nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
- return list_lru_add(lru, &entry->lru);
+ return list_lru_add_obj(lru, &entry->lru);
}
static bool
@@ -143,7 +143,7 @@ nfs4_xattr_entry_lru_del(struct nfs4_xattr_entry *entry)
lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ?
&nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
- return list_lru_del(lru, &entry->lru);
+ return list_lru_del_obj(lru, &entry->lru);
}
/*
@@ -349,7 +349,7 @@ nfs4_xattr_cache_unlink(struct inode *inode)
oldcache = nfsi->xattr_cache;
if (oldcache != NULL) {
- list_lru_del(&nfs4_xattr_cache_lru, &oldcache->lru);
+ list_lru_del_obj(&nfs4_xattr_cache_lru, &oldcache->lru);
oldcache->inode = NULL;
}
nfsi->xattr_cache = NULL;
@@ -474,7 +474,7 @@ nfs4_xattr_get_cache(struct inode *inode, int add)
kref_get(&cache->ref);
nfsi->xattr_cache = cache;
cache->inode = inode;
- list_lru_add(&nfs4_xattr_cache_lru, &cache->lru);
+ list_lru_add_obj(&nfs4_xattr_cache_lru, &cache->lru);
}
spin_unlock(&inode->i_lock);
@@ -796,32 +796,13 @@ static unsigned long nfs4_xattr_cache_scan(struct shrinker *shrink,
static unsigned long nfs4_xattr_entry_scan(struct shrinker *shrink,
struct shrink_control *sc);
-static struct shrinker nfs4_xattr_cache_shrinker = {
- .count_objects = nfs4_xattr_cache_count,
- .scan_objects = nfs4_xattr_cache_scan,
- .seeks = DEFAULT_SEEKS,
- .flags = SHRINKER_MEMCG_AWARE,
-};
-
-static struct shrinker nfs4_xattr_entry_shrinker = {
- .count_objects = nfs4_xattr_entry_count,
- .scan_objects = nfs4_xattr_entry_scan,
- .seeks = DEFAULT_SEEKS,
- .batch = 512,
- .flags = SHRINKER_MEMCG_AWARE,
-};
-
-static struct shrinker nfs4_xattr_large_entry_shrinker = {
- .count_objects = nfs4_xattr_entry_count,
- .scan_objects = nfs4_xattr_entry_scan,
- .seeks = 1,
- .batch = 512,
- .flags = SHRINKER_MEMCG_AWARE,
-};
+static struct shrinker *nfs4_xattr_cache_shrinker;
+static struct shrinker *nfs4_xattr_entry_shrinker;
+static struct shrinker *nfs4_xattr_large_entry_shrinker;
static enum lru_status
cache_lru_isolate(struct list_head *item,
- struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
+ struct list_lru_one *lru, void *arg)
{
struct list_head *dispose = arg;
struct inode *inode;
@@ -886,7 +867,7 @@ nfs4_xattr_cache_count(struct shrinker *shrink, struct shrink_control *sc)
static enum lru_status
entry_lru_isolate(struct list_head *item,
- struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
+ struct list_lru_one *lru, void *arg)
{
struct list_head *dispose = arg;
struct nfs4_xattr_bucket *bucket;
@@ -943,7 +924,7 @@ nfs4_xattr_entry_scan(struct shrinker *shrink, struct shrink_control *sc)
struct nfs4_xattr_entry *entry;
struct list_lru *lru;
- lru = (shrink == &nfs4_xattr_large_entry_shrinker) ?
+ lru = (shrink == nfs4_xattr_large_entry_shrinker) ?
&nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
freed = list_lru_shrink_walk(lru, sc, entry_lru_isolate, &dispose);
@@ -971,7 +952,7 @@ nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc)
unsigned long count;
struct list_lru *lru;
- lru = (shrink == &nfs4_xattr_large_entry_shrinker) ?
+ lru = (shrink == nfs4_xattr_large_entry_shrinker) ?
&nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
count = list_lru_shrink_count(lru, sc);
@@ -991,55 +972,84 @@ static void nfs4_xattr_cache_init_once(void *p)
INIT_LIST_HEAD(&cache->dispose);
}
+typedef unsigned long (*count_objects_cb)(struct shrinker *s,
+ struct shrink_control *sc);
+typedef unsigned long (*scan_objects_cb)(struct shrinker *s,
+ struct shrink_control *sc);
+
+static int __init nfs4_xattr_shrinker_init(struct shrinker **shrinker,
+ struct list_lru *lru, const char *name,
+ count_objects_cb count,
+ scan_objects_cb scan, long batch, int seeks)
+{
+ int ret;
+
+ *shrinker = shrinker_alloc(SHRINKER_MEMCG_AWARE, name);
+ if (!*shrinker)
+ return -ENOMEM;
+
+ ret = list_lru_init_memcg(lru, *shrinker);
+ if (ret) {
+ shrinker_free(*shrinker);
+ return ret;
+ }
+
+ (*shrinker)->count_objects = count;
+ (*shrinker)->scan_objects = scan;
+ (*shrinker)->batch = batch;
+ (*shrinker)->seeks = seeks;
+
+ shrinker_register(*shrinker);
+
+ return ret;
+}
+
+static void nfs4_xattr_shrinker_destroy(struct shrinker *shrinker,
+ struct list_lru *lru)
+{
+ shrinker_free(shrinker);
+ list_lru_destroy(lru);
+}
+
int __init nfs4_xattr_cache_init(void)
{
int ret = 0;
nfs4_xattr_cache_cachep = kmem_cache_create("nfs4_xattr_cache_cache",
sizeof(struct nfs4_xattr_cache), 0,
- (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
+ (SLAB_RECLAIM_ACCOUNT),
nfs4_xattr_cache_init_once);
if (nfs4_xattr_cache_cachep == NULL)
return -ENOMEM;
- ret = list_lru_init_memcg(&nfs4_xattr_large_entry_lru,
- &nfs4_xattr_large_entry_shrinker);
- if (ret)
- goto out4;
-
- ret = list_lru_init_memcg(&nfs4_xattr_entry_lru,
- &nfs4_xattr_entry_shrinker);
- if (ret)
- goto out3;
-
- ret = list_lru_init_memcg(&nfs4_xattr_cache_lru,
- &nfs4_xattr_cache_shrinker);
- if (ret)
- goto out2;
-
- ret = register_shrinker(&nfs4_xattr_cache_shrinker, "nfs-xattr_cache");
+ ret = nfs4_xattr_shrinker_init(&nfs4_xattr_cache_shrinker,
+ &nfs4_xattr_cache_lru, "nfs-xattr_cache",
+ nfs4_xattr_cache_count,
+ nfs4_xattr_cache_scan, 0, DEFAULT_SEEKS);
if (ret)
goto out1;
- ret = register_shrinker(&nfs4_xattr_entry_shrinker, "nfs-xattr_entry");
+ ret = nfs4_xattr_shrinker_init(&nfs4_xattr_entry_shrinker,
+ &nfs4_xattr_entry_lru, "nfs-xattr_entry",
+ nfs4_xattr_entry_count,
+ nfs4_xattr_entry_scan, 512, DEFAULT_SEEKS);
if (ret)
- goto out;
+ goto out2;
- ret = register_shrinker(&nfs4_xattr_large_entry_shrinker,
- "nfs-xattr_large_entry");
+ ret = nfs4_xattr_shrinker_init(&nfs4_xattr_large_entry_shrinker,
+ &nfs4_xattr_large_entry_lru,
+ "nfs-xattr_large_entry",
+ nfs4_xattr_entry_count,
+ nfs4_xattr_entry_scan, 512, 1);
if (!ret)
return 0;
- unregister_shrinker(&nfs4_xattr_entry_shrinker);
-out:
- unregister_shrinker(&nfs4_xattr_cache_shrinker);
-out1:
- list_lru_destroy(&nfs4_xattr_cache_lru);
+ nfs4_xattr_shrinker_destroy(nfs4_xattr_entry_shrinker,
+ &nfs4_xattr_entry_lru);
out2:
- list_lru_destroy(&nfs4_xattr_entry_lru);
-out3:
- list_lru_destroy(&nfs4_xattr_large_entry_lru);
-out4:
+ nfs4_xattr_shrinker_destroy(nfs4_xattr_cache_shrinker,
+ &nfs4_xattr_cache_lru);
+out1:
kmem_cache_destroy(nfs4_xattr_cache_cachep);
return ret;
@@ -1047,11 +1057,11 @@ out4:
void nfs4_xattr_cache_exit(void)
{
- unregister_shrinker(&nfs4_xattr_large_entry_shrinker);
- unregister_shrinker(&nfs4_xattr_entry_shrinker);
- unregister_shrinker(&nfs4_xattr_cache_shrinker);
- list_lru_destroy(&nfs4_xattr_large_entry_lru);
- list_lru_destroy(&nfs4_xattr_entry_lru);
- list_lru_destroy(&nfs4_xattr_cache_lru);
+ nfs4_xattr_shrinker_destroy(nfs4_xattr_large_entry_shrinker,
+ &nfs4_xattr_large_entry_lru);
+ nfs4_xattr_shrinker_destroy(nfs4_xattr_entry_shrinker,
+ &nfs4_xattr_entry_lru);
+ nfs4_xattr_shrinker_destroy(nfs4_xattr_cache_shrinker,
+ &nfs4_xattr_cache_lru);
kmem_cache_destroy(nfs4_xattr_cache_cachep);
}
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index d80ee88ca996..e10d83ba835e 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -7,6 +7,9 @@
#include "nfs42.h"
+/* Not limited by NFS itself, limited by the generic xattr code */
+#define nfs4_xattr_name_maxsz XDR_QUADLEN(XATTR_NAME_MAX)
+
#define encode_fallocate_maxsz (encode_stateid_maxsz + \
2 /* offset */ + \
2 /* length */)
@@ -32,6 +35,11 @@
#define encode_offload_cancel_maxsz (op_encode_hdr_maxsz + \
XDR_QUADLEN(NFS4_STATEID_SIZE))
#define decode_offload_cancel_maxsz (op_decode_hdr_maxsz)
+#define encode_offload_status_maxsz (op_encode_hdr_maxsz + \
+ XDR_QUADLEN(NFS4_STATEID_SIZE))
+#define decode_offload_status_maxsz (op_decode_hdr_maxsz + \
+ 2 /* osr_count */ + \
+ 2 /* osr_complete */)
#define encode_copy_notify_maxsz (op_encode_hdr_maxsz + \
XDR_QUADLEN(NFS4_STATEID_SIZE) + \
1 + /* nl4_type */ \
@@ -51,10 +59,16 @@
(1 /* data_content4 */ + \
2 /* data_info4.di_offset */ + \
1 /* data_info4.di_length */)
+#define NFS42_READ_PLUS_HOLE_SEGMENT_SIZE \
+ (1 /* data_content4 */ + \
+ 2 /* data_info4.di_offset */ + \
+ 2 /* data_info4.di_length */)
+#define READ_PLUS_SEGMENT_SIZE_DIFF (NFS42_READ_PLUS_HOLE_SEGMENT_SIZE - \
+ NFS42_READ_PLUS_DATA_SEGMENT_SIZE)
#define decode_read_plus_maxsz (op_decode_hdr_maxsz + \
1 /* rpr_eof */ + \
1 /* rpr_contents count */ + \
- NFS42_READ_PLUS_DATA_SEGMENT_SIZE)
+ NFS42_READ_PLUS_HOLE_SEGMENT_SIZE)
#define encode_seek_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + \
2 /* offset */ + \
@@ -89,6 +103,18 @@
2 /* dst offset */ + \
2 /* count */)
#define decode_clone_maxsz (op_decode_hdr_maxsz)
+#define encode_getxattr_maxsz (op_encode_hdr_maxsz + 1 + \
+ nfs4_xattr_name_maxsz)
+#define decode_getxattr_maxsz (op_decode_hdr_maxsz + 1 + pagepad_maxsz)
+#define encode_setxattr_maxsz (op_encode_hdr_maxsz + \
+ 1 + nfs4_xattr_name_maxsz + 1)
+#define decode_setxattr_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz)
+#define encode_listxattrs_maxsz (op_encode_hdr_maxsz + 2 + 1)
+#define decode_listxattrs_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1 + 1)
+#define encode_removexattr_maxsz (op_encode_hdr_maxsz + 1 + \
+ nfs4_xattr_name_maxsz)
+#define decode_removexattr_maxsz (op_decode_hdr_maxsz + \
+ decode_change_info_maxsz)
#define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
@@ -122,10 +148,20 @@
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_offload_cancel_maxsz)
+#define NFS4_enc_offload_status_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_offload_status_maxsz)
+#define NFS4_dec_offload_status_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_offload_status_maxsz)
#define NFS4_enc_copy_notify_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_copy_notify_maxsz)
#define NFS4_dec_copy_notify_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_copy_notify_maxsz)
#define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \
@@ -138,6 +174,18 @@
decode_putfh_maxsz + \
decode_deallocate_maxsz + \
decode_getattr_maxsz)
+#define NFS4_enc_zero_range_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_deallocate_maxsz + \
+ encode_allocate_maxsz + \
+ encode_getattr_maxsz)
+#define NFS4_dec_zero_range_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_deallocate_maxsz + \
+ decode_allocate_maxsz + \
+ decode_getattr_maxsz)
#define NFS4_enc_read_plus_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
@@ -186,55 +234,40 @@
decode_putfh_maxsz + \
decode_clone_maxsz + \
decode_getattr_maxsz)
-
-/* Not limited by NFS itself, limited by the generic xattr code */
-#define nfs4_xattr_name_maxsz XDR_QUADLEN(XATTR_NAME_MAX)
-
-#define encode_getxattr_maxsz (op_encode_hdr_maxsz + 1 + \
- nfs4_xattr_name_maxsz)
-#define decode_getxattr_maxsz (op_decode_hdr_maxsz + 1 + pagepad_maxsz)
-#define encode_setxattr_maxsz (op_encode_hdr_maxsz + \
- 1 + nfs4_xattr_name_maxsz + 1)
-#define decode_setxattr_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz)
-#define encode_listxattrs_maxsz (op_encode_hdr_maxsz + 2 + 1)
-#define decode_listxattrs_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1 + 1)
-#define encode_removexattr_maxsz (op_encode_hdr_maxsz + 1 + \
- nfs4_xattr_name_maxsz)
-#define decode_removexattr_maxsz (op_decode_hdr_maxsz + \
- decode_change_info_maxsz)
-
-#define NFS4_enc_getxattr_sz (compound_encode_hdr_maxsz + \
- encode_sequence_maxsz + \
- encode_putfh_maxsz + \
- encode_getxattr_maxsz)
-#define NFS4_dec_getxattr_sz (compound_decode_hdr_maxsz + \
- decode_sequence_maxsz + \
- decode_putfh_maxsz + \
- decode_getxattr_maxsz)
-#define NFS4_enc_setxattr_sz (compound_encode_hdr_maxsz + \
- encode_sequence_maxsz + \
- encode_putfh_maxsz + \
- encode_setxattr_maxsz)
-#define NFS4_dec_setxattr_sz (compound_decode_hdr_maxsz + \
- decode_sequence_maxsz + \
- decode_putfh_maxsz + \
- decode_setxattr_maxsz)
-#define NFS4_enc_listxattrs_sz (compound_encode_hdr_maxsz + \
- encode_sequence_maxsz + \
- encode_putfh_maxsz + \
- encode_listxattrs_maxsz)
-#define NFS4_dec_listxattrs_sz (compound_decode_hdr_maxsz + \
- decode_sequence_maxsz + \
- decode_putfh_maxsz + \
- decode_listxattrs_maxsz)
-#define NFS4_enc_removexattr_sz (compound_encode_hdr_maxsz + \
- encode_sequence_maxsz + \
- encode_putfh_maxsz + \
- encode_removexattr_maxsz)
-#define NFS4_dec_removexattr_sz (compound_decode_hdr_maxsz + \
- decode_sequence_maxsz + \
- decode_putfh_maxsz + \
- decode_removexattr_maxsz)
+#define NFS4_enc_getxattr_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_getxattr_maxsz)
+#define NFS4_dec_getxattr_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_getxattr_maxsz)
+#define NFS4_enc_setxattr_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_setxattr_maxsz + \
+ encode_getattr_maxsz)
+#define NFS4_dec_setxattr_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_setxattr_maxsz + \
+ decode_getattr_maxsz)
+#define NFS4_enc_listxattrs_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_listxattrs_maxsz)
+#define NFS4_dec_listxattrs_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_listxattrs_maxsz)
+#define NFS4_enc_removexattr_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_removexattr_maxsz)
+#define NFS4_dec_removexattr_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_removexattr_maxsz)
/*
* These values specify the maximum amount of data that is not
@@ -317,6 +350,18 @@ static void encode_copy(struct xdr_stream *xdr,
encode_nl4_server(xdr, args->cp_src);
}
+static void encode_copy_commit(struct xdr_stream *xdr,
+ const struct nfs42_copy_args *args,
+ struct compound_hdr *hdr)
+{
+ __be32 *p;
+
+ encode_op_hdr(xdr, OP_COMMIT, decode_commit_maxsz, hdr);
+ p = reserve_space(xdr, 12);
+ p = xdr_encode_hyper(p, args->dst_pos);
+ *p = cpu_to_be32(args->count);
+}
+
static void encode_offload_cancel(struct xdr_stream *xdr,
const struct nfs42_offload_status_args *args,
struct compound_hdr *hdr)
@@ -325,6 +370,14 @@ static void encode_offload_cancel(struct xdr_stream *xdr,
encode_nfs4_stateid(xdr, &args->osa_stateid);
}
+static void encode_offload_status(struct xdr_stream *xdr,
+ const struct nfs42_offload_status_args *args,
+ struct compound_hdr *hdr)
+{
+ encode_op_hdr(xdr, OP_OFFLOAD_STATUS, decode_offload_status_maxsz, hdr);
+ encode_nfs4_stateid(xdr, &args->osa_stateid);
+}
+
static void encode_copy_notify(struct xdr_stream *xdr,
const struct nfs42_copy_notify_args *args,
struct compound_hdr *hdr)
@@ -452,20 +505,6 @@ static void encode_setxattr(struct xdr_stream *xdr,
xdr_write_pages(xdr, arg->xattr_pages, 0, arg->xattr_len);
}
-static int decode_setxattr(struct xdr_stream *xdr,
- struct nfs4_change_info *cinfo)
-{
- int status;
-
- status = decode_op_hdr(xdr, OP_SETXATTR);
- if (status)
- goto out;
- status = decode_change_info(xdr, cinfo);
-out:
- return status;
-}
-
-
static void encode_getxattr(struct xdr_stream *xdr, const char *name,
struct compound_hdr *hdr)
{
@@ -473,43 +512,6 @@ static void encode_getxattr(struct xdr_stream *xdr, const char *name,
encode_string(xdr, strlen(name), name);
}
-static int decode_getxattr(struct xdr_stream *xdr,
- struct nfs42_getxattrres *res,
- struct rpc_rqst *req)
-{
- int status;
- __be32 *p;
- u32 len, rdlen;
-
- status = decode_op_hdr(xdr, OP_GETXATTR);
- if (status)
- return status;
-
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
- return -EIO;
-
- len = be32_to_cpup(p);
-
- /*
- * Only check against the page length here. The actual
- * requested length may be smaller, but that is only
- * checked against after possibly caching a valid reply.
- */
- if (len > req->rq_rcv_buf.page_len)
- return -ERANGE;
-
- res->xattr_len = len;
-
- if (len > 0) {
- rdlen = xdr_read_pages(xdr, len);
- if (rdlen < len)
- return -EIO;
- }
-
- return 0;
-}
-
static void encode_removexattr(struct xdr_stream *xdr, const char *name,
struct compound_hdr *hdr)
{
@@ -517,21 +519,6 @@ static void encode_removexattr(struct xdr_stream *xdr, const char *name,
encode_string(xdr, strlen(name), name);
}
-
-static int decode_removexattr(struct xdr_stream *xdr,
- struct nfs4_change_info *cinfo)
-{
- int status;
-
- status = decode_op_hdr(xdr, OP_REMOVEXATTR);
- if (status)
- goto out;
-
- status = decode_change_info(xdr, cinfo);
-out:
- return status;
-}
-
static void encode_listxattrs(struct xdr_stream *xdr,
const struct nfs42_listxattrsargs *arg,
struct compound_hdr *hdr)
@@ -553,104 +540,6 @@ static void encode_listxattrs(struct xdr_stream *xdr,
*p = cpu_to_be32(arg->count + 8 + 4);
}
-static int decode_listxattrs(struct xdr_stream *xdr,
- struct nfs42_listxattrsres *res)
-{
- int status;
- __be32 *p;
- u32 count, len, ulen;
- size_t left, copied;
- char *buf;
-
- status = decode_op_hdr(xdr, OP_LISTXATTRS);
- if (status) {
- /*
- * Special case: for LISTXATTRS, NFS4ERR_TOOSMALL
- * should be translated to ERANGE.
- */
- if (status == -ETOOSMALL)
- status = -ERANGE;
- /*
- * Special case: for LISTXATTRS, NFS4ERR_NOXATTR
- * should be translated to success with zero-length reply.
- */
- if (status == -ENODATA) {
- res->eof = true;
- status = 0;
- }
- goto out;
- }
-
- p = xdr_inline_decode(xdr, 8);
- if (unlikely(!p))
- return -EIO;
-
- xdr_decode_hyper(p, &res->cookie);
-
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
- return -EIO;
-
- left = res->xattr_len;
- buf = res->xattr_buf;
-
- count = be32_to_cpup(p);
- copied = 0;
-
- /*
- * We have asked for enough room to encode the maximum number
- * of possible attribute names, so everything should fit.
- *
- * But, don't rely on that assumption. Just decode entries
- * until they don't fit anymore, just in case the server did
- * something odd.
- */
- while (count--) {
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
- return -EIO;
-
- len = be32_to_cpup(p);
- if (len > (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN)) {
- status = -ERANGE;
- goto out;
- }
-
- p = xdr_inline_decode(xdr, len);
- if (unlikely(!p))
- return -EIO;
-
- ulen = len + XATTR_USER_PREFIX_LEN + 1;
- if (buf) {
- if (ulen > left) {
- status = -ERANGE;
- goto out;
- }
-
- memcpy(buf, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
- memcpy(buf + XATTR_USER_PREFIX_LEN, p, len);
-
- buf[ulen - 1] = 0;
- buf += ulen;
- left -= ulen;
- }
- copied += ulen;
- }
-
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
- return -EIO;
-
- res->eof = be32_to_cpup(p);
- res->copied = copied;
-
-out:
- if (status == -ERANGE && res->xattr_len == XATTR_LIST_MAX)
- status = -E2BIG;
-
- return status;
-}
-
/*
* Encode ALLOCATE request
*/
@@ -671,18 +560,6 @@ static void nfs4_xdr_enc_allocate(struct rpc_rqst *req,
encode_nops(&hdr);
}
-static void encode_copy_commit(struct xdr_stream *xdr,
- const struct nfs42_copy_args *args,
- struct compound_hdr *hdr)
-{
- __be32 *p;
-
- encode_op_hdr(xdr, OP_COMMIT, decode_commit_maxsz, hdr);
- p = reserve_space(xdr, 12);
- p = xdr_encode_hyper(p, args->dst_pos);
- *p = cpu_to_be32(args->count);
-}
-
/*
* Encode COPY request
*/
@@ -707,7 +584,7 @@ static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
}
/*
- * Encode OFFLOAD_CANEL request
+ * Encode OFFLOAD_CANCEL request
*/
static void nfs4_xdr_enc_offload_cancel(struct rpc_rqst *req,
struct xdr_stream *xdr,
@@ -726,6 +603,25 @@ static void nfs4_xdr_enc_offload_cancel(struct rpc_rqst *req,
}
/*
+ * Encode OFFLOAD_STATUS request
+ */
+static void nfs4_xdr_enc_offload_status(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const void *data)
+{
+ const struct nfs42_offload_status_args *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->osa_seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->osa_seq_args, &hdr);
+ encode_putfh(xdr, args->osa_src_fh, &hdr);
+ encode_offload_status(xdr, args, &hdr);
+ encode_nops(&hdr);
+}
+
+/*
* Encode COPY_NOTIFY request
*/
static void nfs4_xdr_enc_copy_notify(struct rpc_rqst *req,
@@ -765,6 +661,27 @@ static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
}
/*
+ * Encode ZERO_RANGE request
+ */
+static void nfs4_xdr_enc_zero_range(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const void *data)
+{
+ const struct nfs42_falloc_args *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->falloc_fh, &hdr);
+ encode_deallocate(xdr, args, &hdr);
+ encode_allocate(xdr, args, &hdr);
+ encode_getfattr(xdr, args->falloc_bitmask, &hdr);
+ encode_nops(&hdr);
+}
+
+/*
* Encode READ_PLUS request
*/
static void nfs4_xdr_enc_read_plus(struct rpc_rqst *req,
@@ -781,8 +698,8 @@ static void nfs4_xdr_enc_read_plus(struct rpc_rqst *req,
encode_putfh(xdr, args->fh, &hdr);
encode_read_plus(xdr, args, &hdr);
- rpc_prepare_reply_pages(req, args->pages, args->pgbase,
- args->count, hdr.replen);
+ rpc_prepare_reply_pages(req, args->pages, args->pgbase, args->count,
+ hdr.replen - READ_PLUS_SEGMENT_SIZE_DIFF);
encode_nops(&hdr);
}
@@ -871,6 +788,90 @@ static void nfs4_xdr_enc_layouterror(struct rpc_rqst *req,
encode_nops(&hdr);
}
+/*
+ * Encode SETXATTR request
+ */
+static void nfs4_xdr_enc_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const void *data)
+{
+ const struct nfs42_setxattrargs *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_setxattr(xdr, args, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
+ encode_nops(&hdr);
+}
+
+/*
+ * Encode GETXATTR request
+ */
+static void nfs4_xdr_enc_getxattr(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const void *data)
+{
+ const struct nfs42_getxattrargs *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+ uint32_t replen;
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ replen = hdr.replen + op_decode_hdr_maxsz + 1;
+ encode_getxattr(xdr, args->xattr_name, &hdr);
+
+ rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->xattr_len,
+ replen);
+
+ encode_nops(&hdr);
+}
+
+/*
+ * Encode LISTXATTR request
+ */
+static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req,
+ struct xdr_stream *xdr, const void *data)
+{
+ const struct nfs42_listxattrsargs *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+ uint32_t replen;
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ replen = hdr.replen + op_decode_hdr_maxsz + 2 + 1;
+ encode_listxattrs(xdr, args, &hdr);
+
+ rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count, replen);
+
+ encode_nops(&hdr);
+}
+
+/*
+ * Encode REMOVEXATTR request
+ */
+static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req,
+ struct xdr_stream *xdr, const void *data)
+{
+ const struct nfs42_removexattrargs *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_removexattr(xdr, args->xattr_name, &hdr);
+ encode_nops(&hdr);
+}
+
static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
{
return decode_op_hdr(xdr, OP_ALLOCATE);
@@ -993,6 +994,26 @@ static int decode_offload_cancel(struct xdr_stream *xdr,
return decode_op_hdr(xdr, OP_OFFLOAD_CANCEL);
}
+static int decode_offload_status(struct xdr_stream *xdr,
+ struct nfs42_offload_status_res *res)
+{
+ ssize_t result;
+ int status;
+
+ status = decode_op_hdr(xdr, OP_OFFLOAD_STATUS);
+ if (status)
+ return status;
+ /* osr_count */
+ if (xdr_stream_decode_u64(xdr, &res->osr_count) < 0)
+ return -EIO;
+ /* osr_complete<1> */
+ result = xdr_stream_decode_uint32_array(xdr, &res->osr_complete, 1);
+ if (result < 0)
+ return -EIO;
+ res->complete_count = result;
+ return 0;
+}
+
static int decode_copy_notify(struct xdr_stream *xdr,
struct nfs42_copy_notify_res *res)
{
@@ -1122,7 +1143,6 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
uint32_t segments;
struct read_plus_segment *segs;
int status, i;
- char scratch_buf[16];
__be32 *p;
status = decode_op_hdr(xdr, OP_READ_PLUS);
@@ -1137,14 +1157,12 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
res->eof = be32_to_cpup(p++);
segments = be32_to_cpup(p++);
if (segments == 0)
- return status;
+ return 0;
segs = kmalloc_array(segments, sizeof(*segs), GFP_KERNEL);
if (!segs)
return -ENOMEM;
- xdr_set_scratch_buffer(xdr, &scratch_buf, sizeof(scratch_buf));
- status = -EIO;
for (i = 0; i < segments; i++) {
status = decode_read_plus_segment(xdr, &segs[i]);
if (status < 0)
@@ -1194,6 +1212,168 @@ static int decode_layouterror(struct xdr_stream *xdr)
return decode_op_hdr(xdr, OP_LAYOUTERROR);
}
+static int decode_setxattr(struct xdr_stream *xdr,
+ struct nfs4_change_info *cinfo)
+{
+ int status;
+
+ status = decode_op_hdr(xdr, OP_SETXATTR);
+ if (status)
+ goto out;
+ status = decode_change_info(xdr, cinfo);
+out:
+ return status;
+}
+
+static int decode_getxattr(struct xdr_stream *xdr,
+ struct nfs42_getxattrres *res,
+ struct rpc_rqst *req)
+{
+ int status;
+ __be32 *p;
+ u32 len, rdlen;
+
+ status = decode_op_hdr(xdr, OP_GETXATTR);
+ if (status)
+ return status;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ len = be32_to_cpup(p);
+
+ /*
+ * Only check against the page length here. The actual
+ * requested length may be smaller, but that is only
+ * checked against after possibly caching a valid reply.
+ */
+ if (len > req->rq_rcv_buf.page_len)
+ return -ERANGE;
+
+ res->xattr_len = len;
+
+ if (len > 0) {
+ rdlen = xdr_read_pages(xdr, len);
+ if (rdlen < len)
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int decode_removexattr(struct xdr_stream *xdr,
+ struct nfs4_change_info *cinfo)
+{
+ int status;
+
+ status = decode_op_hdr(xdr, OP_REMOVEXATTR);
+ if (status)
+ goto out;
+
+ status = decode_change_info(xdr, cinfo);
+out:
+ return status;
+}
+
+static int decode_listxattrs(struct xdr_stream *xdr,
+ struct nfs42_listxattrsres *res)
+{
+ int status;
+ __be32 *p;
+ u32 count, len, ulen;
+ size_t left, copied;
+ char *buf;
+
+ status = decode_op_hdr(xdr, OP_LISTXATTRS);
+ if (status) {
+ /*
+ * Special case: for LISTXATTRS, NFS4ERR_TOOSMALL
+ * should be translated to ERANGE.
+ */
+ if (status == -ETOOSMALL)
+ status = -ERANGE;
+ /*
+ * Special case: for LISTXATTRS, NFS4ERR_NOXATTR
+ * should be translated to success with zero-length reply.
+ */
+ if (status == -ENODATA) {
+ res->eof = true;
+ status = 0;
+ }
+ goto out;
+ }
+
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ return -EIO;
+
+ xdr_decode_hyper(p, &res->cookie);
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ left = res->xattr_len;
+ buf = res->xattr_buf;
+
+ count = be32_to_cpup(p);
+ copied = 0;
+
+ /*
+ * We have asked for enough room to encode the maximum number
+ * of possible attribute names, so everything should fit.
+ *
+ * But, don't rely on that assumption. Just decode entries
+ * until they don't fit anymore, just in case the server did
+ * something odd.
+ */
+ while (count--) {
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ len = be32_to_cpup(p);
+ if (len > (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN)) {
+ status = -ERANGE;
+ goto out;
+ }
+
+ p = xdr_inline_decode(xdr, len);
+ if (unlikely(!p))
+ return -EIO;
+
+ ulen = len + XATTR_USER_PREFIX_LEN + 1;
+ if (buf) {
+ if (ulen > left) {
+ status = -ERANGE;
+ goto out;
+ }
+
+ memcpy(buf, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
+ memcpy(buf + XATTR_USER_PREFIX_LEN, p, len);
+
+ buf[ulen - 1] = 0;
+ buf += ulen;
+ left -= ulen;
+ }
+ copied += ulen;
+ }
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ res->eof = be32_to_cpup(p);
+ res->copied = copied;
+
+out:
+ if (status == -ERANGE && res->xattr_len == XATTR_LIST_MAX)
+ status = -E2BIG;
+
+ return status;
+}
+
/*
* Decode ALLOCATE request
*/
@@ -1284,6 +1464,32 @@ out:
}
/*
+ * Decode OFFLOAD_STATUS response
+ */
+static int nfs4_xdr_dec_offload_status(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfs42_offload_status_res *res = data;
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->osr_seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_offload_status(xdr, res);
+
+out:
+ return status;
+}
+
+/*
* Decode COPY_NOTIFY response
*/
static int nfs4_xdr_dec_copy_notify(struct rpc_rqst *rqstp,
@@ -1338,6 +1544,37 @@ out:
}
/*
+ * Decode ZERO_RANGE request
+ */
+static int nfs4_xdr_dec_zero_range(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfs42_falloc_res *res = data;
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_deallocate(xdr, res);
+ if (status)
+ goto out;
+ status = decode_allocate(xdr, res);
+ if (status)
+ goto out;
+ decode_getfattr(xdr, res->falloc_fattr, res->falloc_server);
+out:
+ return status;
+}
+
+/*
* Decode READ_PLUS request
*/
static int nfs4_xdr_dec_read_plus(struct rpc_rqst *rqstp,
@@ -1348,6 +1585,8 @@ static int nfs4_xdr_dec_read_plus(struct rpc_rqst *rqstp,
struct compound_hdr hdr;
int status;
+ xdr_set_scratch_buffer(xdr, res->scratch, READ_PLUS_SCRATCH_SIZE);
+
status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
@@ -1481,22 +1720,9 @@ out:
return status;
}
-#ifdef CONFIG_NFS_V4_2
-static void nfs4_xdr_enc_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr,
- const void *data)
-{
- const struct nfs42_setxattrargs *args = data;
- struct compound_hdr hdr = {
- .minorversion = nfs4_xdr_minorversion(&args->seq_args),
- };
-
- encode_compound_hdr(xdr, req, &hdr);
- encode_sequence(xdr, &args->seq_args, &hdr);
- encode_putfh(xdr, args->fh, &hdr);
- encode_setxattr(xdr, args, &hdr);
- encode_nops(&hdr);
-}
-
+/*
+ * Decode SETXATTR request
+ */
static int nfs4_xdr_dec_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr,
void *data)
{
@@ -1513,33 +1739,17 @@ static int nfs4_xdr_dec_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr,
status = decode_putfh(xdr);
if (status)
goto out;
-
status = decode_setxattr(xdr, &res->cinfo);
+ if (status)
+ goto out;
+ status = decode_getfattr(xdr, res->fattr, res->server);
out:
return status;
}
-static void nfs4_xdr_enc_getxattr(struct rpc_rqst *req, struct xdr_stream *xdr,
- const void *data)
-{
- const struct nfs42_getxattrargs *args = data;
- struct compound_hdr hdr = {
- .minorversion = nfs4_xdr_minorversion(&args->seq_args),
- };
- uint32_t replen;
-
- encode_compound_hdr(xdr, req, &hdr);
- encode_sequence(xdr, &args->seq_args, &hdr);
- encode_putfh(xdr, args->fh, &hdr);
- replen = hdr.replen + op_decode_hdr_maxsz + 1;
- encode_getxattr(xdr, args->xattr_name, &hdr);
-
- rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->xattr_len,
- replen);
-
- encode_nops(&hdr);
-}
-
+/*
+ * Decode GETXATTR request
+ */
static int nfs4_xdr_dec_getxattr(struct rpc_rqst *rqstp,
struct xdr_stream *xdr, void *data)
{
@@ -1561,26 +1771,9 @@ out:
return status;
}
-static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req,
- struct xdr_stream *xdr, const void *data)
-{
- const struct nfs42_listxattrsargs *args = data;
- struct compound_hdr hdr = {
- .minorversion = nfs4_xdr_minorversion(&args->seq_args),
- };
- uint32_t replen;
-
- encode_compound_hdr(xdr, req, &hdr);
- encode_sequence(xdr, &args->seq_args, &hdr);
- encode_putfh(xdr, args->fh, &hdr);
- replen = hdr.replen + op_decode_hdr_maxsz + 2 + 1;
- encode_listxattrs(xdr, args, &hdr);
-
- rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count, replen);
-
- encode_nops(&hdr);
-}
-
+/*
+ * Decode LISTXATTR request
+ */
static int nfs4_xdr_dec_listxattrs(struct rpc_rqst *rqstp,
struct xdr_stream *xdr, void *data)
{
@@ -1588,7 +1781,7 @@ static int nfs4_xdr_dec_listxattrs(struct rpc_rqst *rqstp,
struct compound_hdr hdr;
int status;
- xdr_set_scratch_page(xdr, res->scratch);
+ xdr_set_scratch_folio(xdr, res->scratch);
status = decode_compound_hdr(xdr, &hdr);
if (status)
@@ -1604,21 +1797,9 @@ out:
return status;
}
-static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req,
- struct xdr_stream *xdr, const void *data)
-{
- const struct nfs42_removexattrargs *args = data;
- struct compound_hdr hdr = {
- .minorversion = nfs4_xdr_minorversion(&args->seq_args),
- };
-
- encode_compound_hdr(xdr, req, &hdr);
- encode_sequence(xdr, &args->seq_args, &hdr);
- encode_putfh(xdr, args->fh, &hdr);
- encode_removexattr(xdr, args->xattr_name, &hdr);
- encode_nops(&hdr);
-}
-
+/*
+ * Decode REMOVEXATTR request
+ */
static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req,
struct xdr_stream *xdr, void *data)
{
@@ -1640,5 +1821,4 @@ static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req,
out:
return status;
}
-#endif
#endif /* __LINUX_FS_NFS_NFS4_2XDR_H */
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 5edd1704f735..c34c89af9c7d 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -23,6 +23,7 @@
#define NFS4_MAX_LOOP_ON_RECOVER (10)
#include <linux/seqlock.h>
+#include <linux/filelock.h>
struct idmap;
@@ -62,11 +63,11 @@ struct nfs4_minor_version_ops {
bool (*match_stateid)(const nfs4_stateid *,
const nfs4_stateid *);
int (*find_root_sec)(struct nfs_server *, struct nfs_fh *,
- struct nfs_fsinfo *);
+ struct nfs_fattr *);
void (*free_lock_state)(struct nfs_server *,
struct nfs4_lock_state *);
int (*test_and_free_expired)(struct nfs_server *,
- nfs4_stateid *, const struct cred *);
+ nfs4_stateid *, const struct cred *);
struct nfs_seqid *
(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t);
void (*session_trunk)(struct rpc_clnt *clnt,
@@ -81,7 +82,7 @@ struct nfs4_minor_version_ops {
#define NFS_SEQID_CONFIRMED 1
struct nfs_seqid_counter {
ktime_t create_time;
- int owner_id;
+ u64 owner_id;
int flags;
u32 counter;
spinlock_t lock; /* Protects the list */
@@ -119,7 +120,6 @@ struct nfs4_state_owner {
unsigned long so_flags;
struct list_head so_states;
struct nfs_seqid_counter so_seqid;
- seqcount_spinlock_t so_reclaim_seqcount;
struct mutex so_delegreturn_mutex;
};
@@ -208,6 +208,7 @@ struct nfs4_exception {
struct inode *inode;
nfs4_stateid *stateid;
long timeout;
+ unsigned short retrans;
unsigned char task_is_privileged : 1;
unsigned char delay : 1,
recovering : 1,
@@ -295,7 +296,8 @@ extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *,
extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int, int);
extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, const struct cred *, struct nfs4_setclientid_res *);
extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, const struct cred *);
-extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool);
+extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *,
+ struct nfs_fattr *, bool);
extern int nfs4_proc_bind_conn_to_session(struct nfs_client *, const struct cred *cred);
extern int nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cred);
extern int nfs4_destroy_clientid(struct nfs_client *clp);
@@ -314,7 +316,7 @@ extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *,
struct nfs_fh *,
struct nfs_fattr *);
extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
-extern const struct xattr_handler *nfs4_xattr_handlers[];
+extern const struct xattr_handler * const nfs4_xattr_handlers[];
extern int nfs4_set_rw_stateid(nfs4_stateid *stateid,
const struct nfs_open_context *ctx,
const struct nfs_lock_context *l_ctx,
@@ -327,8 +329,8 @@ extern int update_open_stateid(struct nfs4_state *state,
const nfs4_stateid *open_stateid,
const nfs4_stateid *deleg_stateid,
fmode_t fmode);
-extern int nfs4_proc_setlease(struct file *file, long arg,
- struct file_lock **lease, void **priv);
+extern int nfs4_proc_setlease(struct file *file, int arg,
+ struct file_lease **lease, void **priv);
extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
struct nfs_fsinfo *fsinfo);
extern void nfs4_update_changeattr(struct inode *dir,
@@ -545,6 +547,7 @@ extern unsigned short max_session_slots;
extern unsigned short max_session_cb_slots;
extern unsigned short send_implementation_id;
extern bool recover_lost_locks;
+extern short nfs_delay_retrans;
#define NFS4_CLIENT_ID_UNIQ_LEN (64)
extern char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN];
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index d3051b051a56..3a4baed993c9 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -11,6 +11,7 @@
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/bc_xprt.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
+#include <net/handshake.h>
#include "internal.h"
#include "callback.h"
#include "delegation.h"
@@ -18,6 +19,7 @@
#include "nfs4idmap.h"
#include "pnfs.h"
#include "netns.h"
+#include "sysfs.h"
#define NFSDBG_FACILITY NFSDBG_CLIENT
@@ -221,6 +223,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
clp->cl_mig_gen = 1;
+ clp->cl_last_renewal = jiffies;
#if IS_ENABLED(CONFIG_NFS_V4_1)
init_waitqueue_head(&clp->cl_lock_waitq);
#endif
@@ -230,7 +233,10 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
__set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags);
__set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
__set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags);
-
+ if (test_bit(NFS_CS_PNFS, &cl_init->init_flags))
+ __set_bit(NFS_CS_PNFS, &clp->cl_flags);
+ if (test_bit(NFS_CS_NETUNREACH_FATAL, &cl_init->init_flags))
+ __set_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags);
/*
* Set up the connection to the server before we add add to the
* global list.
@@ -414,6 +420,8 @@ static void nfs4_add_trunk(struct nfs_client *clp, struct nfs_client *old)
.net = old->cl_net,
.servername = old->cl_hostname,
};
+ int max_connect = test_bit(NFS_CS_PNFS, &clp->cl_flags) ?
+ clp->cl_max_connect : old->cl_max_connect;
if (clp->cl_proto != old->cl_proto)
return;
@@ -427,7 +435,7 @@ static void nfs4_add_trunk(struct nfs_client *clp, struct nfs_client *old)
xprt_args.addrlen = clp_salen;
rpc_clnt_add_xprt(old->cl_rpcclient, &xprt_args,
- rpc_clnt_test_and_add_xprt, NULL);
+ rpc_clnt_test_and_add_xprt, &max_connect);
}
/**
@@ -796,6 +804,7 @@ static void nfs4_destroy_server(struct nfs_server *server)
unset_pnfs_layoutdriver(server);
nfs4_purge_state_owners(server, &freeme);
nfs4_free_state_owners(&freeme);
+ kfree(server->delegation_hash_table);
}
/*
@@ -889,46 +898,40 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
* Set up an NFS4 client
*/
static int nfs4_set_client(struct nfs_server *server,
- const char *hostname,
- const struct sockaddr_storage *addr,
- const size_t addrlen,
- const char *ip_addr,
- int proto, const struct rpc_timeout *timeparms,
- u32 minorversion, unsigned int nconnect,
- unsigned int max_connect,
- struct net *net)
+ struct nfs_client_initdata *cl_init)
{
- struct nfs_client_initdata cl_init = {
- .hostname = hostname,
- .addr = addr,
- .addrlen = addrlen,
- .ip_addr = ip_addr,
- .nfs_mod = &nfs_v4,
- .proto = proto,
- .minorversion = minorversion,
- .net = net,
- .timeparms = timeparms,
- .cred = server->cred,
- };
struct nfs_client *clp;
- if (minorversion == 0)
- __set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags);
- else
- cl_init.max_connect = max_connect;
- if (proto == XPRT_TRANSPORT_TCP)
- cl_init.nconnect = nconnect;
+ cl_init->nfs_mod = &nfs_v4;
+ cl_init->cred = server->cred;
+
+ if (cl_init->minorversion == 0) {
+ __set_bit(NFS_CS_REUSEPORT, &cl_init->init_flags);
+ cl_init->max_connect = 0;
+ }
+
+ switch (cl_init->proto) {
+ case XPRT_TRANSPORT_RDMA:
+ case XPRT_TRANSPORT_TCP:
+ case XPRT_TRANSPORT_TCP_TLS:
+ break;
+ default:
+ cl_init->nconnect = 0;
+ }
if (server->flags & NFS_MOUNT_NORESVPORT)
- __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+ __set_bit(NFS_CS_NORESVPORT, &cl_init->init_flags);
if (server->options & NFS_OPTION_MIGRATION)
- __set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
+ __set_bit(NFS_CS_MIGRATION, &cl_init->init_flags);
if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status))
- __set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
- server->port = rpc_get_port((struct sockaddr *)addr);
+ __set_bit(NFS_CS_TSM_POSSIBLE, &cl_init->init_flags);
+ server->port = rpc_get_port((struct sockaddr *)cl_init->addr);
+
+ if (server->flags & NFS_MOUNT_NETUNREACH_FATAL)
+ __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init->init_flags);
/* Allocate or find a client reference we can use */
- clp = nfs_get_client(&cl_init);
+ clp = nfs_get_client(cl_init);
if (IS_ERR(clp))
return PTR_ERR(clp);
@@ -947,6 +950,9 @@ static int nfs4_set_client(struct nfs_server *server,
set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state);
server->nfs_client = clp;
+ nfs_sysfs_add_server(server);
+ nfs_sysfs_link_rpc_client(server, clp->cl_rpcclient, "_state");
+
return 0;
}
@@ -978,6 +984,11 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
.net = mds_clp->cl_net,
.timeparms = &ds_timeout,
.cred = mds_srv->cred,
+ .xprtsec = {
+ .policy = RPC_XPRTSEC_NONE,
+ .cert_serial = TLS_NO_CERT,
+ .privkey_serial = TLS_NO_PRIVKEY,
+ },
};
char buf[INET6_ADDRSTRLEN + 1];
@@ -985,14 +996,28 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
return ERR_PTR(-EINVAL);
cl_init.hostname = buf;
- if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) {
- cl_init.nconnect = mds_clp->cl_nconnect;
- cl_init.max_connect = NFS_MAX_TRANSPORTS;
+ switch (ds_proto) {
+ case XPRT_TRANSPORT_TCP_TLS:
+ if (mds_srv->nfs_client->cl_xprtsec.policy != RPC_XPRTSEC_NONE)
+ cl_init.xprtsec = mds_srv->nfs_client->cl_xprtsec;
+ else
+ ds_proto = XPRT_TRANSPORT_TCP;
+ fallthrough;
+ case XPRT_TRANSPORT_RDMA:
+ case XPRT_TRANSPORT_TCP:
+ if (mds_clp->cl_nconnect > 1) {
+ cl_init.nconnect = mds_clp->cl_nconnect;
+ cl_init.max_connect = NFS_MAX_TRANSPORTS;
+ }
}
if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+ if (test_bit(NFS_CS_NETUNREACH_FATAL, &mds_clp->cl_flags))
+ __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags);
+ __set_bit(NFS_CS_PNFS, &cl_init.init_flags);
+ cl_init.max_connect = NFS_MAX_TRANSPORTS;
/*
* Set an authflavor equual to the MDS value. Use the MDS nfs_client
* cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
@@ -1060,29 +1085,15 @@ static void nfs4_session_limit_xasize(struct nfs_server *server)
#endif
}
-void nfs4_server_set_init_caps(struct nfs_server *server)
-{
- /* Set the basic capabilities */
- server->caps |= server->nfs_client->cl_mvops->init_caps;
- if (server->flags & NFS_MOUNT_NORDIRPLUS)
- server->caps &= ~NFS_CAP_READDIRPLUS;
- if (server->nfs_client->cl_proto == XPRT_TRANSPORT_RDMA)
- server->caps &= ~NFS_CAP_READ_PLUS;
-
- /*
- * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
- * authentication.
- */
- if (nfs4_disable_idmapping &&
- server->client->cl_auth->au_flavor == RPC_AUTH_UNIX)
- server->caps |= NFS_CAP_UIDGID_NOMAP;
-}
-
static int nfs4_server_common_setup(struct nfs_server *server,
struct nfs_fh *mntfh, bool auth_probe)
{
int error;
+ error = nfs4_delegation_hash_alloc(server);
+ if (error)
+ return error;
+
/* data servers support only a subset of NFSv4.1 */
if (is_ds_only_client(server->nfs_client))
return -EPROTONOSUPPORT;
@@ -1090,14 +1101,14 @@ static int nfs4_server_common_setup(struct nfs_server *server,
/* We must ensure the session is initialised first */
error = nfs4_init_session(server->nfs_client);
if (error < 0)
- goto out;
+ return error;
- nfs4_server_set_init_caps(server);
+ nfs_server_set_init_caps(server);
/* Probe the root fh to retrieve its FSID and filehandle */
error = nfs4_get_rootfh(server, mntfh, auth_probe);
if (error < 0)
- goto out;
+ return error;
dprintk("Server FSID: %llx:%llx\n",
(unsigned long long) server->fsid.major,
@@ -1106,7 +1117,7 @@ static int nfs4_server_common_setup(struct nfs_server *server,
error = nfs_probe_server(server, mntfh);
if (error < 0)
- goto out;
+ return error;
nfs4_session_limit_rwsize(server);
nfs4_session_limit_xasize(server);
@@ -1117,8 +1128,7 @@ static int nfs4_server_common_setup(struct nfs_server *server,
nfs_server_insert_lists(server);
server->mount_time = jiffies;
server->destroy = nfs4_destroy_server;
-out:
- return error;
+ return 0;
}
/*
@@ -1128,6 +1138,19 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
{
struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct rpc_timeout timeparms;
+ struct nfs_client_initdata cl_init = {
+ .hostname = ctx->nfs_server.hostname,
+ .addr = &ctx->nfs_server._address,
+ .addrlen = ctx->nfs_server.addrlen,
+ .ip_addr = ctx->client_address,
+ .proto = ctx->nfs_server.protocol,
+ .minorversion = ctx->minorversion,
+ .net = fc->net_ns,
+ .timeparms = &timeparms,
+ .xprtsec = ctx->xprtsec,
+ .nconnect = ctx->nfs_server.nconnect,
+ .max_connect = ctx->nfs_server.max_connect,
+ };
int error;
nfs_init_timeout_values(&timeparms, ctx->nfs_server.protocol,
@@ -1147,17 +1170,7 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
ctx->selected_flavor = RPC_AUTH_UNIX;
/* Get a client record */
- error = nfs4_set_client(server,
- ctx->nfs_server.hostname,
- &ctx->nfs_server._address,
- ctx->nfs_server.addrlen,
- ctx->client_address,
- ctx->nfs_server.protocol,
- &timeparms,
- ctx->minorversion,
- ctx->nfs_server.nconnect,
- ctx->nfs_server.max_connect,
- fc->net_ns);
+ error = nfs4_set_client(server, &cl_init);
if (error < 0)
return error;
@@ -1217,8 +1230,21 @@ error:
struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
{
struct nfs_fs_context *ctx = nfs_fc2context(fc);
- struct nfs_client *parent_client;
- struct nfs_server *server, *parent_server;
+ struct nfs_server *parent_server = NFS_SB(ctx->clone_data.sb);
+ struct nfs_client *parent_client = parent_server->nfs_client;
+ struct nfs_client_initdata cl_init = {
+ .hostname = ctx->nfs_server.hostname,
+ .addr = &ctx->nfs_server._address,
+ .addrlen = ctx->nfs_server.addrlen,
+ .ip_addr = parent_client->cl_ipaddr,
+ .minorversion = parent_client->cl_mvops->minor_version,
+ .net = parent_client->cl_net,
+ .timeparms = parent_server->client->cl_timeout,
+ .xprtsec = parent_client->cl_xprtsec,
+ .nconnect = parent_client->cl_nconnect,
+ .max_connect = parent_client->cl_max_connect,
+ };
+ struct nfs_server *server;
bool auth_probe;
int error;
@@ -1226,9 +1252,6 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
if (!server)
return ERR_PTR(-ENOMEM);
- parent_server = NFS_SB(ctx->clone_data.sb);
- parent_client = parent_server->nfs_client;
-
server->cred = get_cred(parent_server->cred);
/* Initialise the client representation from the parent server */
@@ -1237,33 +1260,17 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
/* Get a client representation */
#if IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA)
rpc_set_port(&ctx->nfs_server.address, NFS_RDMA_PORT);
- error = nfs4_set_client(server,
- ctx->nfs_server.hostname,
- &ctx->nfs_server._address,
- ctx->nfs_server.addrlen,
- parent_client->cl_ipaddr,
- XPRT_TRANSPORT_RDMA,
- parent_server->client->cl_timeout,
- parent_client->cl_mvops->minor_version,
- parent_client->cl_nconnect,
- parent_client->cl_max_connect,
- parent_client->cl_net);
+ cl_init.proto = XPRT_TRANSPORT_RDMA;
+ error = nfs4_set_client(server, &cl_init);
if (!error)
goto init_server;
#endif /* IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) */
+ cl_init.proto = XPRT_TRANSPORT_TCP;
+ if (parent_client->cl_xprtsec.policy != RPC_XPRTSEC_NONE)
+ cl_init.proto = XPRT_TRANSPORT_TCP_TLS;
rpc_set_port(&ctx->nfs_server.address, NFS_PORT);
- error = nfs4_set_client(server,
- ctx->nfs_server.hostname,
- &ctx->nfs_server._address,
- ctx->nfs_server.addrlen,
- parent_client->cl_ipaddr,
- XPRT_TRANSPORT_TCP,
- parent_server->client->cl_timeout,
- parent_client->cl_mvops->minor_version,
- parent_client->cl_nconnect,
- parent_client->cl_max_connect,
- parent_client->cl_net);
+ error = nfs4_set_client(server, &cl_init);
if (error < 0)
goto error;
@@ -1314,10 +1321,24 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
.dstaddr = (struct sockaddr *)sap,
.addrlen = salen,
.servername = hostname,
+ /* cel: bleh. We might need to pass TLS parameters here */
};
char buf[INET6_ADDRSTRLEN + 1];
struct sockaddr_storage address;
struct sockaddr *localaddr = (struct sockaddr *)&address;
+ struct nfs_client_initdata cl_init = {
+ .hostname = hostname,
+ .addr = sap,
+ .addrlen = salen,
+ .ip_addr = buf,
+ .proto = clp->cl_proto,
+ .minorversion = clp->cl_minorversion,
+ .net = net,
+ .timeparms = clnt->cl_timeout,
+ .xprtsec = clp->cl_xprtsec,
+ .nconnect = clp->cl_nconnect,
+ .max_connect = clp->cl_max_connect,
+ };
int error;
error = rpc_switch_client_transport(clnt, &xargs, clnt->cl_timeout);
@@ -1333,10 +1354,7 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
nfs_server_remove_lists(server);
set_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
- error = nfs4_set_client(server, hostname, sap, salen, buf,
- clp->cl_proto, clnt->cl_timeout,
- clp->cl_minorversion,
- clp->cl_nconnect, clp->cl_max_connect, net);
+ error = nfs4_set_client(server, &cl_init);
clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
if (error != 0) {
nfs_server_insert_lists(server);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 2563ed8580f3..7317f26892c5 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -10,6 +10,7 @@
#include <linux/mount.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_ssc.h>
+#include <linux/splice.h>
#include "delegation.h"
#include "internal.h"
#include "iostat.h"
@@ -195,8 +196,8 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
ret = __nfs4_copy_file_range(file_in, pos_in, file_out, pos_out, count,
flags);
if (ret == -EOPNOTSUPP || ret == -EXDEV)
- ret = generic_copy_file_range(file_in, pos_in, file_out,
- pos_out, count, flags);
+ ret = splice_copy_file_range(file_in, pos_in, file_out,
+ pos_out, count);
return ret;
}
@@ -224,8 +225,14 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t
if (!S_ISREG(inode->i_mode))
return -EOPNOTSUPP;
- if ((mode != 0) && (mode != (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE)))
+ switch (mode) {
+ case 0:
+ case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
+ case FALLOC_FL_ZERO_RANGE:
+ break;
+ default:
return -EOPNOTSUPP;
+ }
ret = inode_newsize_ok(inode, offset + len);
if (ret < 0)
@@ -233,6 +240,8 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t
if (mode & FALLOC_FL_PUNCH_HOLE)
return nfs42_proc_deallocate(filep, offset, len);
+ else if (mode & FALLOC_FL_ZERO_RANGE)
+ return nfs42_proc_zero_range(filep, offset ,len);
return nfs42_proc_allocate(filep, offset, len);
}
@@ -244,7 +253,6 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off,
struct nfs_server *server = NFS_SERVER(dst_inode);
struct inode *src_inode = file_inode(src_file);
unsigned int bs = server->clone_blksize;
- bool same_inode = false;
int ret;
/* NFS does not support deduplication. */
@@ -266,25 +274,15 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off,
goto out;
}
- if (src_inode == dst_inode)
- same_inode = true;
-
/* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */
- if (same_inode) {
- inode_lock(src_inode);
- } else if (dst_inode < src_inode) {
- inode_lock_nested(dst_inode, I_MUTEX_PARENT);
- inode_lock_nested(src_inode, I_MUTEX_CHILD);
- } else {
- inode_lock_nested(src_inode, I_MUTEX_PARENT);
- inode_lock_nested(dst_inode, I_MUTEX_CHILD);
- }
-
+ lock_two_nondirectories(src_inode, dst_inode);
/* flush all pending writes on both src and dst so that server
* has the latest data */
+ nfs_file_block_o_direct(NFS_I(src_inode));
ret = nfs_sync_inode(src_inode);
if (ret)
goto out_unlock;
+ nfs_file_block_o_direct(NFS_I(dst_inode));
ret = nfs_sync_inode(dst_inode);
if (ret)
goto out_unlock;
@@ -297,15 +295,7 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off,
truncate_inode_pages_range(&dst_inode->i_data, dst_off, dst_off + count - 1);
out_unlock:
- if (same_inode) {
- inode_unlock(src_inode);
- } else if (dst_inode < src_inode) {
- inode_unlock(src_inode);
- inode_unlock(dst_inode);
- } else {
- inode_unlock(dst_inode);
- inode_unlock(src_inode);
- }
+ unlock_two_nondirectories(src_inode, dst_inode);
out:
return ret < 0 ? ret : count;
}
@@ -438,23 +428,25 @@ void nfs42_ssc_unregister_ops(void)
}
#endif /* CONFIG_NFS_V4_2 */
-static int nfs4_setlease(struct file *file, long arg, struct file_lock **lease,
+static int nfs4_setlease(struct file *file, int arg, struct file_lease **lease,
void **priv)
{
+ if (!S_ISREG(file_inode(file)->i_mode))
+ return -EINVAL;
return nfs4_proc_setlease(file, arg, lease, priv);
}
const struct file_operations nfs4_file_operations = {
.read_iter = nfs_file_read,
.write_iter = nfs_file_write,
- .mmap = nfs_file_mmap,
+ .mmap_prepare = nfs_file_mmap_prepare,
.open = nfs4_file_open,
.flush = nfs4_file_flush,
.release = nfs_file_release,
.fsync = nfs_file_fsync,
.lock = nfs_lock,
.flock = nfs_flock,
- .splice_read = generic_file_splice_read,
+ .splice_read = nfs_file_splice_read,
.splice_write = iter_file_splice_write,
.check_flags = nfs_check_flags,
.setlease = nfs4_setlease,
@@ -466,4 +458,5 @@ const struct file_operations nfs4_file_operations = {
#else
.llseek = nfs_file_llseek,
#endif
+ .fop_flags = FOP_DONTCACHE,
};
diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c
index 1a69479a3a59..e67ea345de69 100644
--- a/fs/nfs/nfs4getroot.c
+++ b/fs/nfs/nfs4getroot.c
@@ -12,30 +12,28 @@
int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_probe)
{
- struct nfs_fsinfo fsinfo;
+ struct nfs_fattr *fattr = nfs_alloc_fattr();
int ret = -ENOMEM;
- fsinfo.fattr = nfs_alloc_fattr();
- if (fsinfo.fattr == NULL)
+ if (fattr == NULL)
goto out;
/* Start by getting the root filehandle from the server */
- ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo, auth_probe);
+ ret = nfs4_proc_get_rootfh(server, mntfh, fattr, auth_probe);
if (ret < 0) {
dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret);
goto out;
}
- if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE)
- || !S_ISDIR(fsinfo.fattr->mode)) {
+ if (!(fattr->valid & NFS_ATTR_FATTR_TYPE) || !S_ISDIR(fattr->mode)) {
printk(KERN_ERR "nfs4_get_rootfh:"
" getroot encountered non-directory\n");
ret = -ENOTDIR;
goto out;
}
- memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid));
+ memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
out:
- nfs_free_fattr(fsinfo.fattr);
+ nfs_free_fattr(fattr);
return ret;
}
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index 25a7c771cfd8..9e1c48c5c0b8 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -306,15 +306,12 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
const char *type, void *data,
size_t data_size, struct idmap *idmap)
{
- const struct cred *saved_cred;
struct key *rkey;
const struct user_key_payload *payload;
ssize_t ret;
- saved_cred = override_creds(id_resolver_cache);
- rkey = nfs_idmap_request_key(name, namelen, type, idmap);
- revert_creds(saved_cred);
-
+ scoped_with_creds(id_resolver_cache)
+ rkey = nfs_idmap_request_key(name, namelen, type, idmap);
if (IS_ERR(rkey)) {
ret = PTR_ERR(rkey);
goto out;
@@ -424,26 +421,16 @@ static void nfs_idmap_pipe_destroy(struct dentry *dir,
struct rpc_pipe_dir_object *pdo)
{
struct idmap *idmap = pdo->pdo_data;
- struct rpc_pipe *pipe = idmap->idmap_pipe;
- if (pipe->dentry) {
- rpc_unlink(pipe->dentry);
- pipe->dentry = NULL;
- }
+ rpc_unlink(idmap->idmap_pipe);
}
static int nfs_idmap_pipe_create(struct dentry *dir,
struct rpc_pipe_dir_object *pdo)
{
struct idmap *idmap = pdo->pdo_data;
- struct rpc_pipe *pipe = idmap->idmap_pipe;
- struct dentry *dentry;
- dentry = rpc_mkpipe_dentry(dir, "idmap", idmap, pipe);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
- pipe->dentry = dentry;
- return 0;
+ return rpc_mkpipe_dentry(dir, "idmap", idmap, idmap->idmap_pipe);
}
static const struct rpc_pipe_dir_object_ops nfs_idmap_pipe_dir_object_ops = {
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 40d749f29ed3..93c6ce04332b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -103,10 +103,10 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
const struct cred *cred,
struct nfs4_slot *slot,
bool is_privileged);
-static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
- const struct cred *);
-static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
- const struct cred *, bool);
+static int nfs41_test_stateid(struct nfs_server *, const nfs4_stateid *,
+ const struct cred *);
+static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *,
+ const struct cred *, bool);
#endif
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
@@ -114,6 +114,7 @@ static inline struct nfs4_label *
nfs4_label_init_security(struct inode *dir, struct dentry *dentry,
struct iattr *sattr, struct nfs4_label *label)
{
+ struct lsm_context shim;
int err;
if (label == NULL)
@@ -128,18 +129,26 @@ nfs4_label_init_security(struct inode *dir, struct dentry *dentry,
label->label = NULL;
err = security_dentry_init_security(dentry, sattr->ia_mode,
- &dentry->d_name, NULL,
- (void **)&label->label, &label->len);
- if (err == 0)
- return label;
+ &dentry->d_name, NULL, &shim);
+ if (err)
+ return NULL;
- return NULL;
+ label->lsmid = shim.id;
+ label->label = shim.context;
+ label->len = shim.len;
+ return label;
}
static inline void
nfs4_label_release_security(struct nfs4_label *label)
{
- if (label)
- security_release_secctx(label->label, label->len);
+ struct lsm_context shim;
+
+ if (label) {
+ shim.context = label->label;
+ shim.len = label->len;
+ shim.id = label->lsmid;
+ security_release_secctx(&shim);
+ }
}
static inline u32 *nfs4_bitmask(struct nfs_server *server, struct nfs4_label *label)
{
@@ -170,6 +179,7 @@ static int nfs4_map_errors(int err)
case -NFS4ERR_RESOURCE:
case -NFS4ERR_LAYOUTTRYLATER:
case -NFS4ERR_RECALLCONFLICT:
+ case -NFS4ERR_RETURNCONFLICT:
return -EREMOTEIO;
case -NFS4ERR_WRONGSEC:
case -NFS4ERR_WRONG_CRED:
@@ -185,6 +195,9 @@ static int nfs4_map_errors(int err)
return -EBUSY;
case -NFS4ERR_NOT_SAME:
return -ENOTSYNC;
+ case -ENETDOWN:
+ case -ENETUNREACH:
+ break;
default:
dprintk("%s could not handle NFSv4 error %d\n",
__func__, -err);
@@ -209,6 +222,7 @@ const u32 nfs4_fattr_bitmap[3] = {
| FATTR4_WORD1_RAWDEV
| FATTR4_WORD1_SPACE_USED
| FATTR4_WORD1_TIME_ACCESS
+ | FATTR4_WORD1_TIME_CREATE
| FATTR4_WORD1_TIME_METADATA
| FATTR4_WORD1_TIME_MODIFY
| FATTR4_WORD1_MOUNTED_ON_FILEID,
@@ -230,6 +244,7 @@ static const u32 nfs4_pnfs_open_bitmap[3] = {
| FATTR4_WORD1_RAWDEV
| FATTR4_WORD1_SPACE_USED
| FATTR4_WORD1_TIME_ACCESS
+ | FATTR4_WORD1_TIME_CREATE
| FATTR4_WORD1_TIME_METADATA
| FATTR4_WORD1_TIME_MODIFY,
FATTR4_WORD2_MDSTHRESHOLD
@@ -292,7 +307,7 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src,
unsigned long cache_validity;
memcpy(dst, src, NFS4_BITMASK_SZ*sizeof(*dst));
- if (!inode || !nfs4_have_delegation(inode, FMODE_READ))
+ if (!inode || !nfs_have_read_or_write_delegation(inode))
return;
cache_validity = READ_ONCE(NFS_I(inode)->cache_validity) | flags;
@@ -309,6 +324,21 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src,
dst[1] &= ~FATTR4_WORD1_MODE;
if (!(cache_validity & NFS_INO_INVALID_OTHER))
dst[1] &= ~(FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP);
+
+ if (!(cache_validity & NFS_INO_INVALID_BTIME))
+ dst[1] &= ~FATTR4_WORD1_TIME_CREATE;
+
+ if (nfs_have_delegated_mtime(inode)) {
+ if (!(cache_validity & NFS_INO_INVALID_ATIME))
+ dst[1] &= ~(FATTR4_WORD1_TIME_ACCESS|FATTR4_WORD1_TIME_ACCESS_SET);
+ if (!(cache_validity & NFS_INO_INVALID_MTIME))
+ dst[1] &= ~(FATTR4_WORD1_TIME_MODIFY|FATTR4_WORD1_TIME_MODIFY_SET);
+ if (!(cache_validity & NFS_INO_INVALID_CTIME))
+ dst[1] &= ~(FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY_SET);
+ } else if (nfs_have_delegated_atime(inode)) {
+ if (!(cache_validity & NFS_INO_INVALID_ATIME))
+ dst[1] &= ~(FATTR4_WORD1_TIME_ACCESS|FATTR4_WORD1_TIME_ACCESS_SET);
+ }
}
static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry,
@@ -361,7 +391,9 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
*p++ = htonl(attrs); /* bitmap */
*p++ = htonl(12); /* attribute buffer length */
*p++ = htonl(NF4DIR);
+ spin_lock(&dentry->d_lock);
p = xdr_encode_hyper(p, NFS_FILEID(d_inode(dentry->d_parent)));
+ spin_unlock(&dentry->d_lock);
readdir->pgbase = (char *)p - (char *)start;
readdir->count -= readdir->pgbase;
@@ -421,6 +453,8 @@ static int nfs4_delay_killable(long *timeout)
{
might_sleep();
+ if (unlikely(nfs_current_task_exiting()))
+ return -EINTR;
__set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
schedule_timeout(nfs4_update_delay(timeout));
if (!__fatal_signal_pending(current))
@@ -432,6 +466,8 @@ static int nfs4_delay_interruptible(long *timeout)
{
might_sleep();
+ if (unlikely(nfs_current_task_exiting()))
+ return -EINTR;
__set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE_UNSAFE);
schedule_timeout(nfs4_update_delay(timeout));
if (!signal_pending(current))
@@ -558,6 +594,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server,
case -NFS4ERR_GRACE:
case -NFS4ERR_LAYOUTTRYLATER:
case -NFS4ERR_RECALLCONFLICT:
+ case -NFS4ERR_RETURNCONFLICT:
exception->delay = 1;
return 0;
@@ -585,6 +622,21 @@ wait_on_recovery:
return 0;
}
+/*
+ * Track the number of NFS4ERR_DELAY related retransmissions and return
+ * EAGAIN if the 'softerr' mount option is set, and we've exceeded the limit
+ * set by 'nfs_delay_retrans'.
+ */
+static int nfs4_exception_should_retrans(const struct nfs_server *server,
+ struct nfs4_exception *exception)
+{
+ if (server->flags & NFS_MOUNT_SOFTERR && nfs_delay_retrans >= 0) {
+ if (exception->retrans++ >= (unsigned short)nfs_delay_retrans)
+ return -EAGAIN;
+ }
+ return 0;
+}
+
/* This is the error handling routine for processes that are allowed
* to sleep.
*/
@@ -595,6 +647,11 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_
ret = nfs4_do_handle_exception(server, errorcode, exception);
if (exception->delay) {
+ int ret2 = nfs4_exception_should_retrans(server, exception);
+ if (ret2 < 0) {
+ exception->retry = 0;
+ return ret2;
+ }
ret = nfs4_delay(&exception->timeout,
exception->interruptible);
goto out_retry;
@@ -621,8 +678,22 @@ nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server,
struct nfs_client *clp = server->nfs_client;
int ret;
+ if ((task->tk_rpc_status == -ENETDOWN ||
+ task->tk_rpc_status == -ENETUNREACH) &&
+ task->tk_flags & RPC_TASK_NETUNREACH_FATAL) {
+ exception->delay = 0;
+ exception->recovering = 0;
+ exception->retry = 0;
+ return -EIO;
+ }
+
ret = nfs4_do_handle_exception(server, errorcode, exception);
if (exception->delay) {
+ int ret2 = nfs4_exception_should_retrans(server, exception);
+ if (ret2 < 0) {
+ exception->retry = 0;
+ return ret2;
+ }
rpc_delay(task, nfs4_update_delay(&exception->timeout));
goto out_retry;
}
@@ -921,6 +992,7 @@ out:
out_noaction:
return ret;
session_recover:
+ set_bit(NFS4_SLOT_TBL_DRAINING, &session->fc_slot_table.slot_tbl_state);
nfs4_schedule_session_recovery(session, status);
dprintk("%s ERROR: %d Reset session\n", __func__, status);
nfs41_sequence_free_slot(res);
@@ -1217,7 +1289,8 @@ nfs4_update_changeattr_locked(struct inode *inode,
struct nfs_inode *nfsi = NFS_I(inode);
u64 change_attr = inode_peek_iversion_raw(inode);
- cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;
+ if (!nfs_have_delegated_mtime(inode))
+ cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;
if (S_ISDIR(inode->i_mode))
cache_validity |= NFS_INO_INVALID_DATA;
@@ -1236,12 +1309,13 @@ nfs4_update_changeattr_locked(struct inode *inode,
if (S_ISDIR(inode->i_mode))
nfs_force_lookup_revalidate(inode);
- if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+ if (!nfs_have_delegated_attributes(inode))
cache_validity |=
NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL |
NFS_INO_INVALID_SIZE | NFS_INO_INVALID_OTHER |
NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK |
- NFS_INO_INVALID_MODE | NFS_INO_INVALID_XATTR;
+ NFS_INO_INVALID_MODE | NFS_INO_INVALID_BTIME |
+ NFS_INO_INVALID_XATTR;
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
}
nfsi->attrtimeo_timestamp = jiffies;
@@ -1292,8 +1366,7 @@ static fmode_t _nfs4_ctx_to_openmode(const struct nfs_open_context *ctx)
}
static u32
-nfs4_map_atomic_open_share(struct nfs_server *server,
- fmode_t fmode, int openflags)
+nfs4_fmode_to_share_access(fmode_t fmode)
{
u32 res = 0;
@@ -1307,11 +1380,27 @@ nfs4_map_atomic_open_share(struct nfs_server *server,
case FMODE_READ|FMODE_WRITE:
res = NFS4_SHARE_ACCESS_BOTH;
}
+ return res;
+}
+
+static u32
+nfs4_map_atomic_open_share(struct nfs_server *server,
+ fmode_t fmode, int openflags)
+{
+ u32 res = nfs4_fmode_to_share_access(fmode);
+
if (!(server->caps & NFS_CAP_ATOMIC_OPEN_V1))
goto out;
/* Want no delegation if we're using O_DIRECT */
- if (openflags & O_DIRECT)
+ if (openflags & O_DIRECT) {
res |= NFS4_SHARE_WANT_NO_DELEG;
+ goto out;
+ }
+ /* res |= NFS4_SHARE_WANT_NO_PREFERENCE; */
+ if (server->caps & NFS_CAP_DELEGTIME)
+ res |= NFS4_SHARE_WANT_DELEG_TIMESTAMPS;
+ if (server->caps & NFS_CAP_OPEN_XOR)
+ res |= NFS4_SHARE_WANT_OPEN_XOR_DELEGATION;
out:
return res;
}
@@ -1709,7 +1798,8 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state,
rcu_read_unlock();
trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0);
- if (!fatal_signal_pending(current)) {
+ if (!fatal_signal_pending(current) &&
+ !nfs_current_task_exiting()) {
if (schedule_timeout(5*HZ) == 0)
status = -EAGAIN;
else
@@ -1926,44 +2016,41 @@ out_return_state:
}
static void
-nfs4_opendata_check_deleg(struct nfs4_opendata *data, struct nfs4_state *state)
-{
- struct nfs_client *clp = NFS_SERVER(state->inode)->nfs_client;
- struct nfs_delegation *delegation;
- int delegation_flags = 0;
-
- rcu_read_lock();
- delegation = rcu_dereference(NFS_I(state->inode)->delegation);
- if (delegation)
- delegation_flags = delegation->flags;
- rcu_read_unlock();
- switch (data->o_arg.claim) {
- default:
+nfs4_process_delegation(struct inode *inode, const struct cred *cred,
+ enum open_claim_type4 claim,
+ const struct nfs4_open_delegation *delegation)
+{
+ switch (delegation->open_delegation_type) {
+ case NFS4_OPEN_DELEGATE_READ:
+ case NFS4_OPEN_DELEGATE_WRITE:
+ case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
+ case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
break;
+ default:
+ return;
+ }
+ switch (claim) {
case NFS4_OPEN_CLAIM_DELEGATE_CUR:
case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
pr_err_ratelimited("NFS: Broken NFSv4 server %s is "
"returning a delegation for "
"OPEN(CLAIM_DELEGATE_CUR)\n",
- clp->cl_hostname);
- return;
+ NFS_SERVER(inode)->nfs_client->cl_hostname);
+ break;
+ case NFS4_OPEN_CLAIM_PREVIOUS:
+ nfs_inode_reclaim_delegation(inode, cred, delegation->type,
+ &delegation->stateid,
+ delegation->pagemod_limit,
+ delegation->open_delegation_type);
+ break;
+ default:
+ nfs_inode_set_delegation(inode, cred, delegation->type,
+ &delegation->stateid,
+ delegation->pagemod_limit,
+ delegation->open_delegation_type);
}
- if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
- nfs_inode_set_delegation(state->inode,
- data->owner->so_cred,
- data->o_res.delegation_type,
- &data->o_res.delegation,
- data->o_res.pagemod_limit);
- else
- nfs_inode_reclaim_delegation(state->inode,
- data->owner->so_cred,
- data->o_res.delegation_type,
- &data->o_res.delegation,
- data->o_res.pagemod_limit);
-
- if (data->o_res.do_recall)
- nfs_async_inode_return_delegation(state->inode,
- &data->o_res.delegation);
+ if (delegation->do_recall)
+ nfs_async_inode_return_delegation(inode, &delegation->stateid);
}
/*
@@ -1980,19 +2067,23 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
if (!data->rpc_done) {
if (data->rpc_status)
return ERR_PTR(data->rpc_status);
- /* cached opens have already been processed */
- goto update;
+ return nfs4_try_open_cached(data);
}
ret = nfs_refresh_inode(inode, &data->f_attr);
if (ret)
return ERR_PTR(ret);
- if (data->o_res.delegation_type != 0)
- nfs4_opendata_check_deleg(data, state);
-update:
- if (!update_open_stateid(state, &data->o_res.stateid,
- NULL, data->o_arg.fmode))
+ nfs4_process_delegation(state->inode,
+ data->owner->so_cred,
+ data->o_arg.claim,
+ &data->o_res.delegation);
+
+ if (!(data->o_res.rflags & NFS4_OPEN_RESULT_NO_OPEN_STATEID)) {
+ if (!update_open_stateid(state, &data->o_res.stateid,
+ NULL, data->o_arg.fmode))
+ return ERR_PTR(-EAGAIN);
+ } else if (!update_open_stateid(state, NULL, NULL, data->o_arg.fmode))
return ERR_PTR(-EAGAIN);
refcount_inc(&state->count);
@@ -2056,10 +2147,18 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
if (IS_ERR(state))
goto out;
- if (data->o_res.delegation_type != 0)
- nfs4_opendata_check_deleg(data, state);
- if (!update_open_stateid(state, &data->o_res.stateid,
- NULL, data->o_arg.fmode)) {
+ nfs4_process_delegation(state->inode,
+ data->owner->so_cred,
+ data->o_arg.claim,
+ &data->o_res.delegation);
+
+ if (!(data->o_res.rflags & NFS4_OPEN_RESULT_NO_OPEN_STATEID)) {
+ if (!update_open_stateid(state, &data->o_res.stateid,
+ NULL, data->o_arg.fmode)) {
+ nfs4_put_open_state(state);
+ state = ERR_PTR(-EAGAIN);
+ }
+ } else if (!update_open_stateid(state, NULL, NULL, data->o_arg.fmode)) {
nfs4_put_open_state(state);
state = ERR_PTR(-EAGAIN);
}
@@ -2195,7 +2294,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
{
struct nfs_delegation *delegation;
struct nfs4_opendata *opendata;
- fmode_t delegation_type = 0;
+ u32 delegation_type = NFS4_OPEN_DELEGATE_NONE;
int status;
opendata = nfs4_open_recoverdata_alloc(ctx, state,
@@ -2204,8 +2303,20 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
return PTR_ERR(opendata);
rcu_read_lock();
delegation = rcu_dereference(NFS_I(state->inode)->delegation);
- if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0)
- delegation_type = delegation->type;
+ if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0) {
+ switch(delegation->type) {
+ case FMODE_READ:
+ delegation_type = NFS4_OPEN_DELEGATE_READ;
+ if (test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags))
+ delegation_type = NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG;
+ break;
+ case FMODE_WRITE:
+ case FMODE_READ|FMODE_WRITE:
+ delegation_type = NFS4_OPEN_DELEGATE_WRITE;
+ if (test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags))
+ delegation_type = NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG;
+ }
+ }
rcu_read_unlock();
opendata->o_arg.u.delegation_type = delegation_type;
status = nfs4_open_recover(opendata, state);
@@ -2526,12 +2637,14 @@ static void nfs4_open_release(void *calldata)
struct nfs4_opendata *data = calldata;
struct nfs4_state *state = NULL;
+ /* In case of error, no cleanup! */
+ if (data->rpc_status != 0 || !data->rpc_done) {
+ nfs_release_seqid(data->o_arg.seqid);
+ goto out_free;
+ }
/* If this request hasn't been cancelled, do nothing */
if (!data->cancelled)
goto out_free;
- /* In case of error, no cleanup! */
- if (data->rpc_status != 0 || !data->rpc_done)
- goto out_free;
/* In case we need an open_confirm, no cleanup! */
if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)
goto out_free;
@@ -2703,8 +2816,12 @@ static int _nfs4_proc_open(struct nfs4_opendata *data,
return status;
}
if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) {
+ struct nfs_fh *fh = &o_res->fh;
+
nfs4_sequence_free_slot(&o_res->seq_res);
- nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, NULL);
+ if (o_arg->claim == NFS4_OPEN_CLAIM_FH)
+ fh = NFS_FH(d_inode(data->dentry));
+ nfs4_proc_getattr(server, fh, o_res->f_attr, NULL);
}
return 0;
}
@@ -2794,16 +2911,14 @@ static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
}
static int nfs40_test_and_free_expired_stateid(struct nfs_server *server,
- nfs4_stateid *stateid,
- const struct cred *cred)
+ nfs4_stateid *stateid, const struct cred *cred)
{
return -NFS4ERR_BAD_STATEID;
}
#if defined(CONFIG_NFS_V4_1)
static int nfs41_test_and_free_expired_stateid(struct nfs_server *server,
- nfs4_stateid *stateid,
- const struct cred *cred)
+ nfs4_stateid *stateid, const struct cred *cred)
{
int status;
@@ -2812,6 +2927,7 @@ static int nfs41_test_and_free_expired_stateid(struct nfs_server *server,
break;
case NFS4_INVALID_STATEID_TYPE:
case NFS4_SPECIAL_STATEID_TYPE:
+ case NFS4_FREED_STATEID_TYPE:
return -NFS4ERR_BAD_STATEID;
case NFS4_REVOKED_STATEID_TYPE:
goto out_free;
@@ -3038,10 +3154,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
fmode_t acc_mode = _nfs4_ctx_to_accessmode(ctx);
struct inode *dir = d_inode(opendata->dir);
unsigned long dir_verifier;
- unsigned int seq;
int ret;
- seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
dir_verifier = nfs_save_change_attribute(dir);
ret = _nfs4_proc_open(opendata, ctx);
@@ -3064,9 +3178,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
if (d_really_is_negative(dentry)) {
struct dentry *alias;
d_drop(dentry);
- alias = d_exact_alias(dentry, state->inode);
- if (!alias)
- alias = d_splice_alias(igrab(state->inode), dentry);
+ alias = d_splice_alias(igrab(state->inode), dentry);
/* d_splice_alias() can't fail here - it's a non-directory */
if (alias) {
dput(ctx->dentry);
@@ -3082,7 +3194,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
case NFS4_OPEN_CLAIM_DELEGATE_PREV:
if (!opendata->rpc_done)
break;
- if (opendata->o_res.delegation_type != 0)
+ if (opendata->o_res.delegation.type != 0)
dir_verifier = nfs_save_change_attribute(dir);
nfs_set_verifier(dentry, dir_verifier);
}
@@ -3094,11 +3206,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
if (ret != 0)
goto out;
- if (d_inode(dentry) == state->inode) {
+ if (d_inode(dentry) == state->inode)
nfs_inode_attach_open_context(ctx);
- if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
- nfs4_schedule_stateid_recovery(server, state);
- }
out:
if (!opendata->cancelled) {
@@ -3368,13 +3477,18 @@ static int nfs4_do_setattr(struct inode *inode, const struct cred *cred,
.inode = inode,
.stateid = &arg.stateid,
};
- unsigned long adjust_flags = NFS_INO_INVALID_CHANGE;
+ unsigned long adjust_flags = NFS_INO_INVALID_CHANGE |
+ NFS_INO_INVALID_CTIME;
int err;
if (sattr->ia_valid & (ATTR_MODE | ATTR_KILL_SUID | ATTR_KILL_SGID))
adjust_flags |= NFS_INO_INVALID_MODE;
if (sattr->ia_valid & (ATTR_UID | ATTR_GID))
adjust_flags |= NFS_INO_INVALID_OTHER;
+ if (sattr->ia_valid & ATTR_ATIME)
+ adjust_flags |= NFS_INO_INVALID_ATIME;
+ if (sattr->ia_valid & ATTR_MTIME)
+ adjust_flags |= NFS_INO_INVALID_MTIME;
do {
nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, fattr->label),
@@ -3486,7 +3600,7 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst,
write_sequnlock(&state->seqlock);
trace_nfs4_close_stateid_update_wait(state->inode, dst, 0);
- if (fatal_signal_pending(current))
+ if (fatal_signal_pending(current) || nfs_current_task_exiting())
status = -EINTR;
else
if (schedule_timeout(5*HZ) != 0)
@@ -3522,6 +3636,7 @@ struct nfs4_closedata {
} lr;
struct nfs_fattr fattr;
unsigned long timestamp;
+ unsigned short retrans;
};
static void nfs4_free_closedata(void *data)
@@ -3550,6 +3665,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
.state = state,
.inode = calldata->inode,
.stateid = &calldata->arg.stateid,
+ .retrans = calldata->retrans,
};
if (!nfs4_sequence_done(task, &calldata->res.seq_res))
@@ -3597,6 +3713,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
default:
task->tk_status = nfs4_async_handle_exception(task,
server, task->tk_status, &exception);
+ calldata->retrans = exception.retrans;
if (exception.retry)
goto out_restart;
}
@@ -3674,7 +3791,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) {
/* Close-to-open cache consistency revalidation */
- if (!nfs4_have_delegation(inode, FMODE_READ)) {
+ if (!nfs4_have_delegation(inode, FMODE_READ, 0)) {
nfs4_bitmask_set(calldata->arg.bitmask_store,
server->cache_consistency_bitmask,
inode, 0);
@@ -3684,8 +3801,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
}
calldata->arg.share_access =
- nfs4_map_atomic_open_share(NFS_SERVER(inode),
- calldata->arg.fmode, 0);
+ nfs4_fmode_to_share_access(calldata->arg.fmode);
if (calldata->res.fattr == NULL)
calldata->arg.bitmask = NULL;
@@ -3816,8 +3932,11 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx,
static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
{
+ struct dentry *dentry = ctx->dentry;
if (ctx->state == NULL)
return;
+ if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
+ nfs4_inode_set_return_delegation_on_close(d_inode(dentry));
if (is_sync)
nfs4_close_sync(ctx->state, _nfs4_ctx_to_openmode(ctx));
else
@@ -3826,11 +3945,26 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
#define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL)
#define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL)
-#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_XATTR_SUPPORT - 1UL)
+#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_OPEN_ARGUMENTS - 1UL)
+
+#define FATTR4_WORD2_NFS42_TIME_DELEG_MASK \
+ (FATTR4_WORD2_TIME_DELEG_MODIFY|FATTR4_WORD2_TIME_DELEG_ACCESS)
+static bool nfs4_server_delegtime_capable(struct nfs4_server_caps_res *res)
+{
+ u32 share_access_want = res->open_caps.oa_share_access_want[0];
+ u32 attr_bitmask = res->attr_bitmask[2];
+
+ return (share_access_want & NFS4_SHARE_WANT_DELEG_TIMESTAMPS) &&
+ ((attr_bitmask & FATTR4_WORD2_NFS42_TIME_DELEG_MASK) ==
+ FATTR4_WORD2_NFS42_TIME_DELEG_MASK);
+}
static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
{
- u32 bitmask[3] = {}, minorversion = server->nfs_client->cl_minorversion;
+ u32 minorversion = server->nfs_client->cl_minorversion;
+ u32 bitmask[3] = {
+ [0] = FATTR4_WORD0_SUPPORTED_ATTRS,
+ };
struct nfs4_server_caps_arg args = {
.fhandle = fhandle,
.bitmask = bitmask,
@@ -3853,9 +3987,19 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
FATTR4_WORD0_CASE_PRESERVING;
if (minorversion)
bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT;
+ if (minorversion > 1)
+ bitmask[2] |= FATTR4_WORD2_OPEN_ARGUMENTS;
status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
if (status == 0) {
+ bitmask[0] = (FATTR4_WORD0_SUPPORTED_ATTRS |
+ FATTR4_WORD0_FH_EXPIRE_TYPE |
+ FATTR4_WORD0_LINK_SUPPORT |
+ FATTR4_WORD0_SYMLINK_SUPPORT |
+ FATTR4_WORD0_ACLSUPPORT |
+ FATTR4_WORD0_CASE_INSENSITIVE |
+ FATTR4_WORD0_CASE_PRESERVING) &
+ res.attr_bitmask[0];
/* Sanity check the server answers */
switch (minorversion) {
case 0:
@@ -3864,13 +4008,20 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
break;
case 1:
res.attr_bitmask[2] &= FATTR4_WORD2_NFS41_MASK;
+ bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT &
+ res.attr_bitmask[2];
break;
case 2:
res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK;
+ bitmask[2] = (FATTR4_WORD2_SUPPATTR_EXCLCREAT |
+ FATTR4_WORD2_OPEN_ARGUMENTS) &
+ res.attr_bitmask[2];
}
memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
- server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS |
- NFS_CAP_SYMLINKS| NFS_CAP_SECURITY_LABEL);
+ server->caps &=
+ ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS |
+ NFS_CAP_SECURITY_LABEL | NFS_CAP_FS_LOCATIONS |
+ NFS_CAP_OPEN_XOR | NFS_CAP_DELEGTIME);
server->fattr_valid = NFS_ATTR_FATTR_V4;
if (res.attr_bitmask[0] & FATTR4_WORD0_ACL &&
res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
@@ -3909,10 +4060,20 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
server->fattr_valid &= ~NFS_ATTR_FATTR_CTIME;
if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY))
server->fattr_valid &= ~NFS_ATTR_FATTR_MTIME;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_MTIME;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_CREATE))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_BTIME;
memcpy(server->attr_bitmask_nl, res.attr_bitmask,
sizeof(server->attr_bitmask));
server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+ if (res.open_caps.oa_share_access_want[0] &
+ NFS4_SHARE_WANT_OPEN_XOR_DELEGATION)
+ server->caps |= NFS_CAP_OPEN_XOR;
+ if (nfs4_server_delegtime_capable(&res))
+ server->caps |= NFS_CAP_DELEGTIME;
+
memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask));
server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
@@ -3938,7 +4099,6 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
};
int err;
- nfs4_server_set_init_caps(server);
do {
err = nfs4_handle_exception(server,
_nfs4_server_capabilities(server, fhandle),
@@ -3997,6 +4157,23 @@ static void test_fs_location_for_trunking(struct nfs4_fs_location *location,
}
}
+static bool _is_same_nfs4_pathname(struct nfs4_pathname *path1,
+ struct nfs4_pathname *path2)
+{
+ int i;
+
+ if (path1->ncomponents != path2->ncomponents)
+ return false;
+ for (i = 0; i < path1->ncomponents; i++) {
+ if (path1->components[i].len != path2->components[i].len)
+ return false;
+ if (memcmp(path1->components[i].data, path2->components[i].data,
+ path1->components[i].len))
+ return false;
+ }
+ return true;
+}
+
static int _nfs4_discover_trunking(struct nfs_server *server,
struct nfs_fh *fhandle)
{
@@ -4030,9 +4207,13 @@ static int _nfs4_discover_trunking(struct nfs_server *server,
if (status)
goto out_free_3;
- for (i = 0; i < locations->nlocations; i++)
+ for (i = 0; i < locations->nlocations; i++) {
+ if (!_is_same_nfs4_pathname(&locations->fs_path,
+ &locations->locations[i].rootpath))
+ continue;
test_fs_location_for_trunking(&locations->locations[i], clp,
server);
+ }
out_free_3:
kfree(locations->fattr);
out_free_2:
@@ -4065,15 +4246,18 @@ out:
}
static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fsinfo *info)
+ struct nfs_fattr *fattr)
{
- u32 bitmask[3];
+ u32 bitmask[3] = {
+ [0] = FATTR4_WORD0_TYPE | FATTR4_WORD0_CHANGE |
+ FATTR4_WORD0_SIZE | FATTR4_WORD0_FSID,
+ };
struct nfs4_lookup_root_arg args = {
.bitmask = bitmask,
};
struct nfs4_lookup_res res = {
.server = server,
- .fattr = info->fattr,
+ .fattr = fattr,
.fh = fhandle,
};
struct rpc_message msg = {
@@ -4082,27 +4266,20 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
.rpc_resp = &res,
};
- bitmask[0] = nfs4_fattr_bitmap[0];
- bitmask[1] = nfs4_fattr_bitmap[1];
- /*
- * Process the label in the upcoming getfattr
- */
- bitmask[2] = nfs4_fattr_bitmap[2] & ~FATTR4_WORD2_SECURITY_LABEL;
-
- nfs_fattr_init(info->fattr);
+ nfs_fattr_init(fattr);
return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
}
static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fsinfo *info)
+ struct nfs_fattr *fattr)
{
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
- err = _nfs4_lookup_root(server, fhandle, info);
- trace_nfs4_lookup_root(server, fhandle, info->fattr, err);
+ err = _nfs4_lookup_root(server, fhandle, fattr);
+ trace_nfs4_lookup_root(server, fhandle, fattr, err);
switch (err) {
case 0:
case -NFS4ERR_WRONGSEC:
@@ -4115,8 +4292,9 @@ out:
return err;
}
-static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fsinfo *info, rpc_authflavor_t flavor)
+static int nfs4_lookup_root_sec(struct nfs_server *server,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr,
+ rpc_authflavor_t flavor)
{
struct rpc_auth_create_args auth_args = {
.pseudoflavor = flavor,
@@ -4126,7 +4304,7 @@ static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandl
auth = rpcauth_create(&auth_args, server->client);
if (IS_ERR(auth))
return -EACCES;
- return nfs4_lookup_root(server, fhandle, info);
+ return nfs4_lookup_root(server, fhandle, fattr);
}
/*
@@ -4139,7 +4317,7 @@ static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandl
* negative errno value.
*/
static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fsinfo *info)
+ struct nfs_fattr *fattr)
{
/* Per 3530bis 15.33.5 */
static const rpc_authflavor_t flav_array[] = {
@@ -4155,8 +4333,9 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
if (server->auth_info.flavor_len > 0) {
/* try each flavor specified by user */
for (i = 0; i < server->auth_info.flavor_len; i++) {
- status = nfs4_lookup_root_sec(server, fhandle, info,
- server->auth_info.flavors[i]);
+ status = nfs4_lookup_root_sec(
+ server, fhandle, fattr,
+ server->auth_info.flavors[i]);
if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
continue;
break;
@@ -4164,7 +4343,7 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
} else {
/* no flavors specified by user, try default list */
for (i = 0; i < ARRAY_SIZE(flav_array); i++) {
- status = nfs4_lookup_root_sec(server, fhandle, info,
+ status = nfs4_lookup_root_sec(server, fhandle, fattr,
flav_array[i]);
if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
continue;
@@ -4188,28 +4367,22 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
* nfs4_proc_get_rootfh - get file handle for server's pseudoroot
* @server: initialized nfs_server handle
* @fhandle: we fill in the pseudo-fs root file handle
- * @info: we fill in an FSINFO struct
+ * @fattr: we fill in a bare bones struct fattr
* @auth_probe: probe the auth flavours
*
* Returns zero on success, or a negative errno.
*/
int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fsinfo *info,
- bool auth_probe)
+ struct nfs_fattr *fattr, bool auth_probe)
{
int status = 0;
if (!auth_probe)
- status = nfs4_lookup_root(server, fhandle, info);
+ status = nfs4_lookup_root(server, fhandle, fattr);
if (auth_probe || status == NFS4ERR_WRONGSEC)
- status = server->nfs_client->cl_mvops->find_root_sec(server,
- fhandle, info);
-
- if (status == 0)
- status = nfs4_server_capabilities(server, fhandle);
- if (status == 0)
- status = nfs4_do_fsinfo(server, fhandle, info);
+ status = server->nfs_client->cl_mvops->find_root_sec(
+ server, fhandle, fattr);
return nfs4_map_errors(status);
}
@@ -4398,15 +4571,15 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
}
static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
- struct dentry *dentry, struct nfs_fh *fhandle,
- struct nfs_fattr *fattr)
+ struct dentry *dentry, const struct qstr *name,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
struct nfs_server *server = NFS_SERVER(dir);
int status;
struct nfs4_lookup_arg args = {
.bitmask = server->attr_bitmask,
.dir_fh = NFS_FH(dir),
- .name = &dentry->d_name,
+ .name = name,
};
struct nfs4_lookup_res res = {
.server = server,
@@ -4448,17 +4621,16 @@ static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr)
}
static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
- struct dentry *dentry, struct nfs_fh *fhandle,
- struct nfs_fattr *fattr)
+ struct dentry *dentry, const struct qstr *name,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
struct nfs4_exception exception = {
.interruptible = true,
};
struct rpc_clnt *client = *clnt;
- const struct qstr *name = &dentry->d_name;
int err;
do {
- err = _nfs4_proc_lookup(client, dir, dentry, fhandle, fattr);
+ err = _nfs4_proc_lookup(client, dir, dentry, name, fhandle, fattr);
trace_nfs4_lookup(dir, name, err);
switch (err) {
case -NFS4ERR_BADNAME:
@@ -4493,13 +4665,13 @@ out:
return err;
}
-static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry,
+static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
int status;
struct rpc_clnt *client = NFS_CLIENT(dir);
- status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr);
+ status = nfs4_proc_lookup_common(&client, dir, dentry, name, fhandle, fattr);
if (client != NFS_CLIENT(dir)) {
rpc_shutdown_client(client);
nfs_fixup_secinfo_attributes(fattr);
@@ -4514,7 +4686,8 @@ nfs4_proc_lookup_mountpoint(struct inode *dir, struct dentry *dentry,
struct rpc_clnt *client = NFS_CLIENT(dir);
int status;
- status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr);
+ status = nfs4_proc_lookup_common(&client, dir, dentry, &dentry->d_name,
+ fhandle, fattr);
if (status < 0)
return ERR_PTR(status);
return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client;
@@ -4542,16 +4715,19 @@ static int _nfs4_proc_lookupp(struct inode *inode,
};
unsigned short task_flags = 0;
- if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
+ if (server->flags & NFS_MOUNT_SOFTREVAL)
task_flags |= RPC_TASK_TIMEOUT;
+ if (server->caps & NFS_CAP_MOVEABLE)
+ task_flags |= RPC_TASK_MOVEABLE;
args.bitmask = nfs4_bitmask(server, fattr->label);
nfs_fattr_init(fattr);
+ nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0);
dprintk("NFS call lookupp ino=0x%lx\n", inode->i_ino);
- status = nfs4_call_sync(clnt, server, &msg, &args.seq_args,
- &res.seq_res, task_flags);
+ status = nfs4_do_call_sync(clnt, server, &msg, &args.seq_args,
+ &res.seq_res, task_flags);
dprintk("NFS reply lookupp: %d\n", status);
return status;
}
@@ -4591,7 +4767,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
};
int status = 0;
- if (!nfs4_have_delegation(inode, FMODE_READ)) {
+ if (!nfs4_have_delegation(inode, FMODE_READ, 0)) {
res.fattr = nfs_alloc_fattr();
if (res.fattr == NULL)
return -ENOMEM;
@@ -4909,8 +5085,9 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct
goto out;
nfs4_inode_make_writeable(inode);
- nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.fattr->label), inode,
- NFS_INO_INVALID_CHANGE);
+ nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.fattr->label),
+ inode,
+ NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME);
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
if (!status) {
nfs4_update_changeattr(dir, &res.cinfo, res.fattr->time_start,
@@ -4988,9 +5165,6 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
&data->arg.seq_args, &data->res.seq_res, 1);
if (status == 0) {
spin_lock(&dir->i_lock);
- /* Creating a directory bumps nlink in the parent */
- if (data->arg.ftype == NF4DIR)
- nfs4_inc_nlink_locked(dir);
nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo,
data->res.fattr->time_start,
NFS_INO_INVALID_DATA);
@@ -5000,6 +5174,31 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
return status;
}
+static struct dentry *nfs4_do_mkdir(struct inode *dir, struct dentry *dentry,
+ struct nfs4_createdata *data, int *statusp)
+{
+ struct dentry *ret;
+
+ *statusp = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg,
+ &data->arg.seq_args, &data->res.seq_res, 1);
+
+ if (*statusp)
+ return NULL;
+
+ spin_lock(&dir->i_lock);
+ /* Creating a directory bumps nlink in the parent */
+ nfs4_inc_nlink_locked(dir);
+ nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo,
+ data->res.fattr->time_start,
+ NFS_INO_INVALID_DATA);
+ spin_unlock(&dir->i_lock);
+ ret = nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr);
+ if (!IS_ERR(ret))
+ return ret;
+ *statusp = PTR_ERR(ret);
+ return NULL;
+}
+
static void nfs4_free_createdata(struct nfs4_createdata *data)
{
nfs4_label_free(data->fattr.label);
@@ -5007,9 +5206,10 @@ static void nfs4_free_createdata(struct nfs4_createdata *data)
}
static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
- struct page *page, unsigned int len, struct iattr *sattr,
+ struct folio *folio, unsigned int len, struct iattr *sattr,
struct nfs4_label *label)
{
+ struct page *page = &folio->page;
struct nfs4_createdata *data;
int status = -ENAMETOOLONG;
@@ -5034,7 +5234,7 @@ out:
}
static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
- struct page *page, unsigned int len, struct iattr *sattr)
+ struct folio *folio, unsigned int len, struct iattr *sattr)
{
struct nfs4_exception exception = {
.interruptible = true,
@@ -5045,7 +5245,7 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
label = nfs4_label_init_security(dir, dentry, sattr, &l);
do {
- err = _nfs4_proc_symlink(dir, dentry, page, len, sattr, label);
+ err = _nfs4_proc_symlink(dir, dentry, folio, len, sattr, label);
trace_nfs4_symlink(dir, &dentry->d_name, err);
err = nfs4_handle_exception(NFS_SERVER(dir), err,
&exception);
@@ -5055,32 +5255,35 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
return err;
}
-static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
- struct iattr *sattr, struct nfs4_label *label)
+static struct dentry *_nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
+ struct iattr *sattr,
+ struct nfs4_label *label, int *statusp)
{
struct nfs4_createdata *data;
- int status = -ENOMEM;
+ struct dentry *ret = NULL;
+ *statusp = -ENOMEM;
data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4DIR);
if (data == NULL)
goto out;
data->arg.label = label;
- status = nfs4_do_create(dir, dentry, data);
+ ret = nfs4_do_mkdir(dir, dentry, data, statusp);
nfs4_free_createdata(data);
out:
- return status;
+ return ret;
}
-static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
- struct iattr *sattr)
+static struct dentry *nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
+ struct iattr *sattr)
{
struct nfs_server *server = NFS_SERVER(dir);
struct nfs4_exception exception = {
.interruptible = true,
};
struct nfs4_label l, *label;
+ struct dentry *alias;
int err;
label = nfs4_label_init_security(dir, dentry, sattr, &l);
@@ -5088,14 +5291,16 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK))
sattr->ia_mode &= ~current_umask();
do {
- err = _nfs4_proc_mkdir(dir, dentry, sattr, label);
+ alias = _nfs4_proc_mkdir(dir, dentry, sattr, label, &err);
trace_nfs4_mkdir(dir, &dentry->d_name, err);
- err = nfs4_handle_exception(NFS_SERVER(dir), err,
- &exception);
+ if (err)
+ alias = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir),
+ err,
+ &exception));
} while (exception.retry);
nfs4_label_release_security(label);
- return err;
+ return alias;
}
static int _nfs4_proc_readdir(struct nfs_readdir_arg *nr_arg,
@@ -5394,9 +5599,11 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr)
.inode = hdr->inode,
.state = hdr->args.context->state,
.stateid = &hdr->args.stateid,
+ .retrans = hdr->retrans,
};
task->tk_status = nfs4_async_handle_exception(task,
server, task->tk_status, &exception);
+ hdr->retrans = exception.retrans;
if (exception.retry) {
rpc_restart_call_prepare(task);
return -EAGAIN;
@@ -5429,7 +5636,7 @@ static bool nfs4_read_plus_not_supported(struct rpc_task *task,
struct rpc_message *msg = &task->tk_msg;
if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS] &&
- server->caps & NFS_CAP_READ_PLUS && task->tk_status == -ENOTSUPP) {
+ task->tk_status == -ENOTSUPP) {
server->caps &= ~NFS_CAP_READ_PLUS;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
rpc_restart_call_prepare(task);
@@ -5453,17 +5660,21 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
}
#if defined CONFIG_NFS_V4_2 && defined CONFIG_NFS_V4_2_READ_PLUS
-static void nfs42_read_plus_support(struct nfs_pgio_header *hdr,
+static bool nfs42_read_plus_support(struct nfs_pgio_header *hdr,
struct rpc_message *msg)
{
/* Note: We don't use READ_PLUS with pNFS yet */
- if (nfs_server_capable(hdr->inode, NFS_CAP_READ_PLUS) && !hdr->ds_clp)
+ if (nfs_server_capable(hdr->inode, NFS_CAP_READ_PLUS) && !hdr->ds_clp) {
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS];
+ return nfs_read_alloc_scratch(hdr, READ_PLUS_SCRATCH_SIZE);
+ }
+ return false;
}
#else
-static void nfs42_read_plus_support(struct nfs_pgio_header *hdr,
+static bool nfs42_read_plus_support(struct nfs_pgio_header *hdr,
struct rpc_message *msg)
{
+ return false;
}
#endif /* CONFIG_NFS_V4_2 */
@@ -5473,8 +5684,8 @@ static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,
hdr->timestamp = jiffies;
if (!hdr->pgio_done_cb)
hdr->pgio_done_cb = nfs4_read_done_cb;
- msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
- nfs42_read_plus_support(hdr, msg);
+ if (!nfs42_read_plus_support(hdr, msg))
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0);
}
@@ -5506,10 +5717,12 @@ static int nfs4_write_done_cb(struct rpc_task *task,
.inode = hdr->inode,
.state = hdr->args.context->state,
.stateid = &hdr->args.stateid,
+ .retrans = hdr->retrans,
};
task->tk_status = nfs4_async_handle_exception(task,
NFS_SERVER(inode), task->tk_status,
&exception);
+ hdr->retrans = exception.retrans;
if (exception.retry) {
rpc_restart_call_prepare(task);
return -EAGAIN;
@@ -5555,7 +5768,7 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr)
/* Otherwise, request attributes if and only if we don't hold
* a delegation
*/
- return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
+ return nfs4_have_delegation(hdr->inode, FMODE_READ, 0) == 0;
}
void nfs4_bitmask_set(__u32 bitmask[], const __u32 src[],
@@ -5583,6 +5796,8 @@ void nfs4_bitmask_set(__u32 bitmask[], const __u32 src[],
bitmask[1] |= FATTR4_WORD1_TIME_MODIFY;
if (cache_validity & NFS_INO_INVALID_BLOCKS)
bitmask[1] |= FATTR4_WORD1_SPACE_USED;
+ if (cache_validity & NFS_INO_INVALID_BTIME)
+ bitmask[1] |= FATTR4_WORD1_TIME_CREATE;
if (cache_validity & NFS_INO_INVALID_SIZE)
bitmask[0] |= FATTR4_WORD0_SIZE;
@@ -5614,7 +5829,7 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0);
- nfs4_state_protect_write(server->nfs_client, clnt, msg, hdr);
+ nfs4_state_protect_write(hdr->ds_clp ? hdr->ds_clp : server->nfs_client, clnt, msg, hdr);
}
static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
@@ -5655,7 +5870,8 @@ static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess
data->res.server = server;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0);
- nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_COMMIT, clnt, msg);
+ nfs4_state_protect(data->ds_clp ? data->ds_clp : server->nfs_client,
+ NFS_SP4_MACH_CRED_COMMIT, clnt, msg);
}
static int _nfs4_proc_commit(struct file *dst, struct nfs_commitargs *args,
@@ -5956,7 +6172,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf,
}
/* for decoding across pages */
- res.acl_scratch = alloc_page(GFP_KERNEL);
+ res.acl_scratch = folio_alloc(GFP_KERNEL, 0);
if (!res.acl_scratch)
goto out_free;
@@ -5989,11 +6205,10 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf,
out_ok:
ret = res.acl_len;
out_free:
- for (i = 0; i < npages; i++)
- if (pages[i])
- __free_page(pages[i]);
+ while (--i >= 0)
+ __free_page(pages[i]);
if (res.acl_scratch)
- __free_page(res.acl_scratch);
+ folio_put(res.acl_scratch);
kfree(pages);
return ret;
}
@@ -6021,6 +6236,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen,
struct nfs_server *server = NFS_SERVER(inode);
int ret;
+ if (unlikely(NFS_FH(inode)->size == 0))
+ return -ENODATA;
if (!nfs4_server_supports_acls(server, type))
return -EOPNOTSUPP;
ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
@@ -6095,6 +6312,9 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf,
{
struct nfs4_exception exception = { };
int err;
+
+ if (unlikely(NFS_FH(inode)->size == 0))
+ return -ENODATA;
do {
err = __nfs4_proc_set_acl(inode, buf, buflen, type);
trace_nfs4_set_acl(inode, err);
@@ -6117,7 +6337,7 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf,
size_t buflen)
{
struct nfs_server *server = NFS_SERVER(inode);
- struct nfs4_label label = {0, 0, buflen, buf};
+ struct nfs4_label label = {0, 0, 0, buflen, buf};
u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL };
struct nfs_fattr fattr = {
@@ -6222,7 +6442,7 @@ static int nfs4_do_set_security_label(struct inode *inode,
static int
nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen)
{
- struct nfs4_label ilabel = {0, 0, buflen, (char *)buf };
+ struct nfs4_label ilabel = {0, 0, 0, buflen, (char *)buf };
struct nfs_fattr *fattr;
int status;
@@ -6237,6 +6457,7 @@ nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen)
if (status == 0)
nfs_setsecurity(inode, fattr);
+ nfs_free_fattr(fattr);
return status;
}
#endif /* CONFIG_NFS_V4_SECURITY_LABEL */
@@ -6515,6 +6736,7 @@ struct nfs4_delegreturndata {
struct nfs_fh fh;
nfs4_stateid stateid;
unsigned long timestamp;
+ unsigned short retrans;
struct {
struct nfs4_layoutreturn_args arg;
struct nfs4_layoutreturn_res res;
@@ -6522,6 +6744,7 @@ struct nfs4_delegreturndata {
u32 roc_barrier;
bool roc;
} lr;
+ struct nfs4_delegattr sattr;
struct nfs_fattr fattr;
int rpc_status;
struct inode *inode;
@@ -6534,6 +6757,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
.inode = data->inode,
.stateid = &data->stateid,
.task_is_privileged = data->args.seq_args.sa_privileged,
+ .retrans = data->retrans,
};
if (!nfs4_sequence_done(task, &data->res.seq_res))
@@ -6546,6 +6770,30 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
&data->res.lr_ret) == -EAGAIN)
goto out_restart;
+ if (data->args.sattr_args && task->tk_status != 0) {
+ switch(data->res.sattr_ret) {
+ case 0:
+ data->args.sattr_args = NULL;
+ data->res.sattr_res = false;
+ break;
+ case -NFS4ERR_ADMIN_REVOKED:
+ case -NFS4ERR_DELEG_REVOKED:
+ case -NFS4ERR_EXPIRED:
+ case -NFS4ERR_BAD_STATEID:
+ /* Let the main handler below do stateid recovery */
+ break;
+ case -NFS4ERR_OLD_STATEID:
+ if (nfs4_refresh_delegation_stateid(&data->stateid,
+ data->inode))
+ goto out_restart;
+ fallthrough;
+ default:
+ data->args.sattr_args = NULL;
+ data->res.sattr_res = false;
+ goto out_restart;
+ }
+ }
+
switch (task->tk_status) {
case 0:
renew_lease(data->res.server, data->timestamp);
@@ -6581,6 +6829,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
task->tk_status = nfs4_async_handle_exception(task,
data->res.server, task->tk_status,
&exception);
+ data->retrans = exception.retrans;
if (exception.retry)
goto out_restart;
}
@@ -6639,7 +6888,10 @@ static const struct rpc_call_ops nfs4_delegreturn_ops = {
.rpc_release = nfs4_delegreturn_release,
};
-static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync)
+static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
+ const nfs4_stateid *stateid,
+ struct nfs_delegation *delegation,
+ int issync)
{
struct nfs4_delegreturndata *data;
struct nfs_server *server = NFS_SERVER(inode);
@@ -6691,12 +6943,27 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
}
}
+ if (delegation &&
+ test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags)) {
+ if (delegation->type & FMODE_READ) {
+ data->sattr.atime = inode_get_atime(inode);
+ data->sattr.atime_set = true;
+ }
+ if (delegation->type & FMODE_WRITE) {
+ data->sattr.mtime = inode_get_mtime(inode);
+ data->sattr.mtime_set = true;
+ }
+ data->args.sattr_args = &data->sattr;
+ data->res.sattr_res = true;
+ }
+
if (!data->inode)
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1,
1);
else
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1,
0);
+
task_setup_data.callback_data = data;
msg.rpc_argp = &data->args;
msg.rpc_resp = &data->res;
@@ -6714,13 +6981,16 @@ out:
return status;
}
-int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync)
+int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
+ const nfs4_stateid *stateid,
+ struct nfs_delegation *delegation, int issync)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_exception exception = { };
int err;
do {
- err = _nfs4_proc_delegreturn(inode, cred, stateid, issync);
+ err = _nfs4_proc_delegreturn(inode, cred, stateid,
+ delegation, issync);
trace_nfs4_delegreturn(inode, stateid, err);
switch (err) {
case -NFS4ERR_STALE_STATEID:
@@ -6764,7 +7034,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
switch (status) {
case 0:
- request->fl_type = F_UNLCK;
+ request->c.flc_type = F_UNLCK;
break;
case -NFS4ERR_DENIED:
status = 0;
@@ -6836,6 +7106,7 @@ struct nfs4_unlockdata {
struct file_lock fl;
struct nfs_server *server;
unsigned long timestamp;
+ unsigned short retrans;
};
static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
@@ -6846,10 +7117,18 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
struct nfs4_unlockdata *p;
struct nfs4_state *state = lsp->ls_state;
struct inode *inode = state->inode;
+ struct nfs_lock_context *l_ctx;
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (p == NULL)
return NULL;
+ l_ctx = nfs_get_lock_context(ctx);
+ if (!IS_ERR(l_ctx)) {
+ p->l_ctx = l_ctx;
+ } else {
+ kfree(p);
+ return NULL;
+ }
p->arg.fh = NFS_FH(inode);
p->arg.fl = &p->fl;
p->arg.seqid = seqid;
@@ -6857,7 +7136,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
p->lsp = lsp;
/* Ensure we don't close file until we're done freeing locks! */
p->ctx = get_nfs_open_context(ctx);
- p->l_ctx = nfs_get_lock_context(ctx);
locks_init_lock(&p->fl);
locks_copy_lock(&p->fl, fl);
p->server = NFS_SERVER(inode);
@@ -6883,6 +7161,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
struct nfs4_exception exception = {
.inode = calldata->lsp->ls_state->inode,
.stateid = &calldata->arg.stateid,
+ .retrans = calldata->retrans,
};
if (!nfs4_sequence_done(task, &calldata->res.seq_res))
@@ -6916,6 +7195,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
task->tk_status = nfs4_async_handle_exception(task,
calldata->server, task->tk_status,
&exception);
+ calldata->retrans = exception.retrans;
if (exception.retry)
rpc_restart_call_prepare(task);
}
@@ -6982,8 +7262,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
/* Ensure this is an unlock - when canceling a lock, the
* canceled lock is passed in, and it won't be an unlock.
*/
- fl->fl_type = F_UNLCK;
- if (fl->fl_flags & FL_CLOSE)
+ fl->c.flc_type = F_UNLCK;
+ if (fl->c.flc_flags & FL_CLOSE)
set_bit(NFS_CONTEXT_UNLOCK, &ctx->flags);
data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
@@ -7009,11 +7289,11 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
struct rpc_task *task;
struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t);
int status = 0;
- unsigned char fl_flags = request->fl_flags;
+ unsigned char saved_flags = request->c.flc_flags;
status = nfs4_set_lock_state(state, request);
/* Unlock _before_ we do the RPC call */
- request->fl_flags |= FL_EXISTS;
+ request->c.flc_flags |= FL_EXISTS;
/* Exclude nfs_delegation_claim_locks() */
mutex_lock(&sp->so_delegreturn_mutex);
/* Exclude nfs4_reclaim_open_stateid() - note nesting! */
@@ -7037,14 +7317,16 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
status = -ENOMEM;
if (IS_ERR(seqid))
goto out;
- task = nfs4_do_unlck(request, nfs_file_open_context(request->fl_file), lsp, seqid);
+ task = nfs4_do_unlck(request,
+ nfs_file_open_context(request->c.flc_file),
+ lsp, seqid);
status = PTR_ERR(task);
if (IS_ERR(task))
goto out;
status = rpc_wait_for_completion_task(task);
rpc_put_task(task);
out:
- request->fl_flags = fl_flags;
+ request->c.flc_flags = saved_flags;
trace_nfs4_unlock(request, state, F_SETLK, status);
return status;
}
@@ -7145,7 +7427,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
{
struct nfs4_lockdata *data = calldata;
struct nfs4_lock_state *lsp = data->lsp;
- struct nfs_server *server = NFS_SERVER(d_inode(data->ctx->dentry));
if (!nfs4_sequence_done(task, &data->res.seq_res))
return;
@@ -7153,9 +7434,10 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
data->rpc_status = task->tk_status;
switch (task->tk_status) {
case 0:
- renew_lease(server, data->timestamp);
+ renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
+ data->timestamp);
if (data->arg.new_lock && !data->cancelled) {
- data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
+ data->fl.c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
goto out_restart;
}
@@ -7166,16 +7448,21 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
} else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid))
goto out_restart;
break;
- case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_OLD_STATEID:
+ if (data->arg.new_lock_owner != 0 &&
+ nfs4_refresh_open_old_stateid(&data->arg.open_stateid,
+ lsp->ls_state))
+ goto out_restart;
+ if (nfs4_refresh_lock_old_stateid(&data->arg.lock_stateid, lsp))
+ goto out_restart;
+ fallthrough;
+ case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
if (data->arg.new_lock_owner != 0) {
if (!nfs4_stateid_match(&data->arg.open_stateid,
&lsp->ls_state->open_stateid))
goto out_restart;
- else if (nfs4_async_handle_error(task, server, lsp->ls_state, NULL) == -EAGAIN)
- goto out_restart;
} else if (!nfs4_stateid_match(&data->arg.lock_stateid,
&lsp->ls_stateid))
goto out_restart;
@@ -7251,7 +7538,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
if (nfs_server_capable(state->inode, NFS_CAP_MOVEABLE))
task_setup_data.flags |= RPC_TASK_MOVEABLE;
- data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file),
+ data = nfs4_alloc_lockdata(fl,
+ nfs_file_open_context(fl->c.flc_file),
fl->fl_u.nfs4_fl.owner, GFP_KERNEL);
if (data == NULL)
return -ENOMEM;
@@ -7357,10 +7645,10 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
{
struct nfs_inode *nfsi = NFS_I(state->inode);
struct nfs4_state_owner *sp = state->owner;
- unsigned char fl_flags = request->fl_flags;
+ unsigned char flags = request->c.flc_flags;
int status;
- request->fl_flags |= FL_ACCESS;
+ request->c.flc_flags |= FL_ACCESS;
status = locks_lock_inode_wait(state->inode, request);
if (status < 0)
goto out;
@@ -7369,7 +7657,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
/* Yes: cache locks! */
/* ...but avoid races with delegation recall... */
- request->fl_flags = fl_flags & ~FL_SLEEP;
+ request->c.flc_flags = flags & ~FL_SLEEP;
status = locks_lock_inode_wait(state->inode, request);
up_read(&nfsi->rwsem);
mutex_unlock(&sp->so_delegreturn_mutex);
@@ -7379,7 +7667,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
mutex_unlock(&sp->so_delegreturn_mutex);
status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
out:
- request->fl_flags = fl_flags;
+ request->c.flc_flags = flags;
return status;
}
@@ -7521,7 +7809,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
if (!(IS_SETLK(cmd) || IS_SETLKW(cmd)))
return -EINVAL;
- if (request->fl_type == F_UNLCK) {
+ if (lock_is_unlock(request)) {
if (state != NULL)
return nfs4_proc_unlck(state, cmd, request);
return 0;
@@ -7530,7 +7818,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
if (state == NULL)
return -ENOLCK;
- if ((request->fl_flags & FL_POSIX) &&
+ if ((request->c.flc_flags & FL_POSIX) &&
!test_bit(NFS_STATE_POSIX_LOCKS, &state->flags))
return -ENOLCK;
@@ -7538,7 +7826,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
* Don't rely on the VFS having checked the file open mode,
* since it won't do this for flock() locks.
*/
- switch (request->fl_type) {
+ switch (request->c.flc_type) {
case F_RDLCK:
if (!(filp->f_mode & FMODE_READ))
return -EBADF;
@@ -7560,7 +7848,7 @@ static int nfs4_delete_lease(struct file *file, void **priv)
return generic_setlease(file, F_UNLCK, NULL, priv);
}
-static int nfs4_add_lease(struct file *file, long arg, struct file_lock **lease,
+static int nfs4_add_lease(struct file *file, int arg, struct file_lease **lease,
void **priv)
{
struct inode *inode = file_inode(file);
@@ -7568,17 +7856,17 @@ static int nfs4_add_lease(struct file *file, long arg, struct file_lock **lease,
int ret;
/* No delegation, no lease */
- if (!nfs4_have_delegation(inode, type))
+ if (!nfs4_have_delegation(inode, type, 0))
return -EAGAIN;
ret = generic_setlease(file, arg, lease, priv);
- if (ret || nfs4_have_delegation(inode, type))
+ if (ret || nfs4_have_delegation(inode, type, 0))
return ret;
/* We raced with a delegation return */
nfs4_delete_lease(file, priv);
return -EAGAIN;
}
-int nfs4_proc_setlease(struct file *file, long arg, struct file_lock **lease,
+int nfs4_proc_setlease(struct file *file, int arg, struct file_lease **lease,
void **priv)
{
switch (arg) {
@@ -7602,10 +7890,10 @@ int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state,
return err;
do {
err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
- if (err != -NFS4ERR_DELAY)
+ if (err != -NFS4ERR_DELAY && err != -NFS4ERR_GRACE)
break;
ssleep(1);
- } while (err == -NFS4ERR_DELAY);
+ } while (err == -NFS4ERR_DELAY || err == -NFSERR_GRACE);
return nfs4_handle_delegation_recall_error(server, state, stateid, fl, err);
}
@@ -7692,7 +7980,7 @@ nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp)
#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
static int nfs4_xattr_set_nfs4_acl(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *key, const void *buf,
size_t buflen, int flags)
@@ -7716,7 +8004,7 @@ static bool nfs4_xattr_list_nfs4_acl(struct dentry *dentry)
#define XATTR_NAME_NFSV4_DACL "system.nfs4_dacl"
static int nfs4_xattr_set_nfs4_dacl(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *key, const void *buf,
size_t buflen, int flags)
@@ -7739,7 +8027,7 @@ static bool nfs4_xattr_list_nfs4_dacl(struct dentry *dentry)
#define XATTR_NAME_NFSV4_SACL "system.nfs4_sacl"
static int nfs4_xattr_set_nfs4_sacl(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *key, const void *buf,
size_t buflen, int flags)
@@ -7764,7 +8052,7 @@ static bool nfs4_xattr_list_nfs4_sacl(struct dentry *dentry)
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
static int nfs4_xattr_set_nfs4_label(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *key, const void *buf,
size_t buflen, int flags)
@@ -7815,7 +8103,7 @@ nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len)
#ifdef CONFIG_NFS_V4_2
static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *key, const void *buf,
size_t buflen, int flags)
@@ -8779,6 +9067,8 @@ nfs4_run_exchange_id(struct nfs_client *clp, const struct cred *cred,
#ifdef CONFIG_NFS_V4_1_MIGRATION
calldata->args.flags |= EXCHGID4_FLAG_SUPP_MOVED_MIGR;
#endif
+ if (test_bit(NFS_CS_PNFS, &clp->cl_flags))
+ calldata->args.flags |= EXCHGID4_FLAG_USE_PNFS_DS;
msg.rpc_argp = &calldata->args;
msg.rpc_resp = &calldata->res;
task_setup_data.callback_data = calldata;
@@ -8920,23 +9210,30 @@ void nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt,
sp4_how = (adata->clp->cl_sp4_flags == 0 ? SP4_NONE : SP4_MACH_CRED);
+try_again:
/* Test connection for session trunking. Async exchange_id call */
task = nfs4_run_exchange_id(adata->clp, adata->cred, sp4_how, xprt);
if (IS_ERR(task))
return;
status = task->tk_status;
- if (status == 0)
+ if (status == 0) {
status = nfs4_detect_session_trunking(adata->clp,
task->tk_msg.rpc_resp, xprt);
-
+ trace_nfs4_trunked_exchange_id(adata->clp,
+ xprt->address_strings[RPC_DISPLAY_ADDR], status);
+ }
if (status == 0)
rpc_clnt_xprt_switch_add_xprt(clnt, xprt);
- else if (rpc_clnt_xprt_switch_has_addr(clnt,
+ else if (status != -NFS4ERR_DELAY && rpc_clnt_xprt_switch_has_addr(clnt,
(struct sockaddr *)&xprt->addr))
rpc_clnt_xprt_switch_remove_xprt(clnt, xprt);
rpc_put_task(task);
+ if (status == -NFS4ERR_DELAY) {
+ ssleep(1);
+ goto try_again;
+ }
}
EXPORT_SYMBOL_GPL(nfs4_test_session_trunk);
@@ -9163,7 +9460,7 @@ static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args
goto out;
if (rcvd->max_rqst_sz > sent->max_rqst_sz)
return -EINVAL;
- if (rcvd->max_resp_sz < sent->max_resp_sz)
+ if (rcvd->max_resp_sz > sent->max_resp_sz)
return -EINVAL;
if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached)
return -EINVAL;
@@ -9357,7 +9654,7 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data)
return;
trace_nfs4_sequence(clp, task->tk_status);
- if (task->tk_status < 0) {
+ if (task->tk_status < 0 && clp->cl_cons_state >= 0) {
dprintk("%s ERROR %d\n", __func__, task->tk_status);
if (refcount_read(&clp->cl_count) == 1)
return;
@@ -9607,6 +9904,9 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
nfs4_sequence_free_slot(&lgp->res.seq_res);
+ exception->state = NULL;
+ exception->stateid = NULL;
+
switch (nfs4err) {
case 0:
goto out;
@@ -9642,6 +9942,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
status = -EBUSY;
break;
case -NFS4ERR_RECALLCONFLICT:
+ case -NFS4ERR_RETURNCONFLICT:
status = -ERECALLCONFLICT;
break;
case -NFS4ERR_DELEG_REVOKED:
@@ -9702,7 +10003,8 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = {
};
struct pnfs_layout_segment *
-nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout)
+nfs4_proc_layoutget(struct nfs4_layoutget *lgp,
+ struct nfs4_exception *exception)
{
struct inode *inode = lgp->args.inode;
struct nfs_server *server = NFS_SERVER(inode);
@@ -9722,13 +10024,10 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout)
RPC_TASK_MOVEABLE,
};
struct pnfs_layout_segment *lseg = NULL;
- struct nfs4_exception exception = {
- .inode = inode,
- .timeout = *timeout,
- };
int status = 0;
nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0);
+ exception->retry = 0;
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
@@ -9739,11 +10038,12 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout)
goto out;
if (task->tk_status < 0) {
- status = nfs4_layoutget_handle_exception(task, lgp, &exception);
- *timeout = exception.timeout;
+ exception->retry = 1;
+ status = nfs4_layoutget_handle_exception(task, lgp, exception);
} else if (lgp->res.layoutp->len == 0) {
+ exception->retry = 1;
status = -EAGAIN;
- *timeout = nfs4_update_delay(&exception.timeout);
+ nfs4_update_delay(&exception->timeout);
} else
lseg = pnfs_layout_process(lgp);
out:
@@ -9781,6 +10081,11 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
if (!nfs41_sequence_process(task, &lrp->res.seq_res))
return;
+ if (task->tk_rpc_status == -ETIMEDOUT) {
+ lrp->rpc_status = -EAGAIN;
+ lrp->res.lrs_present = 0;
+ return;
+ }
/*
* Was there an RPC level error? Assume the call succeeded,
* and that we need to release the layout
@@ -9800,13 +10105,25 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
fallthrough;
default:
task->tk_status = 0;
+ lrp->res.lrs_present = 0;
fallthrough;
case 0:
break;
+ case -NFS4ERR_BADSESSION:
+ case -NFS4ERR_DEADSESSION:
+ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ nfs4_schedule_session_recovery(server->nfs_client->cl_session,
+ task->tk_status);
+ lrp->res.lrs_present = 0;
+ lrp->rpc_status = -EAGAIN;
+ task->tk_status = 0;
+ break;
case -NFS4ERR_DELAY:
- if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN)
- break;
- goto out_restart;
+ if (nfs4_async_handle_error(task, server, NULL, NULL) ==
+ -EAGAIN)
+ goto out_restart;
+ lrp->res.lrs_present = 0;
+ break;
}
return;
out_restart:
@@ -9820,8 +10137,13 @@ static void nfs4_layoutreturn_release(void *calldata)
struct nfs4_layoutreturn *lrp = calldata;
struct pnfs_layout_hdr *lo = lrp->args.layout;
- pnfs_layoutreturn_free_lsegs(lo, &lrp->args.stateid, &lrp->args.range,
+ if (lrp->rpc_status == 0 || !lrp->inode)
+ pnfs_layoutreturn_free_lsegs(
+ lo, &lrp->args.stateid, &lrp->args.range,
lrp->res.lrs_present ? &lrp->res.stateid : NULL);
+ else
+ pnfs_layoutreturn_retry_later(lo, &lrp->args.stateid,
+ &lrp->args.range);
nfs4_sequence_free_slot(&lrp->res.seq_res);
if (lrp->ld_private.ops && lrp->ld_private.ops->free)
lrp->ld_private.ops->free(&lrp->ld_private);
@@ -9837,7 +10159,7 @@ static const struct rpc_call_ops nfs4_layoutreturn_call_ops = {
.rpc_release = nfs4_layoutreturn_release,
};
-int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
+int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, unsigned int flags)
{
struct rpc_task *task;
struct rpc_message msg = {
@@ -9860,7 +10182,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
&task_setup_data.rpc_client, &msg);
lrp->inode = nfs_igrab_and_active(lrp->args.inode);
- if (!sync) {
+ if (flags & PNFS_FL_LAYOUTRETURN_ASYNC) {
if (!lrp->inode) {
nfs4_layoutreturn_release(lrp);
return -EAGAIN;
@@ -9868,6 +10190,8 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
task_setup_data.flags |= RPC_TASK_ASYNC;
}
if (!lrp->inode)
+ flags |= PNFS_FL_LAYOUTRETURN_PRIVILEGED;
+ if (flags & PNFS_FL_LAYOUTRETURN_PRIVILEGED)
nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1,
1);
else
@@ -9876,7 +10200,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
- if (sync)
+ if (!(flags & PNFS_FL_LAYOUTRETURN_ASYNC))
status = task->tk_status;
trace_nfs4_layoutreturn(lrp->args.inode, &lrp->args.stateid, status);
dprintk("<-- %s status=%d\n", __func__, status);
@@ -10038,10 +10362,10 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
* Use the state managment nfs_client cl_rpcclient, which uses krb5i (if
* possible) as per RFC3530bis and RFC5661 Security Considerations sections
*/
-static int
-_nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fsinfo *info,
- struct nfs4_secinfo_flavors *flavors, bool use_integrity)
+static int _nfs41_proc_secinfo_no_name(struct nfs_server *server,
+ struct nfs_fh *fhandle,
+ struct nfs4_secinfo_flavors *flavors,
+ bool use_integrity)
{
struct nfs41_secinfo_no_name_args args = {
.style = SECINFO_STYLE_CURRENT_FH,
@@ -10085,9 +10409,9 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
return status;
}
-static int
-nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors)
+static int nfs41_proc_secinfo_no_name(struct nfs_server *server,
+ struct nfs_fh *fhandle,
+ struct nfs4_secinfo_flavors *flavors)
{
struct nfs4_exception exception = {
.interruptible = true,
@@ -10099,7 +10423,7 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
/* try to use integrity protection with machine cred */
if (_nfs4_is_integrity_protected(server->nfs_client))
- err = _nfs41_proc_secinfo_no_name(server, fhandle, info,
+ err = _nfs41_proc_secinfo_no_name(server, fhandle,
flavors, true);
/*
@@ -10109,7 +10433,7 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
* the current filesystem's rpc_client and the user cred.
*/
if (err == -NFS4ERR_WRONGSEC)
- err = _nfs41_proc_secinfo_no_name(server, fhandle, info,
+ err = _nfs41_proc_secinfo_no_name(server, fhandle,
flavors, false);
switch (err) {
@@ -10125,9 +10449,8 @@ out:
return err;
}
-static int
-nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fsinfo *info)
+static int nfs41_find_root_sec(struct nfs_server *server,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
int err;
struct page *page;
@@ -10143,14 +10466,14 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
}
flavors = page_address(page);
- err = nfs41_proc_secinfo_no_name(server, fhandle, info, flavors);
+ err = nfs41_proc_secinfo_no_name(server, fhandle, flavors);
/*
* Fall back on "guess and check" method if
* the server doesn't support SECINFO_NO_NAME
*/
if (err == -NFS4ERR_WRONGSEC || err == -ENOTSUPP) {
- err = nfs4_find_root_sec(server, fhandle, info);
+ err = nfs4_find_root_sec(server, fhandle, fattr);
goto out_freepage;
}
if (err)
@@ -10175,8 +10498,8 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
flavor = RPC_AUTH_MAXFLAVOR;
if (flavor != RPC_AUTH_MAXFLAVOR) {
- err = nfs4_lookup_root_sec(server, fhandle,
- info, flavor);
+ err = nfs4_lookup_root_sec(server, fhandle, fattr,
+ flavor);
if (!err)
break;
}
@@ -10194,12 +10517,12 @@ out:
}
static int _nfs41_test_stateid(struct nfs_server *server,
- nfs4_stateid *stateid,
- const struct cred *cred)
+ const nfs4_stateid *stateid,
+ const struct cred *cred)
{
int status;
struct nfs41_test_stateid_args args = {
- .stateid = stateid,
+ .stateid = *stateid,
};
struct nfs41_test_stateid_res res;
struct rpc_message msg = {
@@ -10255,8 +10578,8 @@ static void nfs4_handle_delay_or_session_error(struct nfs_server *server,
* failed or the state ID is not currently valid.
*/
static int nfs41_test_stateid(struct nfs_server *server,
- nfs4_stateid *stateid,
- const struct cred *cred)
+ const nfs4_stateid *stateid,
+ const struct cred *cred)
{
struct nfs4_exception exception = {
.interruptible = true,
@@ -10323,7 +10646,7 @@ static const struct rpc_call_ops nfs41_free_stateid_ops = {
* Note: this function is always asynchronous.
*/
static int nfs41_free_stateid(struct nfs_server *server,
- const nfs4_stateid *stateid,
+ nfs4_stateid *stateid,
const struct cred *cred,
bool privileged)
{
@@ -10363,6 +10686,7 @@ static int nfs41_free_stateid(struct nfs_server *server,
if (IS_ERR(task))
return PTR_ERR(task);
rpc_put_task(task);
+ stateid->type = NFS4_FREED_STATEID_TYPE;
return 0;
}
@@ -10378,6 +10702,8 @@ nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
static bool nfs41_match_stateid(const nfs4_stateid *s1,
const nfs4_stateid *s2)
{
+ trace_nfs41_match_stateid(s1, s2);
+
if (s1->type != s2->type)
return false;
@@ -10395,6 +10721,8 @@ static bool nfs41_match_stateid(const nfs4_stateid *s1,
static bool nfs4_match_stateid(const nfs4_stateid *s1,
const nfs4_stateid *s2)
{
+ trace_nfs4_match_stateid(s1, s2);
+
return nfs4_stateid_match(s1, s2);
}
@@ -10529,12 +10857,14 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
| NFS_CAP_OFFLOAD_CANCEL
| NFS_CAP_COPY_NOTIFY
| NFS_CAP_DEALLOCATE
+ | NFS_CAP_ZERO_RANGE
| NFS_CAP_SEEK
| NFS_CAP_LAYOUTSTATS
| NFS_CAP_CLONE
| NFS_CAP_LAYOUTERROR
| NFS_CAP_READ_PLUS
- | NFS_CAP_MOVEABLE,
+ | NFS_CAP_MOVEABLE
+ | NFS_CAP_OFFLOAD_STATUS,
.init_client = nfs41_init_client,
.shutdown_client = nfs41_shutdown_client,
.match_stateid = nfs41_match_stateid,
@@ -10563,30 +10893,44 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
{
- ssize_t error, error2, error3;
+ ssize_t error, error2, error3, error4 = 0;
+ size_t left = size;
- error = generic_listxattr(dentry, list, size);
+ error = generic_listxattr(dentry, list, left);
if (error < 0)
return error;
if (list) {
list += error;
- size -= error;
+ left -= error;
}
- error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, size);
+ error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, left);
if (error2 < 0)
return error2;
if (list) {
list += error2;
- size -= error2;
+ left -= error2;
}
- error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, size);
+ error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, left);
if (error3 < 0)
return error3;
+ if (list) {
+ list += error3;
+ left -= error3;
+ }
- return error + error2 + error3;
+ if (!nfs_server_capable(d_inode(dentry), NFS_CAP_SECURITY_LABEL)) {
+ error4 = security_inode_listsecurity(d_inode(dentry), list, left);
+ if (error4 < 0)
+ return error4;
+ }
+
+ error += error2 + error3 + error4;
+ if (size && error > size)
+ return -ERANGE;
+ return error;
}
static void nfs4_enable_swap(struct inode *inode)
@@ -10604,7 +10948,11 @@ static void nfs4_disable_swap(struct inode *inode)
/* The state manager thread will now exit once it is
* woken.
*/
- wake_up_var(&NFS_SERVER(inode)->nfs_client->cl_state);
+ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+
+ set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
+ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
+ wake_up_var(&clp->cl_state);
}
static const struct inode_operations nfs4_dir_inode_operations = {
@@ -10631,6 +10979,26 @@ static const struct inode_operations nfs4_file_inode_operations = {
.listxattr = nfs4_listxattr,
};
+static struct nfs_server *nfs4_clone_server(struct nfs_server *source,
+ struct nfs_fh *fh, struct nfs_fattr *fattr,
+ rpc_authflavor_t flavor)
+{
+ struct nfs_server *server;
+ int error;
+
+ server = nfs_clone_server(source, fh, fattr, flavor);
+ if (IS_ERR(server))
+ return server;
+
+ error = nfs4_delegation_hash_alloc(server);
+ if (error) {
+ nfs_free_server(server);
+ return ERR_PTR(error);
+ }
+
+ return server;
+}
+
const struct nfs_rpc_ops nfs_v4_clientops = {
.version = 4, /* protocol version */
.dentry_ops = &nfs4_dentry_operations,
@@ -10678,11 +11046,12 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.close_context = nfs4_close_context,
.open_context = nfs4_atomic_open,
.have_delegation = nfs4_have_delegation,
+ .return_delegation = nfs4_inode_return_delegation,
.alloc_client = nfs4_alloc_client,
.init_client = nfs4_init_client,
.free_client = nfs4_free_client,
.create_server = nfs4_create_server,
- .clone_server = nfs_clone_server,
+ .clone_server = nfs4_clone_server,
.discover_trunking = nfs4_discover_trunking,
.enable_swap = nfs4_enable_swap,
.disable_swap = nfs4_disable_swap,
@@ -10719,7 +11088,7 @@ static const struct xattr_handler nfs4_xattr_nfs4_user_handler = {
};
#endif
-const struct xattr_handler *nfs4_xattr_handlers[] = {
+const struct xattr_handler * const nfs4_xattr_handlers[] = {
&nfs4_xattr_nfs4_acl_handler,
#if defined(CONFIG_NFS_V4_1)
&nfs4_xattr_nfs4_dacl_handler,
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index db3811af0796..18ae614e5a6c 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -122,7 +122,7 @@ nfs4_schedule_state_renewal(struct nfs_client *clp)
timeout = 5 * HZ;
dprintk("%s: requeueing work. Lease period = %ld\n",
__func__, (timeout + HZ - 1) / HZ);
- mod_delayed_work(system_wq, &clp->cl_renewd, timeout);
+ mod_delayed_work(system_percpu_wq, &clp->cl_renewd, timeout);
set_bit(NFS_CS_RENEWD, &clp->cl_res_state);
spin_unlock(&clp->cl_lock);
}
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index 351616c61df5..f9c291e2165c 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -148,16 +148,12 @@ static inline void nfs4_copy_sessionid(struct nfs4_sessionid *dst,
memcpy(dst->data, src->data, NFS4_MAX_SESSIONID_LEN);
}
-#ifdef CONFIG_CRC32
/*
* nfs_session_id_hash - calculate the crc32 hash for the session id
* @session - pointer to session
*/
#define nfs_session_id_hash(sess_id) \
(~crc32_le(0xFFFFFFFF, &(sess_id)->data[0], sizeof((sess_id)->data)))
-#else
-#define nfs_session_id_hash(session) (0)
-#endif
#else /* defined(CONFIG_NFS_V4_1) */
static inline int nfs4_init_session(struct nfs_client *clp)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 2a0ca5c7f082..01179f7de322 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -67,6 +67,8 @@
#define OPENOWNER_POOL_SIZE 8
+static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp);
+
const nfs4_stateid zero_stateid = {
{ .data = { 0 } },
.type = NFS4_SPECIAL_STATEID_TYPE,
@@ -330,6 +332,8 @@ do_confirm:
status = nfs4_proc_create_session(clp, cred);
if (status != 0)
goto out;
+ if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R))
+ nfs4_state_start_reclaim_reboot(clp);
nfs41_finish_session_reset(clp);
nfs_mark_client_ready(clp, NFS_CS_READY);
out:
@@ -497,11 +501,7 @@ nfs4_alloc_state_owner(struct nfs_server *server,
sp = kzalloc(sizeof(*sp), gfp_flags);
if (!sp)
return NULL;
- sp->so_seqid.owner_id = ida_alloc(&server->openowner_id, gfp_flags);
- if (sp->so_seqid.owner_id < 0) {
- kfree(sp);
- return NULL;
- }
+ sp->so_seqid.owner_id = atomic64_inc_return(&server->owner_ctr);
sp->so_server = server;
sp->so_cred = get_cred(cred);
spin_lock_init(&sp->so_lock);
@@ -509,7 +509,6 @@ nfs4_alloc_state_owner(struct nfs_server *server,
nfs4_init_seqid_counter(&sp->so_seqid);
atomic_set(&sp->so_count, 1);
INIT_LIST_HEAD(&sp->so_lru);
- seqcount_spinlock_init(&sp->so_reclaim_seqcount, &sp->so_lock);
mutex_init(&sp->so_delegreturn_mutex);
return sp;
}
@@ -533,7 +532,6 @@ static void nfs4_free_state_owner(struct nfs4_state_owner *sp)
{
nfs4_destroy_seqid_counter(&sp->so_seqid);
put_cred(sp->so_cred);
- ida_free(&sp->so_server->openowner_id, sp->so_seqid.owner_id);
kfree(sp);
}
@@ -843,15 +841,15 @@ void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode)
*/
static struct nfs4_lock_state *
__nfs4_find_lock_state(struct nfs4_state *state,
- fl_owner_t fl_owner, fl_owner_t fl_owner2)
+ fl_owner_t owner, fl_owner_t owner2)
{
struct nfs4_lock_state *pos, *ret = NULL;
list_for_each_entry(pos, &state->lock_states, ls_locks) {
- if (pos->ls_owner == fl_owner) {
+ if (pos->ls_owner == owner) {
ret = pos;
break;
}
- if (pos->ls_owner == fl_owner2)
+ if (pos->ls_owner == owner2)
ret = pos;
}
if (ret)
@@ -864,7 +862,7 @@ __nfs4_find_lock_state(struct nfs4_state *state,
* exists, return an uninitialized one.
*
*/
-static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
+static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t owner)
{
struct nfs4_lock_state *lsp;
struct nfs_server *server = state->owner->so_server;
@@ -875,20 +873,14 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
nfs4_init_seqid_counter(&lsp->ls_seqid);
refcount_set(&lsp->ls_count, 1);
lsp->ls_state = state;
- lsp->ls_owner = fl_owner;
- lsp->ls_seqid.owner_id = ida_alloc(&server->lockowner_id, GFP_KERNEL_ACCOUNT);
- if (lsp->ls_seqid.owner_id < 0)
- goto out_free;
+ lsp->ls_owner = owner;
+ lsp->ls_seqid.owner_id = atomic64_inc_return(&server->owner_ctr);
INIT_LIST_HEAD(&lsp->ls_locks);
return lsp;
-out_free:
- kfree(lsp);
- return NULL;
}
void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
{
- ida_free(&server->lockowner_id, lsp->ls_seqid.owner_id);
nfs4_destroy_seqid_counter(&lsp->ls_seqid);
kfree(lsp);
}
@@ -976,7 +968,7 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
if (fl->fl_ops != NULL)
return 0;
- lsp = nfs4_get_lock_state(state, fl->fl_owner);
+ lsp = nfs4_get_lock_state(state, fl->c.flc_owner);
if (lsp == NULL)
return -ENOMEM;
fl->fl_u.nfs4_fl.owner = lsp;
@@ -989,7 +981,7 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
const struct nfs_lock_context *l_ctx)
{
struct nfs4_lock_state *lsp;
- fl_owner_t fl_owner, fl_flock_owner;
+ fl_owner_t owner, fl_flock_owner;
int ret = -ENOENT;
if (l_ctx == NULL)
@@ -998,11 +990,11 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
goto out;
- fl_owner = l_ctx->lockowner;
+ owner = l_ctx->lockowner;
fl_flock_owner = l_ctx->open_context->flock_owner;
spin_lock(&state->state_lock);
- lsp = __nfs4_find_lock_state(state, fl_owner, fl_flock_owner);
+ lsp = __nfs4_find_lock_state(state, owner, fl_flock_owner);
if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags))
ret = -EIO;
else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) {
@@ -1091,14 +1083,12 @@ void nfs_release_seqid(struct nfs_seqid *seqid)
return;
sequence = seqid->sequence;
spin_lock(&sequence->lock);
- list_del_init(&seqid->list);
- if (!list_empty(&sequence->list)) {
- struct nfs_seqid *next;
-
- next = list_first_entry(&sequence->list,
- struct nfs_seqid, list);
+ if (list_is_first(&seqid->list, &sequence->list) &&
+ !list_is_singular(&sequence->list)) {
+ struct nfs_seqid *next = list_next_entry(seqid, list);
rpc_wake_up_queued_task(&sequence->wait, next->task);
}
+ list_del_init(&seqid->list);
spin_unlock(&sequence->lock);
}
@@ -1205,17 +1195,26 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
{
struct task_struct *task;
char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
- struct rpc_clnt *cl = clp->cl_rpcclient;
+ struct rpc_clnt *clnt = clp->cl_rpcclient;
+ bool swapon = false;
- while (cl != cl->cl_parent)
- cl = cl->cl_parent;
+ if (clp->cl_cons_state < 0)
+ return;
set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
- if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) {
- wake_up_var(&clp->cl_state);
- return;
+
+ if (atomic_read(&clnt->cl_swapper)) {
+ swapon = !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE,
+ &clp->cl_state);
+ if (!swapon) {
+ wake_up_var(&clp->cl_state);
+ return;
+ }
}
- set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
+
+ if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
+ return;
+
__module_get(THIS_MODULE);
refcount_inc(&clp->cl_count);
@@ -1232,8 +1231,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
__func__, PTR_ERR(task));
if (!nfs_client_init_is_complete(clp))
nfs_mark_client_ready(clp, PTR_ERR(task));
+ if (swapon)
+ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
nfs4_clear_state_manager_bit(clp);
- clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
nfs_put_client(clp);
module_put(THIS_MODULE);
}
@@ -1403,7 +1403,7 @@ int nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_
dprintk("%s: scheduling stateid recovery for server %s\n", __func__,
clp->cl_hostname);
nfs4_schedule_state_manager(clp);
- return 0;
+ return clp->cl_cons_state < 0 ? clp->cl_cons_state : 0;
}
EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery);
@@ -1515,8 +1515,8 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
down_write(&nfsi->rwsem);
spin_lock(&flctx->flc_lock);
restart:
- list_for_each_entry(fl, list, fl_list) {
- if (nfs_file_open_context(fl->fl_file)->state != state)
+ for_each_file_lock(fl, list) {
+ if (nfs_file_open_context(fl->c.flc_file)->state != state)
continue;
spin_unlock(&flctx->flc_lock);
status = ops->recover_lock(state, fl);
@@ -1583,7 +1583,7 @@ static void nfs42_complete_copies(struct nfs4_state_owner *sp, struct nfs4_state
complete(&copy->completion);
}
}
- list_for_each_entry(copy, &sp->so_server->ss_copies, src_copies) {
+ list_for_each_entry(copy, &sp->so_server->ss_src_copies, src_copies) {
if ((test_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags) &&
!nfs4_stateid_match_other(&state->stateid,
&copy->parent_src_state->stateid)))
@@ -1653,7 +1653,6 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp,
* server that doesn't support a grace period.
*/
spin_lock(&sp->so_lock);
- raw_write_seqcount_begin(&sp->so_reclaim_seqcount);
restart:
list_for_each_entry(state, &sp->so_states, open_states) {
if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
@@ -1721,7 +1720,6 @@ restart:
spin_lock(&sp->so_lock);
goto restart;
}
- raw_write_seqcount_end(&sp->so_reclaim_seqcount);
spin_unlock(&sp->so_lock);
#ifdef CONFIG_NFS_V4_2
if (found_ssc_copy_state)
@@ -1731,7 +1729,6 @@ restart:
out_err:
nfs4_put_open_state(state);
spin_lock(&sp->so_lock);
- raw_write_seqcount_end(&sp->so_reclaim_seqcount);
spin_unlock(&sp->so_lock);
return status;
}
@@ -1853,6 +1850,7 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
if (!nfs4_state_clear_reclaim_reboot(clp))
return;
+ pnfs_destroy_all_layouts(clp);
ops = clp->cl_mvops->reboot_recovery_ops;
cred = nfs4_get_clid_cred(clp);
err = nfs4_reclaim_complete(clp, ops, cred);
@@ -1914,9 +1912,12 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov
struct nfs_server *server;
struct rb_node *pos;
LIST_HEAD(freeme);
- int status = 0;
int lost_locks = 0;
+ int status;
+ status = nfs4_begin_drain_session(clp);
+ if (status < 0)
+ return status;
restart:
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
@@ -1943,6 +1944,7 @@ restart:
set_bit(ops->owner_flag_bit, &sp->so_flags);
nfs4_put_state_owner(sp);
status = nfs4_recovery_handle_error(clp, status);
+ nfs4_free_state_owners(&freeme);
return (status != 0) ? status : -EAGAIN;
}
@@ -1953,6 +1955,7 @@ restart:
}
rcu_read_unlock();
nfs4_free_state_owners(&freeme);
+ nfs_local_probe_async(clp);
if (lost_locks)
pr_warn("NFS: %s: lost %d locks\n",
clp->cl_hostname, lost_locks);
@@ -2009,6 +2012,12 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
nfs_mark_client_ready(clp, -EPERM);
clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
return -EPERM;
+ case -ETIMEDOUT:
+ if (clp->cl_cons_state == NFS_CS_SESSION_INITING) {
+ nfs_mark_client_ready(clp, -EIO);
+ return -EIO;
+ }
+ fallthrough;
case -EACCES:
case -NFS4ERR_DELAY:
case -EAGAIN:
@@ -2055,7 +2064,6 @@ static int nfs4_establish_lease(struct nfs_client *clp)
put_cred(cred);
if (status != 0)
return status;
- pnfs_destroy_all_layouts(clp);
return 0;
}
@@ -2103,6 +2111,7 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
{
struct nfs_client *clp = server->nfs_client;
struct nfs4_fs_locations *locations = NULL;
+ struct nfs_fattr *fattr;
struct inode *inode;
struct page *page;
int status, result;
@@ -2112,19 +2121,16 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
(unsigned long long)server->fsid.minor,
clp->cl_hostname);
- result = 0;
page = alloc_page(GFP_KERNEL);
locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
- if (page == NULL || locations == NULL) {
- dprintk("<-- %s: no memory\n", __func__);
- goto out;
- }
- locations->fattr = nfs_alloc_fattr();
- if (locations->fattr == NULL) {
+ fattr = nfs_alloc_fattr();
+ if (page == NULL || locations == NULL || fattr == NULL) {
dprintk("<-- %s: no memory\n", __func__);
+ result = 0;
goto out;
}
+ locations->fattr = fattr;
inode = d_inode(server->super->s_root);
result = nfs4_proc_get_locations(server, NFS_FH(inode), locations,
page, cred);
@@ -2669,6 +2675,8 @@ static void nfs4_state_manager(struct nfs_client *clp)
section = "reclaim reboot";
status = nfs4_do_reclaim(clp,
clp->cl_mvops->reboot_recovery_ops);
+ if (status == 0)
+ status = pnfs_layout_handle_reboot(clp);
if (status == -EAGAIN)
continue;
if (status < 0)
@@ -2680,6 +2688,9 @@ static void nfs4_state_manager(struct nfs_client *clp)
/* Detect expired delegations... */
if (test_and_clear_bit(NFS4CLNT_DELEGATION_EXPIRED, &clp->cl_state)) {
section = "detect expired delegations";
+ status = nfs4_begin_drain_session(clp);
+ if (status < 0)
+ goto out_error;
nfs_reap_expired_delegations(clp);
continue;
}
@@ -2700,6 +2711,13 @@ static void nfs4_state_manager(struct nfs_client *clp)
nfs4_end_drain_session(clp);
nfs4_clear_state_manager_bit(clp);
+ if (test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) &&
+ !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING,
+ &clp->cl_state)) {
+ memflags = memalloc_nofs_save();
+ continue;
+ }
+
if (!test_and_set_bit(NFS4CLNT_RECALL_RUNNING, &clp->cl_state)) {
if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
nfs_client_return_marked_delegations(clp);
@@ -2721,7 +2739,18 @@ out_error:
pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s"
" with error %d\n", section_sep, section,
clp->cl_hostname, -status);
- ssleep(1);
+ switch (status) {
+ case -ENETDOWN:
+ case -ENETUNREACH:
+ nfs_mark_client_ready(clp, -EIO);
+ break;
+ case -EINVAL:
+ nfs_mark_client_ready(clp, status);
+ break;
+ default:
+ ssleep(1);
+ break;
+ }
out_drain:
memalloc_nofs_restore(memflags);
nfs4_end_drain_session(clp);
@@ -2738,22 +2767,25 @@ static int nfs4_run_state_manager(void *ptr)
allow_signal(SIGKILL);
again:
- set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
nfs4_state_manager(clp);
- if (atomic_read(&cl->cl_swapper)) {
+
+ if (test_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) &&
+ !test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) {
wait_var_event_interruptible(&clp->cl_state,
test_bit(NFS4CLNT_RUN_MANAGER,
&clp->cl_state));
- if (atomic_read(&cl->cl_swapper) &&
- test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state))
+ if (!atomic_read(&cl->cl_swapper))
+ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
+ if (refcount_read(&clp->cl_count) > 1 && !signalled() &&
+ !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state))
goto again;
/* Either no longer a swapper, or were signalled */
+ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
}
- clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
if (refcount_read(&clp->cl_count) > 1 && !signalled() &&
test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) &&
- !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state))
+ !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state))
goto again;
nfs_put_client(clp);
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index d09bcfd7db89..5ec9c83f1ef0 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -145,18 +145,13 @@ static int do_nfs4_mount(struct nfs_server *server,
const char *export_path)
{
struct nfs_fs_context *root_ctx;
+ struct nfs_fs_context *ctx;
struct fs_context *root_fc;
struct vfsmount *root_mnt;
struct dentry *dentry;
- size_t len;
+ char *source;
int ret;
- struct fs_parameter param = {
- .key = "source",
- .type = fs_value_is_string,
- .dirfd = -1,
- };
-
if (IS_ERR(server))
return PTR_ERR(server);
@@ -168,25 +163,32 @@ static int do_nfs4_mount(struct nfs_server *server,
kfree(root_fc->source);
root_fc->source = NULL;
+ ctx = nfs_fc2context(fc);
root_ctx = nfs_fc2context(root_fc);
root_ctx->internal = true;
root_ctx->server = server;
- /* We leave export_path unset as it's not used to find the root. */
- len = strlen(hostname) + 5;
- param.string = kmalloc(len, GFP_KERNEL);
- if (param.string == NULL) {
- put_fs_context(root_fc);
- return -ENOMEM;
+ if (ctx->fscache_uniq) {
+ ret = vfs_parse_fs_string(root_fc, "fsc", ctx->fscache_uniq);
+ if (ret < 0) {
+ put_fs_context(root_fc);
+ return ret;
+ }
}
+ /* We leave export_path unset as it's not used to find the root. */
/* Does hostname needs to be enclosed in brackets? */
if (strchr(hostname, ':'))
- param.size = snprintf(param.string, len, "[%s]:/", hostname);
+ source = kasprintf(GFP_KERNEL, "[%s]:/", hostname);
else
- param.size = snprintf(param.string, len, "%s:/", hostname);
- ret = vfs_parse_fs_param(root_fc, &param);
- kfree(param.string);
+ source = kasprintf(GFP_KERNEL, "%s:/", hostname);
+
+ if (!source) {
+ put_fs_context(root_fc);
+ return -ENOMEM;
+ }
+ ret = vfs_parse_fs_string(root_fc, "source", source);
+ kfree(source);
if (ret < 0) {
put_fs_context(root_fc);
return ret;
@@ -308,6 +310,7 @@ static void __exit exit_nfs_v4(void)
nfs_dns_resolver_destroy();
}
+MODULE_DESCRIPTION("NFSv4 client support");
MODULE_LICENSE("GPL");
module_init(init_nfs_v4);
diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c
index c394e4447100..d1a92d8f8ba4 100644
--- a/fs/nfs/nfs4sysctl.c
+++ b/fs/nfs/nfs4sysctl.c
@@ -17,7 +17,7 @@ static const int nfs_set_port_min;
static const int nfs_set_port_max = 65535;
static struct ctl_table_header *nfs4_callback_sysctl_table;
-static struct ctl_table nfs4_cb_sysctls[] = {
+static const struct ctl_table nfs4_cb_sysctls[] = {
{
.procname = "nfs_callback_tcpport",
.data = &nfs_callback_set_tcpport,
@@ -34,30 +34,12 @@ static struct ctl_table nfs4_cb_sysctls[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
-};
-
-static struct ctl_table nfs4_cb_sysctl_dir[] = {
- {
- .procname = "nfs",
- .mode = 0555,
- .child = nfs4_cb_sysctls,
- },
- { }
-};
-
-static struct ctl_table nfs4_cb_sysctl_root[] = {
- {
- .procname = "fs",
- .mode = 0555,
- .child = nfs4_cb_sysctl_dir,
- },
- { }
};
int nfs4_register_sysctl(void)
{
- nfs4_callback_sysctl_table = register_sysctl_table(nfs4_cb_sysctl_root);
+ nfs4_callback_sysctl_table = register_sysctl("fs/nfs",
+ nfs4_cb_sysctls);
if (nfs4_callback_sysctl_table == NULL)
return -ENOMEM;
return 0;
diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c
index d9ac556bebcf..987c92d6364b 100644
--- a/fs/nfs/nfs4trace.c
+++ b/fs/nfs/nfs4trace.c
@@ -2,6 +2,8 @@
/*
* Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com>
*/
+#include <uapi/linux/pr.h>
+#include <linux/blkdev.h>
#include <linux/nfs_fs.h>
#include "nfs4_fs.h"
#include "internal.h"
@@ -24,8 +26,17 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_done);
EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_done);
EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_pagelist);
EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_pagelist);
+EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_ds_connect);
EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_read_error);
EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_write_error);
EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_commit_error);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bl_ext_tree_prepare_commit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_reg);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_reg_err);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_unreg);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_unreg_err);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(fl_getdevinfo);
#endif
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 214bc56f92d2..9776d220cec3 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -14,6 +14,8 @@
#include <trace/misc/fs.h>
#include <trace/misc/nfs.h>
+#include "delegation.h"
+
#define show_nfs_fattr_flags(valid) \
__print_flags((unsigned long)valid, "|", \
{ NFS_ATTR_FATTR_TYPE, "TYPE" }, \
@@ -30,7 +32,8 @@
{ NFS_ATTR_FATTR_CTIME, "CTIME" }, \
{ NFS_ATTR_FATTR_CHANGE, "CHANGE" }, \
{ NFS_ATTR_FATTR_OWNER_NAME, "OWNER_NAME" }, \
- { NFS_ATTR_FATTR_GROUP_NAME, "GROUP_NAME" })
+ { NFS_ATTR_FATTR_GROUP_NAME, "GROUP_NAME" }, \
+ { NFS_ATTR_FATTR_BTIME, "BTIME" })
DECLARE_EVENT_CLASS(nfs4_clientid_event,
TP_PROTO(
@@ -47,7 +50,7 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event,
TP_fast_assign(
__entry->error = error < 0 ? -error : 0;
- __assign_str(dstaddr, clp->cl_hostname);
+ __assign_str(dstaddr);
),
TP_printk(
@@ -77,6 +80,36 @@ DEFINE_NFS4_CLIENTID_EVENT(nfs4_bind_conn_to_session);
DEFINE_NFS4_CLIENTID_EVENT(nfs4_sequence);
DEFINE_NFS4_CLIENTID_EVENT(nfs4_reclaim_complete);
+TRACE_EVENT(nfs4_trunked_exchange_id,
+ TP_PROTO(
+ const struct nfs_client *clp,
+ const char *addr,
+ int error
+ ),
+
+ TP_ARGS(clp, addr, error),
+
+ TP_STRUCT__entry(
+ __string(main_addr, clp->cl_hostname)
+ __string(trunk_addr, addr)
+ __field(unsigned long, error)
+ ),
+
+ TP_fast_assign(
+ __entry->error = error < 0 ? -error : 0;
+ __assign_str(main_addr);
+ __assign_str(trunk_addr);
+ ),
+
+ TP_printk(
+ "error=%ld (%s) main_addr=%s trunk_addr=%s",
+ -__entry->error,
+ show_nfs4_status(__entry->error),
+ __get_str(main_addr),
+ __get_str(trunk_addr)
+ )
+);
+
TRACE_EVENT(nfs4_sequence_done,
TP_PROTO(
const struct nfs4_session *session,
@@ -243,6 +276,32 @@ TRACE_EVENT(nfs4_cb_offload,
show_nfs_stable_how(__entry->cb_how)
)
);
+
+TRACE_EVENT(pnfs_ds_connect,
+ TP_PROTO(
+ char *ds_remotestr,
+ int status
+ ),
+
+ TP_ARGS(ds_remotestr, status),
+
+ TP_STRUCT__entry(
+ __string(ds_ips, ds_remotestr)
+ __field(int, status)
+ ),
+
+ TP_fast_assign(
+ __assign_str(ds_ips);
+ __entry->status = status;
+ ),
+
+ TP_printk(
+ "ds_ips=%s, status=%d",
+ __get_str(ds_ips),
+ __entry->status
+ )
+);
+
#endif /* CONFIG_NFS_V4_1 */
TRACE_EVENT(nfs4_setup_sequence,
@@ -292,32 +351,34 @@ TRACE_DEFINE_ENUM(NFS4CLNT_MOVED);
TRACE_DEFINE_ENUM(NFS4CLNT_LEASE_MOVED);
TRACE_DEFINE_ENUM(NFS4CLNT_DELEGATION_EXPIRED);
TRACE_DEFINE_ENUM(NFS4CLNT_RUN_MANAGER);
+TRACE_DEFINE_ENUM(NFS4CLNT_MANAGER_AVAILABLE);
TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_RUNNING);
TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_ANY_LAYOUT_READ);
TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_ANY_LAYOUT_RW);
+TRACE_DEFINE_ENUM(NFS4CLNT_DELEGRETURN_DELAYED);
#define show_nfs4_clp_state(state) \
__print_flags(state, "|", \
- { NFS4CLNT_MANAGER_RUNNING, "MANAGER_RUNNING" }, \
- { NFS4CLNT_CHECK_LEASE, "CHECK_LEASE" }, \
- { NFS4CLNT_LEASE_EXPIRED, "LEASE_EXPIRED" }, \
- { NFS4CLNT_RECLAIM_REBOOT, "RECLAIM_REBOOT" }, \
- { NFS4CLNT_RECLAIM_NOGRACE, "RECLAIM_NOGRACE" }, \
- { NFS4CLNT_DELEGRETURN, "DELEGRETURN" }, \
- { NFS4CLNT_SESSION_RESET, "SESSION_RESET" }, \
- { NFS4CLNT_LEASE_CONFIRM, "LEASE_CONFIRM" }, \
- { NFS4CLNT_SERVER_SCOPE_MISMATCH, \
- "SERVER_SCOPE_MISMATCH" }, \
- { NFS4CLNT_PURGE_STATE, "PURGE_STATE" }, \
- { NFS4CLNT_BIND_CONN_TO_SESSION, \
- "BIND_CONN_TO_SESSION" }, \
- { NFS4CLNT_MOVED, "MOVED" }, \
- { NFS4CLNT_LEASE_MOVED, "LEASE_MOVED" }, \
- { NFS4CLNT_DELEGATION_EXPIRED, "DELEGATION_EXPIRED" }, \
- { NFS4CLNT_RUN_MANAGER, "RUN_MANAGER" }, \
- { NFS4CLNT_RECALL_RUNNING, "RECALL_RUNNING" }, \
- { NFS4CLNT_RECALL_ANY_LAYOUT_READ, "RECALL_ANY_LAYOUT_READ" }, \
- { NFS4CLNT_RECALL_ANY_LAYOUT_RW, "RECALL_ANY_LAYOUT_RW" })
+ { BIT(NFS4CLNT_MANAGER_RUNNING), "MANAGER_RUNNING" }, \
+ { BIT(NFS4CLNT_CHECK_LEASE), "CHECK_LEASE" }, \
+ { BIT(NFS4CLNT_LEASE_EXPIRED), "LEASE_EXPIRED" }, \
+ { BIT(NFS4CLNT_RECLAIM_REBOOT), "RECLAIM_REBOOT" }, \
+ { BIT(NFS4CLNT_RECLAIM_NOGRACE), "RECLAIM_NOGRACE" }, \
+ { BIT(NFS4CLNT_DELEGRETURN), "DELEGRETURN" }, \
+ { BIT(NFS4CLNT_SESSION_RESET), "SESSION_RESET" }, \
+ { BIT(NFS4CLNT_LEASE_CONFIRM), "LEASE_CONFIRM" }, \
+ { BIT(NFS4CLNT_SERVER_SCOPE_MISMATCH), "SERVER_SCOPE_MISMATCH" }, \
+ { BIT(NFS4CLNT_PURGE_STATE), "PURGE_STATE" }, \
+ { BIT(NFS4CLNT_BIND_CONN_TO_SESSION), "BIND_CONN_TO_SESSION" }, \
+ { BIT(NFS4CLNT_MOVED), "MOVED" }, \
+ { BIT(NFS4CLNT_LEASE_MOVED), "LEASE_MOVED" }, \
+ { BIT(NFS4CLNT_DELEGATION_EXPIRED), "DELEGATION_EXPIRED" }, \
+ { BIT(NFS4CLNT_RUN_MANAGER), "RUN_MANAGER" }, \
+ { BIT(NFS4CLNT_MANAGER_AVAILABLE), "MANAGER_AVAILABLE" }, \
+ { BIT(NFS4CLNT_RECALL_RUNNING), "RECALL_RUNNING" }, \
+ { BIT(NFS4CLNT_RECALL_ANY_LAYOUT_READ), "RECALL_ANY_LAYOUT_READ" }, \
+ { BIT(NFS4CLNT_RECALL_ANY_LAYOUT_RW), "RECALL_ANY_LAYOUT_RW" }, \
+ { BIT(NFS4CLNT_DELEGRETURN_DELAYED), "DELERETURN_DELAYED" })
TRACE_EVENT(nfs4_state_mgr,
TP_PROTO(
@@ -333,7 +394,7 @@ TRACE_EVENT(nfs4_state_mgr,
TP_fast_assign(
__entry->state = clp->cl_state;
- __assign_str(hostname, clp->cl_hostname);
+ __assign_str(hostname);
),
TP_printk(
@@ -361,8 +422,8 @@ TRACE_EVENT(nfs4_state_mgr_failed,
TP_fast_assign(
__entry->error = status < 0 ? -status : 0;
__entry->state = clp->cl_state;
- __assign_str(hostname, clp->cl_hostname);
- __assign_str(section, section);
+ __assign_str(hostname);
+ __assign_str(section);
),
TP_printk(
@@ -546,7 +607,7 @@ DECLARE_EVENT_CLASS(nfs4_open_event,
__entry->fhandle = 0;
}
__entry->dir = NFS_FILEID(d_inode(ctx->dentry->d_parent));
- __assign_str(name, ctx->dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -697,7 +758,7 @@ DECLARE_EVENT_CLASS(nfs4_lock_event,
__entry->error = error < 0 ? -error : 0;
__entry->cmd = cmd;
- __entry->type = request->fl_type;
+ __entry->type = request->c.flc_type;
__entry->start = request->fl_start;
__entry->end = request->fl_end;
__entry->dev = inode->i_sb->s_dev;
@@ -769,7 +830,7 @@ TRACE_EVENT(nfs4_set_lock,
__entry->error = error < 0 ? -error : 0;
__entry->cmd = cmd;
- __entry->type = request->fl_type;
+ __entry->type = request->c.flc_type;
__entry->start = request->fl_start;
__entry->end = request->fl_end;
__entry->dev = inode->i_sb->s_dev;
@@ -924,6 +985,52 @@ DECLARE_EVENT_CLASS(nfs4_set_delegation_event,
TP_ARGS(inode, fmode))
DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_set_delegation);
DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_reclaim_delegation);
+DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_detach_delegation);
+
+#define show_delegation_flags(flags) \
+ __print_flags(flags, "|", \
+ { BIT(NFS_DELEGATION_NEED_RECLAIM), "NEED_RECLAIM" }, \
+ { BIT(NFS_DELEGATION_RETURN), "RETURN" }, \
+ { BIT(NFS_DELEGATION_RETURN_IF_CLOSED), "RETURN_IF_CLOSED" }, \
+ { BIT(NFS_DELEGATION_REFERENCED), "REFERENCED" }, \
+ { BIT(NFS_DELEGATION_RETURNING), "RETURNING" }, \
+ { BIT(NFS_DELEGATION_REVOKED), "REVOKED" }, \
+ { BIT(NFS_DELEGATION_TEST_EXPIRED), "TEST_EXPIRED" }, \
+ { BIT(NFS_DELEGATION_INODE_FREEING), "INODE_FREEING" }, \
+ { BIT(NFS_DELEGATION_RETURN_DELAYED), "RETURN_DELAYED" })
+
+DECLARE_EVENT_CLASS(nfs4_delegation_event,
+ TP_PROTO(
+ const struct nfs_delegation *delegation
+ ),
+
+ TP_ARGS(delegation),
+
+ TP_STRUCT__entry(
+ __field(u32, fhandle)
+ __field(unsigned int, fmode)
+ __field(unsigned long, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->fhandle = nfs_fhandle_hash(NFS_FH(delegation->inode));
+ __entry->fmode = delegation->type;
+ __entry->flags = delegation->flags;
+ ),
+
+ TP_printk(
+ "fhandle=0x%08x fmode=%s flags=%s",
+ __entry->fhandle, show_fs_fmode_flags(__entry->fmode),
+ show_delegation_flags(__entry->flags)
+ )
+);
+#define DEFINE_NFS4_DELEGATION_EVENT(name) \
+ DEFINE_EVENT(nfs4_delegation_event, name, \
+ TP_PROTO( \
+ const struct nfs_delegation *delegation \
+ ), \
+ TP_ARGS(delegation))
+DEFINE_NFS4_DELEGATION_EVENT(nfs_delegation_need_return);
TRACE_EVENT(nfs4_delegreturn_exit,
TP_PROTO(
@@ -1040,7 +1147,7 @@ DECLARE_EVENT_CLASS(nfs4_lookup_event,
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__entry->error = -error;
- __assign_str(name, name->name);
+ __assign_str(name);
),
TP_printk(
@@ -1124,8 +1231,8 @@ TRACE_EVENT(nfs4_rename,
__entry->olddir = NFS_FILEID(olddir);
__entry->newdir = NFS_FILEID(newdir);
__entry->error = error < 0 ? -error : 0;
- __assign_str(oldname, oldname->name);
- __assign_str(newname, newname->name);
+ __assign_str(oldname);
+ __assign_str(newname);
),
TP_printk(
@@ -1327,7 +1434,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event,
__entry->fileid = 0;
__entry->dev = 0;
}
- __assign_str(dstaddr, clp ? clp->cl_hostname : "unknown");
+ __assign_str(dstaddr);
),
TP_printk(
@@ -1384,7 +1491,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event,
__entry->fileid = 0;
__entry->dev = 0;
}
- __assign_str(dstaddr, clp ? clp->cl_hostname : "unknown");
+ __assign_str(dstaddr);
__entry->stateid_seq =
be32_to_cpu(stateid->seqid);
__entry->stateid_hash =
@@ -1417,6 +1524,63 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event,
DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_recall);
DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_layoutrecall_file);
+#define show_stateid_type(type) \
+ __print_symbolic(type, \
+ { NFS4_INVALID_STATEID_TYPE, "INVALID" }, \
+ { NFS4_SPECIAL_STATEID_TYPE, "SPECIAL" }, \
+ { NFS4_OPEN_STATEID_TYPE, "OPEN" }, \
+ { NFS4_LOCK_STATEID_TYPE, "LOCK" }, \
+ { NFS4_DELEGATION_STATEID_TYPE, "DELEGATION" }, \
+ { NFS4_LAYOUT_STATEID_TYPE, "LAYOUT" }, \
+ { NFS4_PNFS_DS_STATEID_TYPE, "PNFS_DS" }, \
+ { NFS4_REVOKED_STATEID_TYPE, "REVOKED" }, \
+ { NFS4_FREED_STATEID_TYPE, "FREED" })
+
+DECLARE_EVENT_CLASS(nfs4_match_stateid_event,
+ TP_PROTO(
+ const nfs4_stateid *s1,
+ const nfs4_stateid *s2
+ ),
+
+ TP_ARGS(s1, s2),
+
+ TP_STRUCT__entry(
+ __field(int, s1_seq)
+ __field(int, s2_seq)
+ __field(u32, s1_hash)
+ __field(u32, s2_hash)
+ __field(int, s1_type)
+ __field(int, s2_type)
+ ),
+
+ TP_fast_assign(
+ __entry->s1_seq = s1->seqid;
+ __entry->s1_hash = nfs_stateid_hash(s1);
+ __entry->s1_type = s1->type;
+ __entry->s2_seq = s2->seqid;
+ __entry->s2_hash = nfs_stateid_hash(s2);
+ __entry->s2_type = s2->type;
+ ),
+
+ TP_printk(
+ "s1=%s:%x:%u s2=%s:%x:%u",
+ show_stateid_type(__entry->s1_type),
+ __entry->s1_hash, __entry->s1_seq,
+ show_stateid_type(__entry->s2_type),
+ __entry->s2_hash, __entry->s2_seq
+ )
+);
+
+#define DEFINE_NFS4_MATCH_STATEID_EVENT(name) \
+ DEFINE_EVENT(nfs4_match_stateid_event, name, \
+ TP_PROTO( \
+ const nfs4_stateid *s1, \
+ const nfs4_stateid *s2 \
+ ), \
+ TP_ARGS(s1, s2))
+DEFINE_NFS4_MATCH_STATEID_EVENT(nfs41_match_stateid);
+DEFINE_NFS4_MATCH_STATEID_EVENT(nfs4_match_stateid);
+
DECLARE_EVENT_CLASS(nfs4_idmap_event,
TP_PROTO(
const char *name,
@@ -1928,7 +2092,7 @@ DECLARE_EVENT_CLASS(nfs4_deviceid_event,
),
TP_fast_assign(
- __assign_str(dstaddr, clp->cl_hostname);
+ __assign_str(dstaddr);
memcpy(__entry->deviceid, deviceid->data,
NFS4_DEVICEID4_SIZE);
),
@@ -1966,7 +2130,7 @@ DECLARE_EVENT_CLASS(nfs4_deviceid_status,
TP_fast_assign(
__entry->dev = server->s_dev;
__entry->status = status;
- __assign_str(dstaddr, server->nfs_client->cl_hostname);
+ __assign_str(dstaddr);
memcpy(__entry->deviceid, deviceid->data,
NFS4_DEVICEID4_SIZE);
),
@@ -1989,15 +2153,45 @@ DECLARE_EVENT_CLASS(nfs4_deviceid_status,
DEFINE_PNFS_DEVICEID_STATUS(nfs4_getdeviceinfo);
DEFINE_PNFS_DEVICEID_STATUS(nfs4_find_deviceid);
+TRACE_EVENT(fl_getdevinfo,
+ TP_PROTO(
+ const struct nfs_server *server,
+ const struct nfs4_deviceid *deviceid,
+ char *ds_remotestr
+ ),
+ TP_ARGS(server, deviceid, ds_remotestr),
+
+ TP_STRUCT__entry(
+ __string(mds_addr, server->nfs_client->cl_hostname)
+ __array(unsigned char, deviceid, NFS4_DEVICEID4_SIZE)
+ __string(ds_ips, ds_remotestr)
+ ),
+
+ TP_fast_assign(
+ __assign_str(mds_addr);
+ __assign_str(ds_ips);
+ memcpy(__entry->deviceid, deviceid->data,
+ NFS4_DEVICEID4_SIZE);
+ ),
+ TP_printk(
+ "deviceid=%s, mds_addr=%s, ds_ips=%s",
+ __print_hex(__entry->deviceid, NFS4_DEVICEID4_SIZE),
+ __get_str(mds_addr),
+ __get_str(ds_ips)
+ )
+);
+
DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
TP_PROTO(
- const struct nfs_pgio_header *hdr
+ const struct nfs_pgio_header *hdr,
+ int error
),
- TP_ARGS(hdr),
+ TP_ARGS(hdr, error),
TP_STRUCT__entry(
__field(unsigned long, error)
+ __field(unsigned long, nfs_error)
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
@@ -2013,7 +2207,8 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
TP_fast_assign(
const struct inode *inode = hdr->inode;
- __entry->error = hdr->res.op_status;
+ __entry->error = -error;
+ __entry->nfs_error = hdr->res.op_status;
__entry->fhandle = nfs_fhandle_hash(hdr->args.fh);
__entry->fileid = NFS_FILEID(inode);
__entry->dev = inode->i_sb->s_dev;
@@ -2023,14 +2218,13 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
be32_to_cpu(hdr->args.stateid.seqid);
__entry->stateid_hash =
nfs_stateid_hash(&hdr->args.stateid);
- __assign_str(dstaddr, hdr->ds_clp ?
- rpc_peeraddr2str(hdr->ds_clp->cl_rpcclient,
- RPC_DISPLAY_ADDR) : "unknown");
+ __assign_str(dstaddr);
),
TP_printk(
"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s",
+ "offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s "
+ "nfs_error=%lu (%s)",
-__entry->error,
show_nfs4_status(__entry->error),
MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -2038,28 +2232,32 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
__entry->fhandle,
__entry->offset, __entry->count,
__entry->stateid_seq, __entry->stateid_hash,
- __get_str(dstaddr)
+ __get_str(dstaddr), __entry->nfs_error,
+ show_nfs4_status(__entry->nfs_error)
)
);
#define DEFINE_NFS4_FLEXFILES_IO_EVENT(name) \
DEFINE_EVENT(nfs4_flexfiles_io_event, name, \
TP_PROTO( \
- const struct nfs_pgio_header *hdr \
+ const struct nfs_pgio_header *hdr, \
+ int error \
), \
- TP_ARGS(hdr))
+ TP_ARGS(hdr, error))
DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_read_error);
DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_write_error);
TRACE_EVENT(ff_layout_commit_error,
TP_PROTO(
- const struct nfs_commit_data *data
+ const struct nfs_commit_data *data,
+ int error
),
- TP_ARGS(data),
+ TP_ARGS(data, error),
TP_STRUCT__entry(
__field(unsigned long, error)
+ __field(unsigned long, nfs_error)
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
@@ -2073,30 +2271,152 @@ TRACE_EVENT(ff_layout_commit_error,
TP_fast_assign(
const struct inode *inode = data->inode;
- __entry->error = data->res.op_status;
+ __entry->error = -error;
+ __entry->nfs_error = data->res.op_status;
__entry->fhandle = nfs_fhandle_hash(data->args.fh);
__entry->fileid = NFS_FILEID(inode);
__entry->dev = inode->i_sb->s_dev;
__entry->offset = data->args.offset;
__entry->count = data->args.count;
- __assign_str(dstaddr, data->ds_clp ?
- rpc_peeraddr2str(data->ds_clp->cl_rpcclient,
- RPC_DISPLAY_ADDR) : "unknown");
+ __assign_str(dstaddr);
),
TP_printk(
"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%llu count=%u dstaddr=%s",
+ "offset=%llu count=%u dstaddr=%s nfs_error=%lu (%s)",
-__entry->error,
show_nfs4_status(__entry->error),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
__entry->offset, __entry->count,
- __get_str(dstaddr)
+ __get_str(dstaddr), __entry->nfs_error,
+ show_nfs4_status(__entry->nfs_error)
+ )
+);
+
+TRACE_EVENT(bl_ext_tree_prepare_commit,
+ TP_PROTO(
+ int ret,
+ size_t count,
+ u64 lwb,
+ bool not_all_ranges
+ ),
+
+ TP_ARGS(ret, count, lwb, not_all_ranges),
+
+ TP_STRUCT__entry(
+ __field(int, ret)
+ __field(size_t, count)
+ __field(u64, lwb)
+ __field(bool, not_all_ranges)
+ ),
+
+ TP_fast_assign(
+ __entry->ret = ret;
+ __entry->count = count;
+ __entry->lwb = lwb;
+ __entry->not_all_ranges = not_all_ranges;
+ ),
+
+ TP_printk(
+ "ret=%d, found %zu ranges, lwb=%llu%s",
+ __entry->ret,
+ __entry->count,
+ __entry->lwb,
+ __entry->not_all_ranges ? ", not all ranges encoded" :
+ ""
)
);
+DECLARE_EVENT_CLASS(pnfs_bl_pr_key_class,
+ TP_PROTO(
+ const struct block_device *bdev,
+ u64 key
+ ),
+ TP_ARGS(bdev, key),
+ TP_STRUCT__entry(
+ __field(u64, key)
+ __field(dev_t, dev)
+ __string(device, bdev->bd_disk->disk_name)
+ ),
+ TP_fast_assign(
+ __entry->key = key;
+ __entry->dev = bdev->bd_dev;
+ __assign_str(device);
+ ),
+ TP_printk("dev=%d,%d (%s) key=0x%016llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __get_str(device), __entry->key
+ )
+);
+
+#define DEFINE_NFS4_BLOCK_PRKEY_EVENT(name) \
+ DEFINE_EVENT(pnfs_bl_pr_key_class, name, \
+ TP_PROTO( \
+ const struct block_device *bdev, \
+ u64 key \
+ ), \
+ TP_ARGS(bdev, key))
+DEFINE_NFS4_BLOCK_PRKEY_EVENT(bl_pr_key_reg);
+DEFINE_NFS4_BLOCK_PRKEY_EVENT(bl_pr_key_unreg);
+
+/*
+ * From uapi/linux/pr.h
+ */
+TRACE_DEFINE_ENUM(PR_STS_SUCCESS);
+TRACE_DEFINE_ENUM(PR_STS_IOERR);
+TRACE_DEFINE_ENUM(PR_STS_RESERVATION_CONFLICT);
+TRACE_DEFINE_ENUM(PR_STS_RETRY_PATH_FAILURE);
+TRACE_DEFINE_ENUM(PR_STS_PATH_FAST_FAILED);
+TRACE_DEFINE_ENUM(PR_STS_PATH_FAILED);
+
+#define show_pr_status(x) \
+ __print_symbolic(x, \
+ { PR_STS_SUCCESS, "SUCCESS" }, \
+ { PR_STS_IOERR, "IOERR" }, \
+ { PR_STS_RESERVATION_CONFLICT, "RESERVATION_CONFLICT" }, \
+ { PR_STS_RETRY_PATH_FAILURE, "RETRY_PATH_FAILURE" }, \
+ { PR_STS_PATH_FAST_FAILED, "PATH_FAST_FAILED" }, \
+ { PR_STS_PATH_FAILED, "PATH_FAILED" })
+
+DECLARE_EVENT_CLASS(pnfs_bl_pr_key_err_class,
+ TP_PROTO(
+ const struct block_device *bdev,
+ u64 key,
+ int status
+ ),
+ TP_ARGS(bdev, key, status),
+ TP_STRUCT__entry(
+ __field(u64, key)
+ __field(dev_t, dev)
+ __field(unsigned long, status)
+ __string(device, bdev->bd_disk->disk_name)
+ ),
+ TP_fast_assign(
+ __entry->key = key;
+ __entry->dev = bdev->bd_dev;
+ __entry->status = status;
+ __assign_str(device);
+ ),
+ TP_printk("dev=%d,%d (%s) key=0x%016llx status=%s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __get_str(device), __entry->key,
+ show_pr_status(__entry->status)
+ )
+);
+
+#define DEFINE_NFS4_BLOCK_PRKEY_ERR_EVENT(name) \
+ DEFINE_EVENT(pnfs_bl_pr_key_err_class, name, \
+ TP_PROTO( \
+ const struct block_device *bdev, \
+ u64 key, \
+ int status \
+ ), \
+ TP_ARGS(bdev, key, status))
+DEFINE_NFS4_BLOCK_PRKEY_ERR_EVENT(bl_pr_key_reg_err);
+DEFINE_NFS4_BLOCK_PRKEY_ERR_EVENT(bl_pr_key_unreg_err);
+
#ifdef CONFIG_NFS_V4_2
TRACE_DEFINE_ENUM(NFS4_CONTENT_DATA);
TRACE_DEFINE_ENUM(NFS4_CONTENT_HOLE);
@@ -2464,7 +2784,7 @@ TRACE_EVENT(nfs4_copy_notify,
)
);
-TRACE_EVENT(nfs4_offload_cancel,
+DECLARE_EVENT_CLASS(nfs4_offload_class,
TP_PROTO(
const struct nfs42_offload_status_args *args,
int error
@@ -2496,6 +2816,15 @@ TRACE_EVENT(nfs4_offload_cancel,
__entry->stateid_seq, __entry->stateid_hash
)
);
+#define DEFINE_NFS4_OFFLOAD_EVENT(name) \
+ DEFINE_EVENT(nfs4_offload_class, name, \
+ TP_PROTO( \
+ const struct nfs42_offload_status_args *args, \
+ int error \
+ ), \
+ TP_ARGS(args, error))
+DEFINE_NFS4_OFFLOAD_EVENT(nfs4_offload_cancel);
+DEFINE_NFS4_OFFLOAD_EVENT(nfs4_offload_status);
DECLARE_EVENT_CLASS(nfs4_xattr_event,
TP_PROTO(
@@ -2519,7 +2848,7 @@ DECLARE_EVENT_CLASS(nfs4_xattr_event,
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = NFS_FILEID(inode);
__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
- __assign_str(name, name);
+ __assign_str(name);
),
TP_printk(
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index deec76cf5afe..1d0e6c10f921 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -52,6 +52,7 @@
#include <linux/nfs.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
+#include <linux/nfs_common.h>
#include "nfs4_fs.h"
#include "nfs4trace.h"
@@ -63,11 +64,7 @@
#define NFSDBG_FACILITY NFSDBG_XDR
-/* Mapping from NFS error code to "errno" error code. */
-#define errno_NFSERR_IO EIO
-
struct compound_hdr;
-static int nfs4_stat_to_errno(int);
static void encode_layoutget(struct xdr_stream *xdr,
const struct nfs4_layoutget_args *args,
struct compound_hdr *hdr);
@@ -85,9 +82,8 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
* we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2)
*/
#define pagepad_maxsz (1)
-#define open_owner_id_maxsz (1 + 2 + 1 + 1 + 2)
-#define lock_owner_id_maxsz (1 + 1 + 4)
-#define decode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
+#define open_owner_id_maxsz (2 + 1 + 2 + 2)
+#define lock_owner_id_maxsz (2 + 1 + 2)
#define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
#define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
#define op_encode_hdr_maxsz (1)
@@ -188,7 +184,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
#define encode_claim_null_maxsz (1 + nfs4_name_maxsz)
#define encode_open_maxsz (op_encode_hdr_maxsz + \
2 + encode_share_access_maxsz + 2 + \
- open_owner_id_maxsz + \
+ 1 + open_owner_id_maxsz + \
encode_opentype_maxsz + \
encode_claim_null_maxsz)
#define decode_space_limit_maxsz (3)
@@ -224,6 +220,11 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
encode_attrs_maxsz)
#define decode_setattr_maxsz (op_decode_hdr_maxsz + \
nfs4_fattr_bitmap_maxsz)
+#define encode_delegattr_maxsz (op_encode_hdr_maxsz + \
+ encode_stateid_maxsz + \
+ nfs4_fattr_bitmap_maxsz + \
+ 2*nfstime4_maxsz)
+#define decode_delegattr_maxsz (decode_setattr_maxsz)
#define encode_read_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + 3)
#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + pagepad_maxsz)
@@ -253,13 +254,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
#define encode_link_maxsz (op_encode_hdr_maxsz + \
nfs4_name_maxsz)
#define decode_link_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz)
-#define encode_lockowner_maxsz (7)
+#define encode_lockowner_maxsz (2 + 1 + lock_owner_id_maxsz)
+
#define encode_lock_maxsz (op_encode_hdr_maxsz + \
7 + \
1 + encode_stateid_maxsz + 1 + \
encode_lockowner_maxsz)
#define decode_lock_denied_maxsz \
- (8 + decode_lockowner_maxsz)
+ (2 + 2 + 1 + 2 + 1 + lock_owner_id_maxsz)
#define decode_lock_maxsz (op_decode_hdr_maxsz + \
decode_lock_denied_maxsz)
#define encode_lockt_maxsz (op_encode_hdr_maxsz + 5 + \
@@ -615,7 +617,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
encode_lockowner_maxsz)
#define NFS4_dec_release_lockowner_sz \
(compound_decode_hdr_maxsz + \
- decode_lockowner_maxsz)
+ decode_release_lockowner_maxsz)
#define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
@@ -758,12 +760,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_layoutreturn_maxsz + \
+ encode_delegattr_maxsz + \
encode_delegreturn_maxsz + \
encode_getattr_maxsz)
#define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_layoutreturn_maxsz + \
+ decode_delegattr_maxsz + \
decode_delegreturn_maxsz + \
decode_getattr_maxsz)
#define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \
@@ -968,11 +972,6 @@ static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes)
return p;
}
-static void encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len)
-{
- WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0);
-}
-
static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
{
WARN_ON_ONCE(xdr_stream_encode_opaque(xdr, str, len) < 0);
@@ -1060,9 +1059,10 @@ static void encode_nops(struct compound_hdr *hdr)
*hdr->nops_p = htonl(hdr->nops);
}
-static void encode_nfs4_stateid(struct xdr_stream *xdr, const nfs4_stateid *stateid)
+static void encode_nfs4_stateid(struct xdr_stream *xdr,
+ const nfs4_stateid *stateid)
{
- encode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE);
+ encode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
}
static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf)
@@ -1305,7 +1305,7 @@ static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct
static inline int nfs4_lock_type(struct file_lock *fl, int block)
{
- if (fl->fl_type == F_RDLCK)
+ if (lock_is_read(fl))
return block ? NFS4_READW_LT : NFS4_READ_LT;
return block ? NFS4_WRITEW_LT : NFS4_WRITE_LT;
}
@@ -1412,16 +1412,16 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
__be32 *p;
/*
* opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4,
- * owner 4 = 32
+ * owner 28
*/
encode_nfs4_seqid(xdr, arg->seqid);
encode_share_access(xdr, arg->share_access);
- p = reserve_space(xdr, 36);
+ p = reserve_space(xdr, 40);
p = xdr_encode_hyper(p, arg->clientid);
- *p++ = cpu_to_be32(24);
+ *p++ = cpu_to_be32(28);
p = xdr_encode_opaque_fixed(p, "open id:", 8);
*p++ = cpu_to_be32(arg->server->s_dev);
- *p++ = cpu_to_be32(arg->id.uniquifier);
+ p = xdr_encode_hyper(p, arg->id.uniquifier);
xdr_encode_hyper(p, arg->id.create_time);
}
@@ -1468,20 +1468,18 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a
}
}
-static inline void encode_delegation_type(struct xdr_stream *xdr, fmode_t delegation_type)
+static inline void encode_delegation_type(struct xdr_stream *xdr, u32 delegation_type)
{
__be32 *p;
p = reserve_space(xdr, 4);
switch (delegation_type) {
- case 0:
- *p = cpu_to_be32(NFS4_OPEN_DELEGATE_NONE);
- break;
- case FMODE_READ:
- *p = cpu_to_be32(NFS4_OPEN_DELEGATE_READ);
- break;
- case FMODE_WRITE|FMODE_READ:
- *p = cpu_to_be32(NFS4_OPEN_DELEGATE_WRITE);
+ case NFS4_OPEN_DELEGATE_NONE:
+ case NFS4_OPEN_DELEGATE_READ:
+ case NFS4_OPEN_DELEGATE_WRITE:
+ case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
+ case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+ *p = cpu_to_be32(delegation_type);
break;
default:
BUG();
@@ -1497,7 +1495,7 @@ static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *
encode_string(xdr, name->len, name->name);
}
-static inline void encode_claim_previous(struct xdr_stream *xdr, fmode_t type)
+static inline void encode_claim_previous(struct xdr_stream *xdr, u32 type)
{
__be32 *p;
@@ -1602,7 +1600,8 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_pgio_args *args
static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr)
{
uint32_t attrs[3] = {
- FATTR4_WORD0_RDATTR_ERROR,
+ FATTR4_WORD0_TYPE
+ | FATTR4_WORD0_RDATTR_ERROR,
FATTR4_WORD1_MOUNTED_ON_FILEID,
};
uint32_t dircount = readdir->count;
@@ -1612,12 +1611,21 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
unsigned int i;
if (readdir->plus) {
- attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE|
- FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE|FATTR4_WORD0_FILEID;
- attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER|
- FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV|
- FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS|
- FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
+ attrs[0] |= FATTR4_WORD0_CHANGE
+ | FATTR4_WORD0_SIZE
+ | FATTR4_WORD0_FSID
+ | FATTR4_WORD0_FILEHANDLE
+ | FATTR4_WORD0_FILEID;
+ attrs[1] |= FATTR4_WORD1_MODE
+ | FATTR4_WORD1_NUMLINKS
+ | FATTR4_WORD1_OWNER
+ | FATTR4_WORD1_OWNER_GROUP
+ | FATTR4_WORD1_RAWDEV
+ | FATTR4_WORD1_SPACE_USED
+ | FATTR4_WORD1_TIME_ACCESS
+ | FATTR4_WORD1_TIME_CREATE
+ | FATTR4_WORD1_TIME_METADATA
+ | FATTR4_WORD1_TIME_MODIFY;
attrs[2] |= FATTR4_WORD2_SECURITY_LABEL;
}
/* Use mounted_on_fileid only if the server supports it */
@@ -1726,6 +1734,33 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs
server->attr_bitmask);
}
+static void encode_delegattr(struct xdr_stream *xdr,
+ const nfs4_stateid *stateid,
+ const struct nfs4_delegattr *attr,
+ struct compound_hdr *hdr)
+{
+ uint32_t bitmap[3] = { 0 };
+ uint32_t len = 0;
+ __be32 *p;
+
+ encode_op_hdr(xdr, OP_SETATTR, encode_delegattr_maxsz, hdr);
+ encode_nfs4_stateid(xdr, stateid);
+ if (attr->atime_set) {
+ bitmap[2] |= FATTR4_WORD2_TIME_DELEG_ACCESS;
+ len += (nfstime4_maxsz << 2);
+ }
+ if (attr->mtime_set) {
+ bitmap[2] |= FATTR4_WORD2_TIME_DELEG_MODIFY;
+ len += (nfstime4_maxsz << 2);
+ }
+ xdr_encode_bitmap4(xdr, bitmap, ARRAY_SIZE(bitmap));
+ xdr_stream_encode_opaque_inline(xdr, (void **)&p, len);
+ if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_ACCESS)
+ p = xdr_encode_nfstime4(p, &attr->atime);
+ if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_MODIFY)
+ p = xdr_encode_nfstime4(p, &attr->mtime);
+}
+
static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr)
{
__be32 *p;
@@ -2096,7 +2131,7 @@ static void encode_test_stateid(struct xdr_stream *xdr,
{
encode_op_hdr(xdr, OP_TEST_STATEID, decode_test_stateid_maxsz, hdr);
encode_uint32(xdr, 1);
- encode_nfs4_stateid(xdr, args->stateid);
+ encode_nfs4_stateid(xdr, &args->stateid);
}
static void encode_free_stateid(struct xdr_stream *xdr,
@@ -2803,6 +2838,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
encode_putfh(xdr, args->fhandle, &hdr);
if (args->lr_args)
encode_layoutreturn(xdr, args->lr_args, &hdr);
+ if (args->sattr_args)
+ encode_delegattr(xdr, args->stateid, args->sattr_args, &hdr);
if (args->bitmask)
encode_getfattr(xdr, args->bitmask, &hdr);
encode_delegreturn(xdr, args->stateid, &hdr);
@@ -3403,7 +3440,7 @@ static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, ui
*res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT;
}
- dprintk("%s: link support=%s\n", __func__, *res == 0 ? "false" : "true");
+ dprintk("%s: link support=%s\n", __func__, str_false_true(*res == 0));
return 0;
}
@@ -3421,7 +3458,7 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap,
*res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT;
}
- dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true");
+ dprintk("%s: symlink support=%s\n", __func__, str_false_true(*res == 0));
return 0;
}
@@ -3563,7 +3600,7 @@ static int decode_attr_case_insensitive(struct xdr_stream *xdr, uint32_t *bitmap
*res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_CASE_INSENSITIVE;
}
- dprintk("%s: case_insensitive=%s\n", __func__, *res == 0 ? "false" : "true");
+ dprintk("%s: case_insensitive=%s\n", __func__, str_false_true(*res == 0));
return 0;
}
@@ -3581,7 +3618,7 @@ static int decode_attr_case_preserving(struct xdr_stream *xdr, uint32_t *bitmap,
*res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_CASE_PRESERVING;
}
- dprintk("%s: case_preserving=%s\n", __func__, *res == 0 ? "false" : "true");
+ dprintk("%s: case_preserving=%s\n", __func__, str_false_true(*res == 0));
return 0;
}
@@ -4171,6 +4208,24 @@ static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, str
return status;
}
+static int decode_attr_time_create(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time)
+{
+ int status = 0;
+
+ time->tv_sec = 0;
+ time->tv_nsec = 0;
+ if (unlikely(bitmap[1] & (FATTR4_WORD1_TIME_CREATE - 1U)))
+ return -EIO;
+ if (likely(bitmap[1] & FATTR4_WORD1_TIME_CREATE)) {
+ status = decode_attr_time(xdr, time);
+ if (status == 0)
+ status = NFS_ATTR_FATTR_BTIME;
+ bitmap[1] &= ~FATTR4_WORD1_TIME_CREATE;
+ }
+ dprintk("%s: btime=%lld\n", __func__, time->tv_sec);
+ return status;
+}
+
static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time)
{
int status = 0;
@@ -4289,8 +4344,29 @@ static int decode_attr_xattrsupport(struct xdr_stream *xdr, uint32_t *bitmap,
*res = be32_to_cpup(p);
bitmap[2] &= ~FATTR4_WORD2_XATTR_SUPPORT;
}
- dprintk("%s: XATTR support=%s\n", __func__,
- *res == 0 ? "false" : "true");
+ dprintk("%s: XATTR support=%s\n", __func__, str_false_true(*res == 0));
+ return 0;
+}
+
+static int decode_attr_open_arguments(struct xdr_stream *xdr, uint32_t *bitmap,
+ struct nfs4_open_caps *res)
+{
+ memset(res, 0, sizeof(*res));
+ if (unlikely(bitmap[2] & (FATTR4_WORD2_OPEN_ARGUMENTS - 1U)))
+ return -EIO;
+ if (likely(bitmap[2] & FATTR4_WORD2_OPEN_ARGUMENTS)) {
+ if (decode_bitmap4(xdr, res->oa_share_access, ARRAY_SIZE(res->oa_share_access)) < 0)
+ return -EIO;
+ if (decode_bitmap4(xdr, res->oa_share_deny, ARRAY_SIZE(res->oa_share_deny)) < 0)
+ return -EIO;
+ if (decode_bitmap4(xdr, res->oa_share_access_want, ARRAY_SIZE(res->oa_share_access_want)) < 0)
+ return -EIO;
+ if (decode_bitmap4(xdr, res->oa_open_claim, ARRAY_SIZE(res->oa_open_claim)) < 0)
+ return -EIO;
+ if (decode_bitmap4(xdr, res->oa_createmode, ARRAY_SIZE(res->oa_createmode)) < 0)
+ return -EIO;
+ bitmap[2] &= ~FATTR4_WORD2_OPEN_ARGUMENTS;
+ }
return 0;
}
@@ -4343,14 +4419,6 @@ static int decode_access(struct xdr_stream *xdr, u32 *supported, u32 *access)
return 0;
}
-static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len)
-{
- ssize_t ret = xdr_stream_decode_opaque_fixed(xdr, buf, len);
- if (unlikely(ret < 0))
- return -EIO;
- return 0;
-}
-
static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
{
return decode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE);
@@ -4468,6 +4536,8 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re
if ((status = decode_attr_exclcreat_supported(xdr, bitmap,
res->exclcreat_bitmask)) != 0)
goto xdr_error;
+ if ((status = decode_attr_open_arguments(xdr, bitmap, &res->open_caps)) != 0)
+ goto xdr_error;
status = verify_attr_len(xdr, savep, attrlen);
xdr_error:
dprintk("%s: xdr returned %d!\n", __func__, -status);
@@ -4730,6 +4800,11 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
goto xdr_error;
fattr->valid |= status;
+ status = decode_attr_time_create(xdr, bitmap, &fattr->btime);
+ if (status < 0)
+ goto xdr_error;
+ fattr->valid |= status;
+
status = decode_attr_time_metadata(xdr, bitmap, &fattr->ctime);
if (status < 0)
goto xdr_error;
@@ -4855,7 +4930,7 @@ static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
}
/*
- * The prefered block size for layout directed io
+ * The preferred block size for layout directed io
*/
static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap,
uint32_t *res)
@@ -5026,7 +5101,7 @@ static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
/*
* We create the owner, so we know a proper owner.id length is 4.
*/
-static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
+static int decode_lock_denied(struct xdr_stream *xdr, struct file_lock *fl)
{
uint64_t offset, length, clientid;
__be32 *p;
@@ -5043,10 +5118,10 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
fl->fl_end = fl->fl_start + (loff_t)length - 1;
if (length == ~(uint64_t)0)
fl->fl_end = OFFSET_MAX;
- fl->fl_type = F_WRLCK;
+ fl->c.flc_type = F_WRLCK;
if (type & 1)
- fl->fl_type = F_RDLCK;
- fl->fl_pid = 0;
+ fl->c.flc_type = F_RDLCK;
+ fl->c.flc_pid = 0;
}
p = xdr_decode_hyper(p, &clientid); /* read 8 bytes */
namelen = be32_to_cpup(p); /* read 4 bytes */ /* have read all 32 bytes now */
@@ -5139,13 +5214,12 @@ static int decode_space_limit(struct xdr_stream *xdr,
}
static int decode_rw_delegation(struct xdr_stream *xdr,
- uint32_t delegation_type,
- struct nfs_openres *res)
+ struct nfs4_open_delegation *res)
{
__be32 *p;
int status;
- status = decode_delegation_stateid(xdr, &res->delegation);
+ status = decode_delegation_stateid(xdr, &res->stateid);
if (unlikely(status))
return status;
p = xdr_inline_decode(xdr, 4);
@@ -5153,52 +5227,57 @@ static int decode_rw_delegation(struct xdr_stream *xdr,
return -EIO;
res->do_recall = be32_to_cpup(p);
- switch (delegation_type) {
+ switch (res->open_delegation_type) {
case NFS4_OPEN_DELEGATE_READ:
- res->delegation_type = FMODE_READ;
+ case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
+ res->type = FMODE_READ;
break;
case NFS4_OPEN_DELEGATE_WRITE:
- res->delegation_type = FMODE_WRITE|FMODE_READ;
+ case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+ res->type = FMODE_WRITE|FMODE_READ;
if (decode_space_limit(xdr, &res->pagemod_limit) < 0)
return -EIO;
}
return decode_ace(xdr, NULL);
}
-static int decode_no_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
+static int decode_no_delegation(struct xdr_stream *xdr,
+ struct nfs4_open_delegation *res)
{
__be32 *p;
- uint32_t why_no_delegation;
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
return -EIO;
- why_no_delegation = be32_to_cpup(p);
- switch (why_no_delegation) {
+ res->why_no_delegation = be32_to_cpup(p);
+ switch (res->why_no_delegation) {
case WND4_CONTENTION:
case WND4_RESOURCE:
- xdr_inline_decode(xdr, 4);
- /* Ignore for now */
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+ res->will_notify = be32_to_cpup(p);
}
return 0;
}
-static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
+static int decode_delegation(struct xdr_stream *xdr,
+ struct nfs4_open_delegation *res)
{
__be32 *p;
- uint32_t delegation_type;
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
return -EIO;
- delegation_type = be32_to_cpup(p);
- res->delegation_type = 0;
- switch (delegation_type) {
+ res->open_delegation_type = be32_to_cpup(p);
+ switch (res->open_delegation_type) {
case NFS4_OPEN_DELEGATE_NONE:
return 0;
case NFS4_OPEN_DELEGATE_READ:
case NFS4_OPEN_DELEGATE_WRITE:
- return decode_rw_delegation(xdr, delegation_type, res);
+ case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
+ case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+ return decode_rw_delegation(xdr, res);
case NFS4_OPEN_DELEGATE_NONE_EXT:
return decode_no_delegation(xdr, res);
}
@@ -5239,7 +5318,7 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
for (; i < NFS4_BITMAP_SIZE; i++)
res->attrset[i] = 0;
- return decode_delegation(xdr, res);
+ return decode_delegation(xdr, &res->delegation);
xdr_error:
dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen);
return -EIO;
@@ -5471,6 +5550,11 @@ static int decode_setattr(struct xdr_stream *xdr)
return -EIO;
}
+static int decode_delegattr(struct xdr_stream *xdr)
+{
+ return decode_setattr(xdr);
+}
+
static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_res *res)
{
__be32 *p;
@@ -6501,7 +6585,7 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
int status;
if (res->acl_scratch != NULL)
- xdr_set_scratch_page(xdr, res->acl_scratch);
+ xdr_set_scratch_folio(xdr, res->acl_scratch);
status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
@@ -7043,6 +7127,12 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
if (status)
goto out;
}
+ if (res->sattr_res) {
+ status = decode_delegattr(xdr);
+ res->sattr_ret = status;
+ if (status)
+ goto out;
+ }
if (res->fattr) {
status = decode_getfattr(xdr, res->fattr, res->server);
if (status != 0)
@@ -7538,72 +7628,6 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
return 0;
}
-/*
- * We need to translate between nfs status return values and
- * the local errno values which may not be the same.
- */
-static struct {
- int stat;
- int errno;
-} nfs_errtbl[] = {
- { NFS4_OK, 0 },
- { NFS4ERR_PERM, -EPERM },
- { NFS4ERR_NOENT, -ENOENT },
- { NFS4ERR_IO, -errno_NFSERR_IO},
- { NFS4ERR_NXIO, -ENXIO },
- { NFS4ERR_ACCESS, -EACCES },
- { NFS4ERR_EXIST, -EEXIST },
- { NFS4ERR_XDEV, -EXDEV },
- { NFS4ERR_NOTDIR, -ENOTDIR },
- { NFS4ERR_ISDIR, -EISDIR },
- { NFS4ERR_INVAL, -EINVAL },
- { NFS4ERR_FBIG, -EFBIG },
- { NFS4ERR_NOSPC, -ENOSPC },
- { NFS4ERR_ROFS, -EROFS },
- { NFS4ERR_MLINK, -EMLINK },
- { NFS4ERR_NAMETOOLONG, -ENAMETOOLONG },
- { NFS4ERR_NOTEMPTY, -ENOTEMPTY },
- { NFS4ERR_DQUOT, -EDQUOT },
- { NFS4ERR_STALE, -ESTALE },
- { NFS4ERR_BADHANDLE, -EBADHANDLE },
- { NFS4ERR_BAD_COOKIE, -EBADCOOKIE },
- { NFS4ERR_NOTSUPP, -ENOTSUPP },
- { NFS4ERR_TOOSMALL, -ETOOSMALL },
- { NFS4ERR_SERVERFAULT, -EREMOTEIO },
- { NFS4ERR_BADTYPE, -EBADTYPE },
- { NFS4ERR_LOCKED, -EAGAIN },
- { NFS4ERR_SYMLINK, -ELOOP },
- { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP },
- { NFS4ERR_DEADLOCK, -EDEADLK },
- { NFS4ERR_NOXATTR, -ENODATA },
- { NFS4ERR_XATTR2BIG, -E2BIG },
- { -1, -EIO }
-};
-
-/*
- * Convert an NFS error code to a local one.
- * This one is used jointly by NFSv2 and NFSv3.
- */
-static int
-nfs4_stat_to_errno(int stat)
-{
- int i;
- for (i = 0; nfs_errtbl[i].stat != -1; i++) {
- if (nfs_errtbl[i].stat == stat)
- return nfs_errtbl[i].errno;
- }
- if (stat <= 10000 || stat > 10100) {
- /* The server is looney tunes. */
- return -EREMOTEIO;
- }
- /* If we cannot translate the error, the recovery routines should
- * handle it.
- * Note: remaining NFSv4 error codes have values > 10000, so should
- * not conflict with native Linux error codes.
- */
- return -stat;
-}
-
#ifdef CONFIG_NFS_V4_2
#include "nfs42xdr.c"
#endif /* CONFIG_NFS_V4_2 */
@@ -7702,6 +7726,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
PROC42(CLONE, enc_clone, dec_clone),
PROC42(COPY, enc_copy, dec_copy),
PROC42(OFFLOAD_CANCEL, enc_offload_cancel, dec_offload_cancel),
+ PROC42(OFFLOAD_STATUS, enc_offload_status, dec_offload_status),
PROC42(COPY_NOTIFY, enc_copy_notify, dec_copy_notify),
PROC(LOOKUPP, enc_lookupp, dec_lookupp),
PROC42(LAYOUTERROR, enc_layouterror, dec_layouterror),
@@ -7710,6 +7735,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
PROC42(LISTXATTRS, enc_listxattrs, dec_listxattrs),
PROC42(REMOVEXATTR, enc_removexattr, dec_removexattr),
PROC42(READ_PLUS, enc_read_plus, dec_read_plus),
+ PROC42(ZERO_RANGE, enc_zero_range, dec_zero_range),
};
static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)];
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 620329b7e6ae..432612d22437 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -164,7 +164,7 @@ __setup("nfsroot=", nfs_root_setup);
static int __init root_nfs_copy(char *dest, const char *src,
const size_t destlen)
{
- if (strlcpy(dest, src, destlen) > destlen)
+ if (strscpy(dest, src, destlen) == -E2BIG)
return -1;
return 0;
}
@@ -175,10 +175,10 @@ static int __init root_nfs_cat(char *dest, const char *src,
size_t len = strlen(dest);
if (len && dest[len - 1] != ',')
- if (strlcat(dest, ",", destlen) > destlen)
+ if (strlcat(dest, ",", destlen) >= destlen)
return -1;
- if (strlcat(dest, src, destlen) > destlen)
+ if (strlcat(dest, src, destlen) >= destlen)
return -1;
return 0;
}
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 642f6921852f..6ce55e8e6b67 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -32,19 +32,36 @@
{ NFS_INO_INVALID_BLOCKS, "INVALID_BLOCKS" }, \
{ NFS_INO_INVALID_XATTR, "INVALID_XATTR" }, \
{ NFS_INO_INVALID_NLINK, "INVALID_NLINK" }, \
- { NFS_INO_INVALID_MODE, "INVALID_MODE" })
+ { NFS_INO_INVALID_MODE, "INVALID_MODE" }, \
+ { NFS_INO_INVALID_BTIME, "INVALID_BTIME" })
#define nfs_show_nfsi_flags(v) \
__print_flags(v, "|", \
{ BIT(NFS_INO_STALE), "STALE" }, \
{ BIT(NFS_INO_ACL_LRU_SET), "ACL_LRU_SET" }, \
{ BIT(NFS_INO_INVALIDATING), "INVALIDATING" }, \
- { BIT(NFS_INO_FSCACHE), "FSCACHE" }, \
{ BIT(NFS_INO_LAYOUTCOMMIT), "NEED_LAYOUTCOMMIT" }, \
{ BIT(NFS_INO_LAYOUTCOMMITTING), "LAYOUTCOMMIT" }, \
{ BIT(NFS_INO_LAYOUTSTATS), "LAYOUTSTATS" }, \
{ BIT(NFS_INO_ODIRECT), "ODIRECT" })
+#define nfs_show_wb_flags(v) \
+ __print_flags(v, "|", \
+ { BIT(PG_BUSY), "BUSY" }, \
+ { BIT(PG_MAPPED), "MAPPED" }, \
+ { BIT(PG_FOLIO), "FOLIO" }, \
+ { BIT(PG_CLEAN), "CLEAN" }, \
+ { BIT(PG_COMMIT_TO_DS), "COMMIT_TO_DS" }, \
+ { BIT(PG_INODE_REF), "INODE_REF" }, \
+ { BIT(PG_HEADLOCK), "HEADLOCK" }, \
+ { BIT(PG_TEARDOWN), "TEARDOWN" }, \
+ { BIT(PG_UNLOCKPAGE), "UNLOCKPAGE" }, \
+ { BIT(PG_UPTODATE), "UPTODATE" }, \
+ { BIT(PG_WB_END), "WB_END" }, \
+ { BIT(PG_REMOVE), "REMOVE" }, \
+ { BIT(PG_CONTENDED1), "CONTENDED1" }, \
+ { BIT(PG_CONTENDED2), "CONTENDED2" })
+
DECLARE_EVENT_CLASS(nfs_inode_event,
TP_PROTO(
const struct inode *inode
@@ -57,6 +74,7 @@ DECLARE_EVENT_CLASS(nfs_inode_event,
__field(u32, fhandle)
__field(u64, fileid)
__field(u64, version)
+ __field(unsigned long, cache_validity)
),
TP_fast_assign(
@@ -65,14 +83,17 @@ DECLARE_EVENT_CLASS(nfs_inode_event,
__entry->fileid = nfsi->fileid;
__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
__entry->version = inode_peek_iversion_raw(inode);
+ __entry->cache_validity = nfsi->cache_validity;
),
TP_printk(
- "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu ",
+ "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu cache_validity=0x%lx (%s)",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
- (unsigned long long)__entry->version
+ (unsigned long long)__entry->version,
+ __entry->cache_validity,
+ nfs_show_cache_validity(__entry->cache_validity)
)
);
@@ -152,8 +173,6 @@ DEFINE_NFS_INODE_EVENT(nfs_getattr_enter);
DEFINE_NFS_INODE_EVENT_DONE(nfs_getattr_exit);
DEFINE_NFS_INODE_EVENT(nfs_setattr_enter);
DEFINE_NFS_INODE_EVENT_DONE(nfs_setattr_exit);
-DEFINE_NFS_INODE_EVENT(nfs_writeback_page_enter);
-DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_page_exit);
DEFINE_NFS_INODE_EVENT(nfs_writeback_inode_enter);
DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_inode_exit);
DEFINE_NFS_INODE_EVENT(nfs_fsync_enter);
@@ -270,6 +289,7 @@ DECLARE_EVENT_CLASS(nfs_update_size_class,
TP_ARGS(inode, new_size))
DEFINE_NFS_UPDATE_SIZE_EVENT(truncate);
+DEFINE_NFS_UPDATE_SIZE_EVENT(truncate_folio);
DEFINE_NFS_UPDATE_SIZE_EVENT(wcc);
DEFINE_NFS_UPDATE_SIZE_EVENT(update);
DEFINE_NFS_UPDATE_SIZE_EVENT(grow);
@@ -403,6 +423,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event,
__field(unsigned long, flags)
__field(dev_t, dev)
__field(u64, dir)
+ __field(u64, fileid)
__string(name, dentry->d_name.name)
),
@@ -410,16 +431,18 @@ DECLARE_EVENT_CLASS(nfs_lookup_event,
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__entry->flags = flags;
- __assign_str(name, dentry->d_name.name);
+ __entry->fileid = d_is_negative(dentry) ? 0 : NFS_FILEID(d_inode(dentry));
+ __assign_str(name);
),
TP_printk(
- "flags=0x%lx (%s) name=%02x:%02x:%llu/%s",
+ "flags=0x%lx (%s) name=%02x:%02x:%llu/%s fileid=%llu",
__entry->flags,
show_fs_lookup_flags(__entry->flags),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->dir,
- __get_str(name)
+ __get_str(name),
+ __entry->fileid
)
);
@@ -447,6 +470,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done,
__field(unsigned long, flags)
__field(dev_t, dev)
__field(u64, dir)
+ __field(u64, fileid)
__string(name, dentry->d_name.name)
),
@@ -455,17 +479,19 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done,
__entry->dir = NFS_FILEID(dir);
__entry->error = error < 0 ? -error : 0;
__entry->flags = flags;
- __assign_str(name, dentry->d_name.name);
+ __entry->fileid = d_is_negative(dentry) ? 0 : NFS_FILEID(d_inode(dentry));
+ __assign_str(name);
),
TP_printk(
- "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s",
+ "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s fileid=%llu",
-__entry->error, show_nfs_status(__entry->error),
__entry->flags,
show_fs_lookup_flags(__entry->flags),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->dir,
- __get_str(name)
+ __get_str(name),
+ __entry->fileid
)
);
@@ -509,7 +535,7 @@ TRACE_EVENT(nfs_atomic_open_enter,
__entry->dir = NFS_FILEID(dir);
__entry->flags = flags;
__entry->fmode = (__force unsigned long)ctx->mode;
- __assign_str(name, ctx->dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -548,7 +574,7 @@ TRACE_EVENT(nfs_atomic_open_exit,
__entry->dir = NFS_FILEID(dir);
__entry->flags = flags;
__entry->fmode = (__force unsigned long)ctx->mode;
- __assign_str(name, ctx->dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -584,7 +610,7 @@ TRACE_EVENT(nfs_create_enter,
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__entry->flags = flags;
- __assign_str(name, dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -620,7 +646,7 @@ TRACE_EVENT(nfs_create_exit,
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__entry->flags = flags;
- __assign_str(name, dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -651,7 +677,7 @@ DECLARE_EVENT_CLASS(nfs_directory_event,
TP_fast_assign(
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
- __assign_str(name, dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -690,7 +716,7 @@ DECLARE_EVENT_CLASS(nfs_directory_event_done,
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__entry->error = error < 0 ? -error : 0;
- __assign_str(name, dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -744,7 +770,7 @@ TRACE_EVENT(nfs_link_enter,
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = NFS_FILEID(inode);
__entry->dir = NFS_FILEID(dir);
- __assign_str(name, dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -780,7 +806,7 @@ TRACE_EVENT(nfs_link_exit,
__entry->fileid = NFS_FILEID(inode);
__entry->dir = NFS_FILEID(dir);
__entry->error = error < 0 ? -error : 0;
- __assign_str(name, dentry->d_name.name);
+ __assign_str(name);
),
TP_printk(
@@ -816,8 +842,8 @@ DECLARE_EVENT_CLASS(nfs_rename_event,
__entry->dev = old_dir->i_sb->s_dev;
__entry->old_dir = NFS_FILEID(old_dir);
__entry->new_dir = NFS_FILEID(new_dir);
- __assign_str(old_name, old_dentry->d_name.name);
- __assign_str(new_name, new_dentry->d_name.name);
+ __assign_str(old_name);
+ __assign_str(new_name);
),
TP_printk(
@@ -865,8 +891,8 @@ DECLARE_EVENT_CLASS(nfs_rename_event_done,
__entry->error = -error;
__entry->old_dir = NFS_FILEID(old_dir);
__entry->new_dir = NFS_FILEID(new_dir);
- __assign_str(old_name, old_dentry->d_name.name);
- __assign_str(new_name, new_dentry->d_name.name);
+ __assign_str(old_name);
+ __assign_str(new_name);
),
TP_printk(
@@ -896,7 +922,7 @@ DECLARE_EVENT_CLASS(nfs_rename_event_done,
DEFINE_NFS_RENAME_EVENT(nfs_rename_enter);
DEFINE_NFS_RENAME_EVENT_DONE(nfs_rename_exit);
-DEFINE_NFS_RENAME_EVENT_DONE(nfs_sillyrename_rename);
+DEFINE_NFS_RENAME_EVENT_DONE(nfs_async_rename_done);
TRACE_EVENT(nfs_sillyrename_unlink,
TP_PROTO(
@@ -933,13 +959,14 @@ TRACE_EVENT(nfs_sillyrename_unlink,
)
);
-TRACE_EVENT(nfs_aop_readpage,
+DECLARE_EVENT_CLASS(nfs_folio_event,
TP_PROTO(
const struct inode *inode,
- struct page *page
+ loff_t offset,
+ size_t count
),
- TP_ARGS(inode, page),
+ TP_ARGS(inode, offset, count),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -947,6 +974,7 @@ TRACE_EVENT(nfs_aop_readpage,
__field(u64, fileid)
__field(u64, version)
__field(loff_t, offset)
+ __field(size_t, count)
),
TP_fast_assign(
@@ -956,26 +984,38 @@ TRACE_EVENT(nfs_aop_readpage,
__entry->fileid = nfsi->fileid;
__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
__entry->version = inode_peek_iversion_raw(inode);
- __entry->offset = page_index(page) << PAGE_SHIFT;
+ __entry->offset = offset;
+ __entry->count = count;
),
TP_printk(
- "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu offset=%lld",
+ "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu "
+ "offset=%lld count=%zu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle, __entry->version,
- __entry->offset
+ __entry->offset, __entry->count
)
);
-TRACE_EVENT(nfs_aop_readpage_done,
+#define DEFINE_NFS_FOLIO_EVENT(name) \
+ DEFINE_EVENT(nfs_folio_event, name, \
+ TP_PROTO( \
+ const struct inode *inode, \
+ loff_t offset, \
+ size_t count \
+ ), \
+ TP_ARGS(inode, offset, count))
+
+DECLARE_EVENT_CLASS(nfs_folio_event_done,
TP_PROTO(
const struct inode *inode,
- struct page *page,
+ loff_t offset,
+ size_t count,
int ret
),
- TP_ARGS(inode, page, ret),
+ TP_ARGS(inode, offset, count, ret),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -984,6 +1024,7 @@ TRACE_EVENT(nfs_aop_readpage_done,
__field(u64, fileid)
__field(u64, version)
__field(loff_t, offset)
+ __field(size_t, count)
),
TP_fast_assign(
@@ -993,19 +1034,107 @@ TRACE_EVENT(nfs_aop_readpage_done,
__entry->fileid = nfsi->fileid;
__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
__entry->version = inode_peek_iversion_raw(inode);
- __entry->offset = page_index(page) << PAGE_SHIFT;
+ __entry->offset = offset;
+ __entry->count = count;
__entry->ret = ret;
),
TP_printk(
- "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu offset=%lld ret=%d",
+ "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu "
+ "offset=%lld count=%zu ret=%d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle, __entry->version,
- __entry->offset, __entry->ret
+ __entry->offset, __entry->count, __entry->ret
)
);
+#define DEFINE_NFS_FOLIO_EVENT_DONE(name) \
+ DEFINE_EVENT(nfs_folio_event_done, name, \
+ TP_PROTO( \
+ const struct inode *inode, \
+ loff_t offset, \
+ size_t count, \
+ int ret \
+ ), \
+ TP_ARGS(inode, offset, count, ret))
+
+DEFINE_NFS_FOLIO_EVENT(nfs_aop_readpage);
+DEFINE_NFS_FOLIO_EVENT_DONE(nfs_aop_readpage_done);
+
+DEFINE_NFS_FOLIO_EVENT(nfs_writeback_folio);
+DEFINE_NFS_FOLIO_EVENT_DONE(nfs_writeback_folio_done);
+
+DEFINE_NFS_FOLIO_EVENT(nfs_invalidate_folio);
+DEFINE_NFS_FOLIO_EVENT_DONE(nfs_launder_folio_done);
+
+DEFINE_NFS_FOLIO_EVENT(nfs_try_to_update_request);
+DEFINE_NFS_FOLIO_EVENT_DONE(nfs_try_to_update_request_done);
+
+DEFINE_NFS_FOLIO_EVENT(nfs_update_folio);
+DEFINE_NFS_FOLIO_EVENT_DONE(nfs_update_folio_done);
+
+DEFINE_NFS_FOLIO_EVENT(nfs_write_begin);
+DEFINE_NFS_FOLIO_EVENT_DONE(nfs_write_begin_done);
+
+DEFINE_NFS_FOLIO_EVENT(nfs_write_end);
+DEFINE_NFS_FOLIO_EVENT_DONE(nfs_write_end_done);
+
+DEFINE_NFS_FOLIO_EVENT(nfs_writepages);
+DEFINE_NFS_FOLIO_EVENT_DONE(nfs_writepages_done);
+
+DECLARE_EVENT_CLASS(nfs_kiocb_event,
+ TP_PROTO(
+ const struct kiocb *iocb,
+ const struct iov_iter *iter
+ ),
+
+ TP_ARGS(iocb, iter),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(u64, version)
+ __field(loff_t, offset)
+ __field(size_t, count)
+ __field(int, flags)
+ ),
+
+ TP_fast_assign(
+ const struct inode *inode = file_inode(iocb->ki_filp);
+ const struct nfs_inode *nfsi = NFS_I(inode);
+
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = nfsi->fileid;
+ __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->version = inode_peek_iversion_raw(inode);
+ __entry->offset = iocb->ki_pos;
+ __entry->count = iov_iter_count(iter);
+ __entry->flags = iocb->ki_flags;
+ ),
+
+ TP_printk(
+ "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu offset=%lld count=%zu ki_flags=%s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle, __entry->version,
+ __entry->offset, __entry->count,
+ __print_flags(__entry->flags, "|", TRACE_IOCB_STRINGS)
+ )
+);
+
+#define DEFINE_NFS_KIOCB_EVENT(name) \
+ DEFINE_EVENT(nfs_kiocb_event, name, \
+ TP_PROTO( \
+ const struct kiocb *iocb, \
+ const struct iov_iter *iter \
+ ), \
+ TP_ARGS(iocb, iter))
+
+DEFINE_NFS_KIOCB_EVENT(nfs_file_read);
+DEFINE_NFS_KIOCB_EVENT(nfs_file_write);
+
TRACE_EVENT(nfs_aop_readahead,
TP_PROTO(
const struct inode *inode,
@@ -1213,96 +1342,6 @@ TRACE_EVENT(nfs_readpage_short,
)
);
-DECLARE_EVENT_CLASS(nfs_fscache_page_event,
- TP_PROTO(
- const struct inode *inode,
- struct page *page
- ),
-
- TP_ARGS(inode, page),
-
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(u32, fhandle)
- __field(u64, fileid)
- __field(loff_t, offset)
- ),
-
- TP_fast_assign(
- const struct nfs_inode *nfsi = NFS_I(inode);
- const struct nfs_fh *fh = &nfsi->fh;
-
- __entry->offset = page_index(page) << PAGE_SHIFT;
- __entry->dev = inode->i_sb->s_dev;
- __entry->fileid = nfsi->fileid;
- __entry->fhandle = nfs_fhandle_hash(fh);
- ),
-
- TP_printk(
- "fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- (unsigned long long)__entry->fileid,
- __entry->fhandle,
- (long long)__entry->offset
- )
-);
-DECLARE_EVENT_CLASS(nfs_fscache_page_event_done,
- TP_PROTO(
- const struct inode *inode,
- struct page *page,
- int error
- ),
-
- TP_ARGS(inode, page, error),
-
- TP_STRUCT__entry(
- __field(int, error)
- __field(dev_t, dev)
- __field(u32, fhandle)
- __field(u64, fileid)
- __field(loff_t, offset)
- ),
-
- TP_fast_assign(
- const struct nfs_inode *nfsi = NFS_I(inode);
- const struct nfs_fh *fh = &nfsi->fh;
-
- __entry->offset = page_index(page) << PAGE_SHIFT;
- __entry->dev = inode->i_sb->s_dev;
- __entry->fileid = nfsi->fileid;
- __entry->fhandle = nfs_fhandle_hash(fh);
- __entry->error = error;
- ),
-
- TP_printk(
- "fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld error=%d",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- (unsigned long long)__entry->fileid,
- __entry->fhandle,
- (long long)__entry->offset, __entry->error
- )
-);
-#define DEFINE_NFS_FSCACHE_PAGE_EVENT(name) \
- DEFINE_EVENT(nfs_fscache_page_event, name, \
- TP_PROTO( \
- const struct inode *inode, \
- struct page *page \
- ), \
- TP_ARGS(inode, page))
-#define DEFINE_NFS_FSCACHE_PAGE_EVENT_DONE(name) \
- DEFINE_EVENT(nfs_fscache_page_event_done, name, \
- TP_PROTO( \
- const struct inode *inode, \
- struct page *page, \
- int error \
- ), \
- TP_ARGS(inode, page, error))
-DEFINE_NFS_FSCACHE_PAGE_EVENT(nfs_fscache_read_page);
-DEFINE_NFS_FSCACHE_PAGE_EVENT_DONE(nfs_fscache_read_page_exit);
-DEFINE_NFS_FSCACHE_PAGE_EVENT(nfs_fscache_write_page);
-DEFINE_NFS_FSCACHE_PAGE_EVENT_DONE(nfs_fscache_write_page_exit);
TRACE_EVENT(nfs_pgio_error,
TP_PROTO(
@@ -1443,6 +1482,55 @@ TRACE_EVENT(nfs_writeback_done,
)
);
+DECLARE_EVENT_CLASS(nfs_page_class,
+ TP_PROTO(
+ const struct nfs_page *req
+ ),
+
+ TP_ARGS(req),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(const struct nfs_page *__private, req)
+ __field(loff_t, offset)
+ __field(unsigned int, count)
+ __field(unsigned long, flags)
+ ),
+
+ TP_fast_assign(
+ const struct inode *inode = folio_inode(req->wb_folio);
+ const struct nfs_inode *nfsi = NFS_I(inode);
+
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = nfsi->fileid;
+ __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->req = req;
+ __entry->offset = req_offset(req);
+ __entry->count = req->wb_bytes;
+ __entry->flags = req->wb_flags;
+ ),
+
+ TP_printk(
+ "fileid=%02x:%02x:%llu fhandle=0x%08x req=%p offset=%lld count=%u flags=%s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid, __entry->fhandle,
+ __entry->req, __entry->offset, __entry->count,
+ nfs_show_wb_flags(__entry->flags)
+ )
+);
+
+#define DEFINE_NFS_PAGE_EVENT(name) \
+ DEFINE_EVENT(nfs_page_class, name, \
+ TP_PROTO( \
+ const struct nfs_page *req \
+ ), \
+ TP_ARGS(req))
+
+DEFINE_NFS_PAGE_EVENT(nfs_writepage_setup);
+DEFINE_NFS_PAGE_EVENT(nfs_do_writepage);
+
DECLARE_EVENT_CLASS(nfs_page_error_class,
TP_PROTO(
const struct inode *inode,
@@ -1600,7 +1688,6 @@ DECLARE_EVENT_CLASS(nfs_direct_req_class,
__field(u32, fhandle)
__field(loff_t, offset)
__field(ssize_t, count)
- __field(ssize_t, bytes_left)
__field(ssize_t, error)
__field(int, flags)
),
@@ -1615,19 +1702,18 @@ DECLARE_EVENT_CLASS(nfs_direct_req_class,
__entry->fhandle = nfs_fhandle_hash(fh);
__entry->offset = dreq->io_start;
__entry->count = dreq->count;
- __entry->bytes_left = dreq->bytes_left;
__entry->error = dreq->error;
__entry->flags = dreq->flags;
),
TP_printk(
"error=%zd fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld count=%zd bytes_left=%zd flags=%s",
+ "offset=%lld count=%zd flags=%s",
__entry->error, MAJOR(__entry->dev),
MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle, __entry->offset,
- __entry->count, __entry->bytes_left,
+ __entry->count,
nfs_show_direct_req_flags(__entry->flags)
)
);
@@ -1646,6 +1732,76 @@ DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_completion);
DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_schedule_iovec);
DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_reschedule_io);
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+
+DECLARE_EVENT_CLASS(nfs_local_dio_class,
+ TP_PROTO(
+ const struct inode *inode,
+ loff_t offset,
+ ssize_t count,
+ const struct nfs_local_dio *local_dio
+ ),
+ TP_ARGS(inode, offset, count, local_dio),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u64, fileid)
+ __field(u32, fhandle)
+ __field(loff_t, offset)
+ __field(ssize_t, count)
+ __field(u32, mem_align)
+ __field(u32, offset_align)
+ __field(loff_t, start)
+ __field(ssize_t, start_len)
+ __field(loff_t, middle)
+ __field(ssize_t, middle_len)
+ __field(loff_t, end)
+ __field(ssize_t, end_len)
+ ),
+ TP_fast_assign(
+ const struct nfs_inode *nfsi = NFS_I(inode);
+ const struct nfs_fh *fh = &nfsi->fh;
+
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = nfsi->fileid;
+ __entry->fhandle = nfs_fhandle_hash(fh);
+ __entry->offset = offset;
+ __entry->count = count;
+ __entry->mem_align = local_dio->mem_align;
+ __entry->offset_align = local_dio->offset_align;
+ __entry->start = offset;
+ __entry->start_len = local_dio->start_len;
+ __entry->middle = local_dio->middle_offset;
+ __entry->middle_len = local_dio->middle_len;
+ __entry->end = local_dio->end_offset;
+ __entry->end_len = local_dio->end_len;
+ ),
+ TP_printk("fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%lld count=%zd "
+ "mem_align=%u offset_align=%u "
+ "start=%llu+%zd middle=%llu+%zd end=%llu+%zd",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle, __entry->offset, __entry->count,
+ __entry->mem_align, __entry->offset_align,
+ __entry->start, __entry->start_len,
+ __entry->middle, __entry->middle_len,
+ __entry->end, __entry->end_len)
+)
+
+#define DEFINE_NFS_LOCAL_DIO_EVENT(name) \
+DEFINE_EVENT(nfs_local_dio_class, nfs_local_dio_##name, \
+ TP_PROTO(const struct inode *inode, \
+ loff_t offset, \
+ ssize_t count, \
+ const struct nfs_local_dio *local_dio),\
+ TP_ARGS(inode, offset, count, local_dio))
+
+DEFINE_NFS_LOCAL_DIO_EVENT(read);
+DEFINE_NFS_LOCAL_DIO_EVENT(write);
+DEFINE_NFS_LOCAL_DIO_EVENT(misaligned);
+
+#endif /* CONFIG_NFS_LOCALIO */
+
TRACE_EVENT(nfs_fh_to_dentry,
TP_PROTO(
const struct super_block *sb,
@@ -1693,8 +1849,8 @@ TRACE_EVENT(nfs_mount_assign,
),
TP_fast_assign(
- __assign_str(option, option);
- __assign_str(value, value);
+ __assign_str(option);
+ __assign_str(value);
),
TP_printk("option %s=%s",
@@ -1714,7 +1870,7 @@ TRACE_EVENT(nfs_mount_option,
),
TP_fast_assign(
- __assign_str(option, param->key);
+ __assign_str(option);
),
TP_printk("option %s", __get_str(option))
@@ -1732,12 +1888,41 @@ TRACE_EVENT(nfs_mount_path,
),
TP_fast_assign(
- __assign_str(path, path);
+ __assign_str(path);
),
TP_printk("path='%s'", __get_str(path))
);
+TRACE_EVENT(nfs_local_open_fh,
+ TP_PROTO(
+ const struct nfs_fh *fh,
+ fmode_t fmode,
+ int error
+ ),
+
+ TP_ARGS(fh, fmode, error),
+
+ TP_STRUCT__entry(
+ __field(int, error)
+ __field(u32, fhandle)
+ __field(unsigned int, fmode)
+ ),
+
+ TP_fast_assign(
+ __entry->error = error;
+ __entry->fhandle = nfs_fhandle_hash(fh);
+ __entry->fmode = (__force unsigned int)fmode;
+ ),
+
+ TP_printk(
+ "fhandle=0x%08x mode=%s result=%d",
+ __entry->fhandle,
+ show_fs_fmode_flags(__entry->fmode),
+ __entry->error
+ )
+);
+
DECLARE_EVENT_CLASS(nfs_xdr_event,
TP_PROTO(
const struct xdr_stream *xdr,
@@ -1767,9 +1952,8 @@ DECLARE_EVENT_CLASS(nfs_xdr_event,
__entry->xid = be32_to_cpu(rqstp->rq_xid);
__entry->version = task->tk_client->cl_vers;
__entry->error = error;
- __assign_str(program,
- task->tk_client->cl_program->name);
- __assign_str(procedure, task->tk_msg.rpc_proc->p_name);
+ __assign_str(program);
+ __assign_str(procedure);
),
TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 16be6dae524f..6e69ce43a13f 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -21,16 +21,54 @@
#include <linux/nfs_page.h>
#include <linux/nfs_mount.h>
#include <linux/export.h>
+#include <linux/filelock.h>
#include "internal.h"
#include "pnfs.h"
#include "nfstrace.h"
+#include "fscache.h"
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
static struct kmem_cache *nfs_page_cachep;
static const struct rpc_call_ops nfs_pgio_common_ops;
+struct nfs_page_iter_page {
+ const struct nfs_page *req;
+ size_t count;
+};
+
+static void nfs_page_iter_page_init(struct nfs_page_iter_page *i,
+ const struct nfs_page *req)
+{
+ i->req = req;
+ i->count = 0;
+}
+
+static void nfs_page_iter_page_advance(struct nfs_page_iter_page *i, size_t sz)
+{
+ const struct nfs_page *req = i->req;
+ size_t tmp = i->count + sz;
+
+ i->count = (tmp < req->wb_bytes) ? tmp : req->wb_bytes;
+}
+
+static struct page *nfs_page_iter_page_get(struct nfs_page_iter_page *i)
+{
+ const struct nfs_page *req = i->req;
+ struct page *page;
+
+ if (i->count != req->wb_bytes) {
+ size_t base = i->count + req->wb_pgbase;
+ size_t len = PAGE_SIZE - offset_in_page(base);
+
+ page = nfs_page_to_page(req, base);
+ nfs_page_iter_page_advance(i, len);
+ return page;
+ }
+ return NULL;
+}
+
static struct nfs_pgio_mirror *
nfs_pgio_get_mirror(struct nfs_pageio_descriptor *desc, u32 idx)
{
@@ -68,6 +106,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
hdr->good_bytes = mirror->pg_count;
hdr->io_completion = desc->pg_io_completion;
hdr->dreq = desc->pg_dreq;
+ nfs_netfs_set_pgio_header(hdr, desc);
hdr->release = release;
hdr->completion_ops = desc->pg_completion_ops;
if (hdr->completion_ops->init_hdr)
@@ -149,102 +188,6 @@ nfs_async_iocounter_wait(struct rpc_task *task, struct nfs_lock_context *l_ctx)
EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait);
/*
- * nfs_page_lock_head_request - page lock the head of the page group
- * @req: any member of the page group
- */
-struct nfs_page *
-nfs_page_group_lock_head(struct nfs_page *req)
-{
- struct nfs_page *head = req->wb_head;
-
- while (!nfs_lock_request(head)) {
- int ret = nfs_wait_on_request(head);
- if (ret < 0)
- return ERR_PTR(ret);
- }
- if (head != req)
- kref_get(&head->wb_kref);
- return head;
-}
-
-/*
- * nfs_unroll_locks - unlock all newly locked reqs and wait on @req
- * @head: head request of page group, must be holding head lock
- * @req: request that couldn't lock and needs to wait on the req bit lock
- *
- * This is a helper function for nfs_lock_and_join_requests
- * returns 0 on success, < 0 on error.
- */
-static void
-nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req)
-{
- struct nfs_page *tmp;
-
- /* relinquish all the locks successfully grabbed this run */
- for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) {
- if (!kref_read(&tmp->wb_kref))
- continue;
- nfs_unlock_and_release_request(tmp);
- }
-}
-
-/*
- * nfs_page_group_lock_subreq - try to lock a subrequest
- * @head: head request of page group
- * @subreq: request to lock
- *
- * This is a helper function for nfs_lock_and_join_requests which
- * must be called with the head request and page group both locked.
- * On error, it returns with the page group unlocked.
- */
-static int
-nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq)
-{
- int ret;
-
- if (!kref_get_unless_zero(&subreq->wb_kref))
- return 0;
- while (!nfs_lock_request(subreq)) {
- nfs_page_group_unlock(head);
- ret = nfs_wait_on_request(subreq);
- if (!ret)
- ret = nfs_page_group_lock(head);
- if (ret < 0) {
- nfs_unroll_locks(head, subreq);
- nfs_release_request(subreq);
- return ret;
- }
- }
- return 0;
-}
-
-/*
- * nfs_page_group_lock_subrequests - try to lock the subrequests
- * @head: head request of page group
- *
- * This is a helper function for nfs_lock_and_join_requests which
- * must be called with the head request locked.
- */
-int nfs_page_group_lock_subrequests(struct nfs_page *head)
-{
- struct nfs_page *subreq;
- int ret;
-
- ret = nfs_page_group_lock(head);
- if (ret < 0)
- return ret;
- /* lock each request in the page group */
- for (subreq = head->wb_this_page; subreq != head;
- subreq = subreq->wb_this_page) {
- ret = nfs_page_group_lock_subreq(head, subreq);
- if (ret < 0)
- return ret;
- }
- nfs_page_group_unlock(head);
- return 0;
-}
-
-/*
* nfs_page_set_headlock - set the request PG_HEADLOCK
* @req: request that is to be locked
*
@@ -310,13 +253,14 @@ nfs_page_group_unlock(struct nfs_page *req)
nfs_page_clear_headlock(req);
}
-/*
- * nfs_page_group_sync_on_bit_locked
+/**
+ * nfs_page_group_sync_on_bit_locked - Test if all requests have @bit set
+ * @req: request in page group
+ * @bit: PG_* bit that is used to sync page group
*
* must be called with page group lock held
*/
-static bool
-nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
+bool nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
{
struct nfs_page *head = req->wb_head;
struct nfs_page *tmp;
@@ -390,7 +334,7 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
* has extra ref from the write/commit path to handle handoff
* between write and commit lists. */
if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) {
- inode = page_file_mapping(req->wb_page)->host;
+ inode = nfs_page_to_inode(req);
set_bit(PG_INODE_REF, &req->wb_flags);
kref_get(&req->wb_kref);
atomic_long_inc(&NFS_I(inode)->nrequests);
@@ -430,10 +374,9 @@ out:
nfs_release_request(head);
}
-static struct nfs_page *
-__nfs_create_request(struct nfs_lock_context *l_ctx, struct page *page,
- unsigned int pgbase, unsigned int offset,
- unsigned int count)
+static struct nfs_page *nfs_page_create(struct nfs_lock_context *l_ctx,
+ unsigned int pgbase, pgoff_t index,
+ unsigned int offset, unsigned int count)
{
struct nfs_page *req;
struct nfs_open_context *ctx = l_ctx->open_context;
@@ -452,42 +395,90 @@ __nfs_create_request(struct nfs_lock_context *l_ctx, struct page *page,
/* Initialize the request struct. Initially, we assume a
* long write-back delay. This will be adjusted in
* update_nfs_request below if the region is not locked. */
- req->wb_page = page;
- if (page) {
- req->wb_index = page_index(page);
- get_page(page);
- }
- req->wb_offset = offset;
- req->wb_pgbase = pgbase;
- req->wb_bytes = count;
+ req->wb_pgbase = pgbase;
+ req->wb_index = index;
+ req->wb_offset = offset;
+ req->wb_bytes = count;
kref_init(&req->wb_kref);
req->wb_nio = 0;
return req;
}
+static void nfs_page_assign_folio(struct nfs_page *req, struct folio *folio)
+{
+ if (folio != NULL) {
+ req->wb_folio = folio;
+ folio_get(folio);
+ set_bit(PG_FOLIO, &req->wb_flags);
+ }
+}
+
+static void nfs_page_assign_page(struct nfs_page *req, struct page *page)
+{
+ if (page != NULL) {
+ req->wb_page = page;
+ get_page(page);
+ }
+}
+
/**
- * nfs_create_request - Create an NFS read/write request.
+ * nfs_page_create_from_page - Create an NFS read/write request.
* @ctx: open context to use
* @page: page to write
- * @offset: starting offset within the page for the write
+ * @pgbase: starting offset within the page for the write
+ * @offset: file offset for the write
+ * @count: number of bytes to read/write
+ *
+ * The page must be locked by the caller. This makes sure we never
+ * create two different requests for the same page.
+ * User should ensure it is safe to sleep in this function.
+ */
+struct nfs_page *nfs_page_create_from_page(struct nfs_open_context *ctx,
+ struct page *page,
+ unsigned int pgbase, loff_t offset,
+ unsigned int count)
+{
+ struct nfs_lock_context *l_ctx = nfs_get_lock_context(ctx);
+ struct nfs_page *ret;
+
+ if (IS_ERR(l_ctx))
+ return ERR_CAST(l_ctx);
+ ret = nfs_page_create(l_ctx, pgbase, offset >> PAGE_SHIFT,
+ offset_in_page(offset), count);
+ if (!IS_ERR(ret)) {
+ nfs_page_assign_page(ret, page);
+ nfs_page_group_init(ret, NULL);
+ }
+ nfs_put_lock_context(l_ctx);
+ return ret;
+}
+
+/**
+ * nfs_page_create_from_folio - Create an NFS read/write request.
+ * @ctx: open context to use
+ * @folio: folio to write
+ * @offset: starting offset within the folio for the write
* @count: number of bytes to read/write
*
* The page must be locked by the caller. This makes sure we never
* create two different requests for the same page.
* User should ensure it is safe to sleep in this function.
*/
-struct nfs_page *
-nfs_create_request(struct nfs_open_context *ctx, struct page *page,
- unsigned int offset, unsigned int count)
+struct nfs_page *nfs_page_create_from_folio(struct nfs_open_context *ctx,
+ struct folio *folio,
+ unsigned int offset,
+ unsigned int count)
{
struct nfs_lock_context *l_ctx = nfs_get_lock_context(ctx);
struct nfs_page *ret;
if (IS_ERR(l_ctx))
return ERR_CAST(l_ctx);
- ret = __nfs_create_request(l_ctx, page, offset, offset, count);
- if (!IS_ERR(ret))
+ ret = nfs_page_create(l_ctx, offset, folio->index, offset, count);
+ if (!IS_ERR(ret)) {
+ nfs_page_assign_folio(ret, folio);
nfs_page_group_init(ret, NULL);
+ }
nfs_put_lock_context(l_ctx);
return ret;
}
@@ -500,10 +491,16 @@ nfs_create_subreq(struct nfs_page *req,
{
struct nfs_page *last;
struct nfs_page *ret;
+ struct folio *folio = nfs_page_to_folio(req);
+ struct page *page = nfs_page_to_page(req, pgbase);
- ret = __nfs_create_request(req->wb_lock_context, req->wb_page,
- pgbase, offset, count);
+ ret = nfs_page_create(req->wb_lock_context, pgbase, req->wb_index,
+ offset, count);
if (!IS_ERR(ret)) {
+ if (folio)
+ nfs_page_assign_folio(ret, folio);
+ else
+ nfs_page_assign_page(ret, page);
/* find the last request */
for (last = req->wb_head;
last->wb_this_page != req->wb_head;
@@ -511,7 +508,6 @@ nfs_create_subreq(struct nfs_page *req,
;
nfs_lock_request(ret);
- ret->wb_index = req->wb_index;
nfs_page_group_init(ret, last);
ret->wb_nio = req->wb_nio;
}
@@ -550,11 +546,16 @@ void nfs_unlock_and_release_request(struct nfs_page *req)
*/
static void nfs_clear_request(struct nfs_page *req)
{
+ struct folio *folio = nfs_page_to_folio(req);
struct page *page = req->wb_page;
struct nfs_lock_context *l_ctx = req->wb_lock_context;
struct nfs_open_context *ctx;
- if (page != NULL) {
+ if (folio != NULL) {
+ folio_put(folio);
+ req->wb_folio = NULL;
+ clear_bit(PG_FOLIO, &req->wb_flags);
+ } else if (page != NULL) {
put_page(page);
req->wb_page = NULL;
}
@@ -598,25 +599,6 @@ void nfs_release_request(struct nfs_page *req)
}
EXPORT_SYMBOL_GPL(nfs_release_request);
-/**
- * nfs_wait_on_request - Wait for a request to complete.
- * @req: request to wait upon.
- *
- * Interruptible by fatal signals only.
- * The user is responsible for holding a count on the request.
- */
-int
-nfs_wait_on_request(struct nfs_page *req)
-{
- if (!test_bit(PG_BUSY, &req->wb_flags))
- return 0;
- set_bit(PG_CONTENDED2, &req->wb_flags);
- smp_mb__after_atomic();
- return wait_on_bit_io(&req->wb_flags, PG_BUSY,
- TASK_UNINTERRUPTIBLE);
-}
-EXPORT_SYMBOL_GPL(nfs_wait_on_request);
-
/*
* nfs_generic_pg_test - determine if requests can be coalesced
* @desc: pointer to descriptor
@@ -692,13 +674,14 @@ EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
/**
* nfs_pgio_rpcsetup - Set up arguments for a pageio call
* @hdr: The pageio hdr
+ * @pgbase: base
* @count: Number of bytes to read
* @how: How to commit data (writes only)
* @cinfo: Commit information for the call (writes only)
*/
-static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
- unsigned int count,
- int how, struct nfs_commit_info *cinfo)
+static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr, unsigned int pgbase,
+ unsigned int count, int how,
+ struct nfs_commit_info *cinfo)
{
struct nfs_page *req = hdr->req;
@@ -709,7 +692,7 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
hdr->args.offset = req_offset(req);
/* pnfs_set_layoutcommit needs this */
hdr->mds_offset = hdr->args.offset;
- hdr->args.pgbase = req->wb_pgbase;
+ hdr->args.pgbase = pgbase;
hdr->args.pages = hdr->page_array.pagevec;
hdr->args.count = count;
hdr->args.context = get_nfs_open_context(nfs_req_openctx(req));
@@ -749,7 +732,8 @@ static void nfs_pgio_prepare(struct rpc_task *task, void *calldata)
int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
const struct cred *cred, const struct nfs_rpc_ops *rpc_ops,
- const struct rpc_call_ops *call_ops, int how, int flags)
+ const struct rpc_call_ops *call_ops, int how, int flags,
+ struct nfsd_file *localio)
{
struct rpc_task *task;
struct rpc_message msg = {
@@ -779,6 +763,10 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
hdr->args.count,
(unsigned long long)hdr->args.offset);
+ if (localio)
+ return nfs_local_doio(NFS_SERVER(hdr->inode)->nfs_client,
+ localio, hdr, call_ops);
+
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
@@ -846,6 +834,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_lseg = NULL;
desc->pg_io_completion = NULL;
desc->pg_dreq = NULL;
+ nfs_netfs_reset_pageio_descriptor(desc);
desc->pg_bsize = bsize;
desc->pg_mirror_count = 1;
@@ -895,9 +884,10 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
struct nfs_commit_info cinfo;
struct nfs_page_array *pg_array = &hdr->page_array;
unsigned int pagecount, pageused;
+ unsigned int pg_base = offset_in_page(mirror->pg_base);
gfp_t gfp_flags = nfs_io_gfp_mask();
- pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count);
+ pagecount = nfs_page_array_len(pg_base, mirror->pg_count);
pg_array->npages = pagecount;
if (pagecount <= ARRAY_SIZE(pg_array->page_array))
@@ -917,16 +907,26 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
last_page = NULL;
pageused = 0;
while (!list_empty(head)) {
+ struct nfs_page_iter_page i;
+ struct page *page;
+
req = nfs_list_entry(head->next);
nfs_list_move_request(req, &hdr->pages);
- if (!last_page || last_page != req->wb_page) {
- pageused++;
- if (pageused > pagecount)
- break;
- *pages++ = last_page = req->wb_page;
+ if (req->wb_pgbase == 0)
+ last_page = NULL;
+
+ nfs_page_iter_page_init(&i, req);
+ while ((page = nfs_page_iter_page_get(&i)) != NULL) {
+ if (last_page != page) {
+ pageused++;
+ if (pageused > pagecount)
+ goto full;
+ *pages++ = last_page = page;
+ }
}
}
+full:
if (WARN_ON_ONCE(pageused != pagecount)) {
nfs_pgio_error(hdr);
desc->pg_error = -EINVAL;
@@ -938,7 +938,8 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
desc->pg_ioflags &= ~FLUSH_COND_STABLE;
/* Set up the argument struct */
- nfs_pgio_rpcsetup(hdr, mirror->pg_count, desc->pg_ioflags, &cinfo);
+ nfs_pgio_rpcsetup(hdr, pg_base, mirror->pg_count, desc->pg_ioflags,
+ &cinfo);
desc->pg_rpc_callops = &nfs_pgio_common_ops;
return 0;
}
@@ -958,6 +959,13 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
ret = nfs_generic_pgio(desc, hdr);
if (ret == 0) {
+ struct nfs_client *clp = NFS_SERVER(hdr->inode)->nfs_client;
+
+ struct nfsd_file *localio =
+ nfs_local_open_fh(clp, hdr->cred, hdr->args.fh,
+ &hdr->args.context->nfl,
+ hdr->args.context->mode);
+
if (NFS_SERVER(hdr->inode)->nfs_client->cl_minorversion)
task_flags = RPC_TASK_MOVEABLE;
ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
@@ -966,7 +974,8 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
NFS_PROTO(hdr->inode),
desc->pg_rpc_callops,
desc->pg_ioflags,
- RPC_TASK_CRED_NOREF | task_flags);
+ RPC_TASK_CRED_NOREF | task_flags,
+ localio);
}
return ret;
}
@@ -1034,6 +1043,24 @@ static bool nfs_match_lock_context(const struct nfs_lock_context *l1,
return l1->lockowner == l2->lockowner;
}
+static bool nfs_page_is_contiguous(const struct nfs_page *prev,
+ const struct nfs_page *req)
+{
+ size_t prev_end = prev->wb_pgbase + prev->wb_bytes;
+
+ if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
+ return false;
+ if (req->wb_pgbase == 0)
+ return prev_end == nfs_page_max_length(prev);
+ if (req->wb_pgbase == prev_end) {
+ struct folio *folio = nfs_page_to_folio(req);
+ if (folio)
+ return folio == nfs_page_to_folio(prev);
+ return req->wb_page == prev->wb_page;
+ }
+ return false;
+}
+
/**
* nfs_coalesce_size - test two requests for compatibility
* @prev: pointer to nfs_page
@@ -1062,16 +1089,8 @@ static unsigned int nfs_coalesce_size(struct nfs_page *prev,
!nfs_match_lock_context(req->wb_lock_context,
prev->wb_lock_context))
return 0;
- if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
+ if (!nfs_page_is_contiguous(prev, req))
return 0;
- if (req->wb_page == prev->wb_page) {
- if (req->wb_pgbase != prev->wb_pgbase + prev->wb_bytes)
- return 0;
- } else {
- if (req->wb_pgbase != 0 ||
- prev->wb_pgbase + prev->wb_bytes != PAGE_SIZE)
- return 0;
- }
}
return pgio->pg_ops->pg_test(pgio, prev, req);
}
@@ -1360,6 +1379,7 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
desc->pg_io_completion = hdr->io_completion;
desc->pg_dreq = hdr->dreq;
+ nfs_netfs_set_pageio_descriptor(desc, hdr);
list_splice_init(&hdr->pages, &pages);
while (!list_empty(&pages)) {
struct nfs_page *req = nfs_list_entry(pages.next);
@@ -1411,16 +1431,26 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
{
struct nfs_pgio_mirror *mirror;
struct nfs_page *prev;
+ struct folio *folio;
u32 midx;
for (midx = 0; midx < desc->pg_mirror_count; midx++) {
mirror = nfs_pgio_get_mirror(desc, midx);
if (!list_empty(&mirror->pg_list)) {
prev = nfs_list_entry(mirror->pg_list.prev);
- if (index != prev->wb_index + 1) {
- nfs_pageio_complete(desc);
- break;
- }
+ folio = nfs_page_to_folio(prev);
+ if (folio) {
+ if (index == folio_next_index(folio))
+ continue;
+ } else if (index == prev->wb_index + 1)
+ continue;
+ /*
+ * We will submit more requests after these. Indicate
+ * this to the underlying layers.
+ */
+ desc->pg_moreio = 1;
+ nfs_pageio_complete(desc);
+ break;
}
}
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index a5db5158c634..f157d43d1312 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -61,6 +61,7 @@ static void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo,
u32 seq);
static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
struct list_head *tmp_list);
+static int pnfs_layout_return_on_reboot(struct pnfs_layout_hdr *lo);
/* Return the registered pnfs layout driver module matching given id */
static struct pnfs_layoutdriver_type *
@@ -305,7 +306,6 @@ void
pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
{
struct inode *inode;
- unsigned long i_state;
if (!lo)
return;
@@ -316,12 +316,11 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
if (!list_empty(&lo->plh_segs))
WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
pnfs_detach_layout_hdr(lo);
- i_state = inode->i_state;
+ /* Notify pnfs_destroy_layout_final() that we're done */
+ if (inode_state_read(inode) & (I_FREEING | I_CLEAR))
+ wake_up_var_locked(lo, &inode->i_lock);
spin_unlock(&inode->i_lock);
pnfs_free_layout_hdr(lo);
- /* Notify pnfs_destroy_layout_final() that we're done */
- if (i_state & (I_FREEING | I_CLEAR))
- wake_up_var(lo);
}
}
@@ -476,6 +475,18 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
return !list_empty(&lo->plh_segs);
}
+static int pnfs_mark_layout_stateid_return(struct pnfs_layout_hdr *lo,
+ struct list_head *lseg_list,
+ enum pnfs_iomode iomode, u32 seq)
+{
+ struct pnfs_layout_range range = {
+ .iomode = iomode,
+ .length = NFS4_MAX_UINT64,
+ };
+
+ return pnfs_mark_matching_lsegs_return(lo, lseg_list, &range, seq);
+}
+
static int
pnfs_iomode_to_fail_bit(u32 iomode)
{
@@ -511,7 +522,7 @@ pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode)
spin_lock(&inode->i_lock);
pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
- pnfs_mark_matching_lsegs_invalid(lo, &head, &range, 0);
+ pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
spin_unlock(&inode->i_lock);
pnfs_free_lseg_list(&head);
dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__,
@@ -732,6 +743,14 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
return remaining;
}
+static void pnfs_reset_return_info(struct pnfs_layout_hdr *lo)
+{
+ struct pnfs_layout_segment *lseg;
+
+ list_for_each_entry(lseg, &lo->plh_return_segs, pls_list)
+ pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
+}
+
static void
pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo,
struct list_head *free_me,
@@ -788,23 +807,17 @@ void pnfs_destroy_layout(struct nfs_inode *nfsi)
}
EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
-static bool pnfs_layout_removed(struct nfs_inode *nfsi,
- struct pnfs_layout_hdr *lo)
-{
- bool ret;
-
- spin_lock(&nfsi->vfs_inode.i_lock);
- ret = nfsi->layout != lo;
- spin_unlock(&nfsi->vfs_inode.i_lock);
- return ret;
-}
-
void pnfs_destroy_layout_final(struct nfs_inode *nfsi)
{
struct pnfs_layout_hdr *lo = __pnfs_destroy_layout(nfsi);
+ struct inode *inode = &nfsi->vfs_inode;
- if (lo)
- wait_var_event(lo, pnfs_layout_removed(nfsi, lo));
+ if (lo) {
+ spin_lock(&inode->i_lock);
+ wait_var_event_spinlock(lo, nfsi->layout != lo,
+ &inode->i_lock);
+ spin_unlock(&inode->i_lock);
+ }
}
static bool
@@ -846,8 +859,6 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
break;
inode = pnfs_grab_inode_layout_hdr(lo);
if (inode != NULL) {
- if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags))
- list_del_rcu(&lo->plh_layouts);
if (pnfs_layout_add_bulk_destroy_list(inode,
layout_list))
continue;
@@ -868,7 +879,7 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
static int
pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
- bool is_bulk_recall)
+ enum pnfs_layout_destroy_mode mode)
{
struct pnfs_layout_hdr *lo;
struct inode *inode;
@@ -886,8 +897,11 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
spin_lock(&inode->i_lock);
list_del_init(&lo->plh_bulk_destroy);
- if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
- if (is_bulk_recall)
+ if (mode == PNFS_LAYOUT_FILE_BULK_RETURN) {
+ pnfs_mark_layout_stateid_return(lo, &lseg_list,
+ IOMODE_ANY, 0);
+ } else if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
+ if (mode == PNFS_LAYOUT_BULK_RETURN)
set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
ret = -EAGAIN;
}
@@ -901,10 +915,8 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
return ret;
}
-int
-pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
- struct nfs_fsid *fsid,
- bool is_recall)
+int pnfs_layout_destroy_byfsid(struct nfs_client *clp, struct nfs_fsid *fsid,
+ enum pnfs_layout_destroy_mode mode)
{
struct nfs_server *server;
LIST_HEAD(layout_list);
@@ -923,33 +935,40 @@ restart:
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
- if (list_empty(&layout_list))
- return 0;
- return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
+ return pnfs_layout_free_bulk_destroy_list(&layout_list, mode);
}
-int
-pnfs_destroy_layouts_byclid(struct nfs_client *clp,
- bool is_recall)
+static void pnfs_layout_build_destroy_list_byclient(struct nfs_client *clp,
+ struct list_head *list)
{
struct nfs_server *server;
- LIST_HEAD(layout_list);
spin_lock(&clp->cl_lock);
rcu_read_lock();
restart:
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
- if (pnfs_layout_bulk_destroy_byserver_locked(clp,
- server,
- &layout_list) != 0)
+ if (pnfs_layout_bulk_destroy_byserver_locked(clp, server,
+ list) != 0)
goto restart;
}
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
+}
- if (list_empty(&layout_list))
- return 0;
- return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
+static int pnfs_layout_do_destroy_byclid(struct nfs_client *clp,
+ struct list_head *list,
+ enum pnfs_layout_destroy_mode mode)
+{
+ pnfs_layout_build_destroy_list_byclient(clp, list);
+ return pnfs_layout_free_bulk_destroy_list(list, mode);
+}
+
+int pnfs_layout_destroy_byclid(struct nfs_client *clp,
+ enum pnfs_layout_destroy_mode mode)
+{
+ LIST_HEAD(layout_list);
+
+ return pnfs_layout_do_destroy_byclid(clp, &layout_list, mode);
}
/*
@@ -962,7 +981,68 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
nfs4_deviceid_mark_client_invalid(clp);
nfs4_deviceid_purge_client(clp);
- pnfs_destroy_layouts_byclid(clp, false);
+ pnfs_layout_destroy_byclid(clp, PNFS_LAYOUT_INVALIDATE);
+}
+
+static void pnfs_layout_build_recover_list_byclient(struct nfs_client *clp,
+ struct list_head *list)
+{
+ struct nfs_server *server;
+
+ spin_lock(&clp->cl_lock);
+ rcu_read_lock();
+restart:
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ if (!(server->caps & NFS_CAP_REBOOT_LAYOUTRETURN))
+ continue;
+ if (pnfs_layout_bulk_destroy_byserver_locked(clp, server,
+ list) != 0)
+ goto restart;
+ }
+ rcu_read_unlock();
+ spin_unlock(&clp->cl_lock);
+}
+
+static int pnfs_layout_bulk_list_reboot(struct list_head *list)
+{
+ struct pnfs_layout_hdr *lo;
+ struct nfs_server *server;
+ int ret;
+
+ list_for_each_entry(lo, list, plh_bulk_destroy) {
+ server = NFS_SERVER(lo->plh_inode);
+ ret = pnfs_layout_return_on_reboot(lo);
+ switch (ret) {
+ case 0:
+ continue;
+ case -NFS4ERR_BAD_STATEID:
+ server->caps &= ~NFS_CAP_REBOOT_LAYOUTRETURN;
+ break;
+ case -NFS4ERR_NO_GRACE:
+ break;
+ default:
+ goto err;
+ }
+ break;
+ }
+ return 0;
+err:
+ return ret;
+}
+
+int pnfs_layout_handle_reboot(struct nfs_client *clp)
+{
+ LIST_HEAD(list);
+ int ret = 0, ret2;
+
+ pnfs_layout_build_recover_list_byclient(clp, &list);
+ if (!list_empty(&list))
+ ret = pnfs_layout_bulk_list_reboot(&list);
+ ret2 = pnfs_layout_do_destroy_byclid(clp, &list,
+ PNFS_LAYOUT_INVALIDATE);
+ if (!ret)
+ ret = ret2;
+ return (ret == 0) ? 0 : -EAGAIN;
}
static void
@@ -1163,6 +1243,33 @@ static void pnfs_clear_layoutcommit(struct inode *inode,
}
}
+static void
+pnfs_layoutreturn_retry_later_locked(struct pnfs_layout_hdr *lo,
+ const nfs4_stateid *arg_stateid,
+ const struct pnfs_layout_range *range,
+ struct list_head *freeme)
+{
+ if (pnfs_layout_is_valid(lo) &&
+ nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid))
+ pnfs_reset_return_info(lo);
+ else
+ pnfs_mark_layout_stateid_invalid(lo, freeme);
+ pnfs_clear_layoutreturn_waitbit(lo);
+}
+
+void pnfs_layoutreturn_retry_later(struct pnfs_layout_hdr *lo,
+ const nfs4_stateid *arg_stateid,
+ const struct pnfs_layout_range *range)
+{
+ struct inode *inode = lo->plh_inode;
+ LIST_HEAD(freeme);
+
+ spin_lock(&inode->i_lock);
+ pnfs_layoutreturn_retry_later_locked(lo, arg_stateid, range, &freeme);
+ spin_unlock(&inode->i_lock);
+ pnfs_free_lseg_list(&freeme);
+}
+
void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
const nfs4_stateid *arg_stateid,
const struct pnfs_layout_range *range,
@@ -1172,15 +1279,15 @@ void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
LIST_HEAD(freeme);
spin_lock(&inode->i_lock);
- if (!pnfs_layout_is_valid(lo) ||
- !nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid))
+ if (!nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid))
goto out_unlock;
- if (stateid) {
+ if (stateid && pnfs_layout_is_valid(lo)) {
u32 seq = be32_to_cpu(arg_stateid->seqid);
pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq);
pnfs_free_returned_lsegs(lo, &freeme, range, seq);
pnfs_set_layout_stateid(lo, stateid, NULL, true);
+ pnfs_reset_return_info(lo);
} else
pnfs_mark_layout_stateid_invalid(lo, &freeme);
out_unlock:
@@ -1197,7 +1304,7 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
enum pnfs_iomode *iomode)
{
/* Serialise LAYOUTGET/LAYOUTRETURN */
- if (atomic_read(&lo->plh_outstanding) != 0)
+ if (atomic_read(&lo->plh_outstanding) != 0 && lo->plh_return_seq == 0)
return false;
if (test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
return false;
@@ -1239,7 +1346,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo,
const nfs4_stateid *stateid,
const struct cred **pcred,
enum pnfs_iomode iomode,
- bool sync)
+ unsigned int flags)
{
struct inode *ino = lo->plh_inode;
struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
@@ -1266,33 +1373,21 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo,
if (ld->prepare_layoutreturn)
ld->prepare_layoutreturn(&lrp->args);
- status = nfs4_proc_layoutreturn(lrp, sync);
+ status = nfs4_proc_layoutreturn(lrp, flags);
out:
dprintk("<-- %s status: %d\n", __func__, status);
return status;
}
-static bool
-pnfs_layout_segments_returnable(struct pnfs_layout_hdr *lo,
- enum pnfs_iomode iomode,
- u32 seq)
-{
- struct pnfs_layout_range recall_range = {
- .length = NFS4_MAX_UINT64,
- .iomode = iomode,
- };
- return pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
- &recall_range, seq) != -EBUSY;
-}
-
/* Return true if layoutreturn is needed */
static bool
pnfs_layout_need_return(struct pnfs_layout_hdr *lo)
{
if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
return false;
- return pnfs_layout_segments_returnable(lo, lo->plh_return_iomode,
- lo->plh_return_seq);
+ return pnfs_mark_layout_stateid_return(lo, &lo->plh_return_segs,
+ lo->plh_return_iomode,
+ lo->plh_return_seq) != EBUSY;
}
static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
@@ -1312,7 +1407,8 @@ static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
spin_unlock(&inode->i_lock);
if (send) {
/* Send an async layoutreturn so we dont deadlock */
- pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
+ pnfs_send_layoutreturn(lo, &stateid, &cred, iomode,
+ PNFS_FL_LAYOUTRETURN_ASYNC);
}
} else
spin_unlock(&inode->i_lock);
@@ -1379,7 +1475,8 @@ _pnfs_return_layout(struct inode *ino)
send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, NULL);
spin_unlock(&ino->i_lock);
if (send)
- status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY, true);
+ status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY,
+ 0);
out_wait_layoutreturn:
wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, TASK_UNINTERRUPTIBLE);
out_put_layout_hdr:
@@ -1417,6 +1514,24 @@ pnfs_commit_and_return_layout(struct inode *inode)
return ret;
}
+static int pnfs_layout_return_on_reboot(struct pnfs_layout_hdr *lo)
+{
+ struct inode *inode = lo->plh_inode;
+ const struct cred *cred;
+
+ spin_lock(&inode->i_lock);
+ if (!pnfs_layout_is_valid(lo)) {
+ spin_unlock(&inode->i_lock);
+ return 0;
+ }
+ cred = get_cred(lo->plh_lc_cred);
+ pnfs_get_layout_hdr(lo);
+ spin_unlock(&inode->i_lock);
+
+ return pnfs_send_layoutreturn(lo, &zero_stateid, &cred, IOMODE_ANY,
+ PNFS_FL_LAYOUTRETURN_PRIVILEGED);
+}
+
bool pnfs_roc(struct inode *ino,
struct nfs4_layoutreturn_args *args,
struct nfs4_layoutreturn_res *res,
@@ -1520,7 +1635,7 @@ out_noroc:
return true;
}
if (layoutreturn)
- pnfs_send_layoutreturn(lo, &stateid, &lc_cred, iomode, true);
+ pnfs_send_layoutreturn(lo, &stateid, &lc_cred, iomode, 0);
pnfs_put_layout_hdr(lo);
return false;
}
@@ -1542,6 +1657,18 @@ int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp,
/* Was there an RPC level error? If not, retry */
if (task->tk_rpc_status == 0)
break;
+ /*
+ * Is there a fatal network level error?
+ * If so release the layout, but flag the error.
+ */
+ if ((task->tk_rpc_status == -ENETDOWN ||
+ task->tk_rpc_status == -ENETUNREACH) &&
+ task->tk_flags & RPC_TASK_NETUNREACH_FATAL) {
+ *ret = 0;
+ (*respp)->lrs_present = 0;
+ retval = -EIO;
+ break;
+ }
/* If the call was not sent, let caller handle it */
if (!RPC_WAS_SENT(task))
return 0;
@@ -1570,22 +1697,24 @@ int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp,
}
void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
- struct nfs4_layoutreturn_res *res,
- int ret)
+ struct nfs4_layoutreturn_res *res, int ret)
{
struct pnfs_layout_hdr *lo = args->layout;
struct inode *inode = args->inode;
const nfs4_stateid *res_stateid = NULL;
struct nfs4_xdr_opaque_data *ld_private = args->ld_private;
+ LIST_HEAD(freeme);
switch (ret) {
+ case -NFS4ERR_BADSESSION:
+ case -NFS4ERR_DEADSESSION:
+ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_NOMATCHING_LAYOUT:
spin_lock(&inode->i_lock);
- if (pnfs_layout_is_valid(lo) &&
- nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid))
- pnfs_set_plh_return_info(lo, args->range.iomode, 0);
- pnfs_clear_layoutreturn_waitbit(lo);
+ pnfs_layoutreturn_retry_later_locked(lo, &args->stateid,
+ &args->range, &freeme);
spin_unlock(&inode->i_lock);
+ pnfs_free_lseg_list(&freeme);
break;
case 0:
if (res->lrs_present)
@@ -1922,8 +2051,10 @@ static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo)
static void nfs_layoutget_end(struct pnfs_layout_hdr *lo)
{
if (atomic_dec_and_test(&lo->plh_outstanding) &&
- test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags))
+ test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) {
+ smp_mb__after_atomic();
wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN);
+ }
}
static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo)
@@ -1980,7 +2111,9 @@ pnfs_update_layout(struct inode *ino,
struct pnfs_layout_segment *lseg = NULL;
struct nfs4_layoutget *lgp;
nfs4_stateid stateid;
- long timeout = 0;
+ struct nfs4_exception exception = {
+ .inode = ino,
+ };
unsigned long giveup = jiffies + (clp->cl_lease_time << 1);
bool first;
@@ -1997,6 +2130,14 @@ pnfs_update_layout(struct inode *ino,
}
lookup_again:
+ if (!nfs4_valid_open_stateid(ctx->state)) {
+ trace_pnfs_update_layout(ino, pos, count,
+ iomode, lo, lseg,
+ PNFS_UPDATE_LAYOUT_INVALID_OPEN);
+ lseg = ERR_PTR(-EIO);
+ goto out;
+ }
+
lseg = ERR_PTR(nfs4_client_recover_expired_lease(clp));
if (IS_ERR(lseg))
goto out;
@@ -2144,7 +2285,7 @@ lookup_again:
lgp->lo = lo;
pnfs_get_layout_hdr(lo);
- lseg = nfs4_proc_layoutget(lgp, &timeout);
+ lseg = nfs4_proc_layoutget(lgp, &exception);
trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
nfs_layoutget_end(lo);
@@ -2171,6 +2312,8 @@ lookup_again:
goto out_put_layout_hdr;
}
if (lseg) {
+ if (!exception.retry)
+ goto out_put_layout_hdr;
if (first)
pnfs_clear_first_layoutget(lo);
trace_pnfs_update_layout(ino, pos, count,
@@ -2554,7 +2697,8 @@ pnfs_mark_layout_for_return(struct inode *inode,
return_now = pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode);
spin_unlock(&inode->i_lock);
if (return_now)
- pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
+ pnfs_send_layoutreturn(lo, &stateid, &cred, iomode,
+ PNFS_FL_LAYOUTRETURN_ASYNC);
} else {
spin_unlock(&inode->i_lock);
nfs_commit_inode(inode, 0);
@@ -2634,31 +2778,45 @@ pnfs_should_return_unused_layout(struct pnfs_layout_hdr *lo,
return mode == 0;
}
-static int
-pnfs_layout_return_unused_byserver(struct nfs_server *server, void *data)
+static int pnfs_layout_return_unused_byserver(struct nfs_server *server,
+ void *data)
{
const struct pnfs_layout_range *range = data;
+ const struct cred *cred;
struct pnfs_layout_hdr *lo;
struct inode *inode;
+ nfs4_stateid stateid;
+ enum pnfs_iomode iomode;
+
restart:
rcu_read_lock();
list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
- if (!pnfs_layout_can_be_returned(lo) ||
+ inode = lo->plh_inode;
+ if (!inode || !pnfs_layout_can_be_returned(lo) ||
test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
continue;
- inode = lo->plh_inode;
spin_lock(&inode->i_lock);
- if (!pnfs_should_return_unused_layout(lo, range)) {
+ if (!lo->plh_inode ||
+ !pnfs_should_return_unused_layout(lo, range)) {
spin_unlock(&inode->i_lock);
continue;
}
+ pnfs_get_layout_hdr(lo);
+ pnfs_set_plh_return_info(lo, range->iomode, 0);
+ if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
+ range, 0) != 0 ||
+ !pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode)) {
+ spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
+ pnfs_put_layout_hdr(lo);
+ cond_resched();
+ goto restart;
+ }
spin_unlock(&inode->i_lock);
- inode = pnfs_grab_inode_layout_hdr(lo);
- if (!inode)
- continue;
rcu_read_unlock();
- pnfs_mark_layout_for_return(inode, range);
- iput(inode);
+ pnfs_send_layoutreturn(lo, &stateid, &cred, iomode,
+ PNFS_FL_LAYOUTRETURN_ASYNC);
+ pnfs_put_layout_hdr(lo);
cond_resched();
goto restart;
}
@@ -2680,43 +2838,34 @@ pnfs_layout_return_unused_byclid(struct nfs_client *clp,
&range);
}
+/* Check if we have we have a valid layout but if there isn't an intersection
+ * between the request and the pgio->pg_lseg, put this pgio->pg_lseg away.
+ */
void
-pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio)
+pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio,
+ struct nfs_page *req)
{
if (pgio->pg_lseg == NULL ||
- test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags))
+ (test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags) &&
+ pnfs_lseg_request_intersecting(pgio->pg_lseg, req)))
return;
pnfs_put_lseg(pgio->pg_lseg);
pgio->pg_lseg = NULL;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout);
-/*
- * Check for any intersection between the request and the pgio->pg_lseg,
- * and if none, put this pgio->pg_lseg away.
- */
-void
-pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
-{
- if (pgio->pg_lseg && !pnfs_lseg_request_intersecting(pgio->pg_lseg, req)) {
- pnfs_put_lseg(pgio->pg_lseg);
- pgio->pg_lseg = NULL;
- }
-}
-EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_range);
-
void
pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
u64 rd_size;
- pnfs_generic_pg_check_layout(pgio);
- pnfs_generic_pg_check_range(pgio, req);
+ pnfs_generic_pg_check_layout(pgio, req);
if (pgio->pg_lseg == NULL) {
if (pgio->pg_dreq == NULL)
rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
else
- rd_size = nfs_dreq_bytes_left(pgio->pg_dreq);
+ rd_size = nfs_dreq_bytes_left(pgio->pg_dreq,
+ req_offset(req));
pgio->pg_lseg =
pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req),
@@ -2740,8 +2889,7 @@ void
pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req, u64 wb_size)
{
- pnfs_generic_pg_check_layout(pgio);
- pnfs_generic_pg_check_range(pgio, req);
+ pnfs_generic_pg_check_layout(pgio, req);
if (pgio->pg_lseg == NULL) {
pgio->pg_lseg =
pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req),
@@ -3184,6 +3332,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
struct nfs_inode *nfsi = NFS_I(inode);
loff_t end_pos;
int status;
+ bool mark_as_dirty = false;
if (!pnfs_layoutcommit_outstanding(inode))
return 0;
@@ -3235,19 +3384,23 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
if (ld->prepare_layoutcommit) {
status = ld->prepare_layoutcommit(&data->args);
if (status) {
- put_cred(data->cred);
+ if (status != -ENOSPC)
+ put_cred(data->cred);
spin_lock(&inode->i_lock);
set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags);
if (end_pos > nfsi->layout->plh_lwb)
nfsi->layout->plh_lwb = end_pos;
- goto out_unlock;
+ if (status != -ENOSPC)
+ goto out_unlock;
+ spin_unlock(&inode->i_lock);
+ mark_as_dirty = true;
}
}
status = nfs4_proc_layoutcommit(data, sync);
out:
- if (status)
+ if (status || mark_as_dirty)
mark_inode_dirty_sync(inode);
dprintk("<-- %s status %d\n", __func__, status);
return status;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index e3e6a41f19de..91ff877185c8 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -35,6 +35,7 @@
#include <linux/nfs_page.h>
#include <linux/workqueue.h>
+struct nfs4_exception;
struct nfs4_opendata;
enum {
@@ -59,6 +60,7 @@ struct nfs4_pnfs_ds {
struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
char *ds_remotestr; /* comma sep list of addrs */
struct list_head ds_addrs;
+ const struct net *ds_net;
struct nfs_client *ds_clp;
refcount_t ds_count;
unsigned long ds_state;
@@ -117,6 +119,12 @@ enum layoutdriver_policy_flags {
PNFS_LAYOUTGET_ON_OPEN = 1 << 3,
};
+enum pnfs_layout_destroy_mode {
+ PNFS_LAYOUT_INVALIDATE = 0,
+ PNFS_LAYOUT_BULK_RETURN,
+ PNFS_LAYOUT_FILE_BULK_RETURN,
+};
+
struct nfs4_deviceid_node;
/* Per-layout driver specific registration structure */
@@ -126,7 +134,6 @@ struct pnfs_layoutdriver_type {
const char *name;
struct module *owner;
unsigned flags;
- unsigned max_deviceinfo_size;
unsigned max_layoutget_response;
int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *);
@@ -192,8 +199,6 @@ struct pnfs_commit_ops {
int max);
void (*recover_commit_reqs) (struct list_head *list,
struct nfs_commit_info *cinfo);
- struct nfs_page * (*search_commit_reqs)(struct nfs_commit_info *cinfo,
- struct page *page);
};
struct pnfs_layout_hdr {
@@ -241,12 +246,18 @@ extern const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id);
extern void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld);
/* nfs4proc.c */
+#define PNFS_FL_LAYOUTRETURN_ASYNC (1U << 0)
+#define PNFS_FL_LAYOUTRETURN_PRIVILEGED (1U << 1)
+
extern size_t max_response_pages(struct nfs_server *server);
extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
struct pnfs_device *dev,
const struct cred *cred);
-extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout);
-extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync);
+extern struct pnfs_layout_segment *
+nfs4_proc_layoutget(struct nfs4_layoutget *lgp,
+ struct nfs4_exception *exception);
+extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp,
+ unsigned int flags);
/* pnfs.c */
void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
@@ -254,8 +265,7 @@ void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, struct nfs_fsinfo *);
void unset_pnfs_layoutdriver(struct nfs_server *);
-void pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio);
-void pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req);
+void pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio, struct nfs_page *req);
void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
@@ -271,11 +281,10 @@ void pnfs_free_lseg_list(struct list_head *tmp_list);
void pnfs_destroy_layout(struct nfs_inode *);
void pnfs_destroy_layout_final(struct nfs_inode *);
void pnfs_destroy_all_layouts(struct nfs_client *);
-int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
- struct nfs_fsid *fsid,
- bool is_recall);
-int pnfs_destroy_layouts_byclid(struct nfs_client *clp,
- bool is_recall);
+int pnfs_layout_destroy_byfsid(struct nfs_client *clp, struct nfs_fsid *fsid,
+ enum pnfs_layout_destroy_mode mode);
+int pnfs_layout_destroy_byclid(struct nfs_client *clp,
+ enum pnfs_layout_destroy_mode mode);
bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
struct pnfs_layout_range *dst_range,
struct inode *inode);
@@ -321,6 +330,9 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
enum pnfs_iomode iomode,
bool strict_iomode,
gfp_t gfp_flags);
+void pnfs_layoutreturn_retry_later(struct pnfs_layout_hdr *lo,
+ const nfs4_stateid *arg_stateid,
+ const struct pnfs_layout_range *range);
void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
const nfs4_stateid *arg_stateid,
const struct pnfs_layout_range *range,
@@ -342,6 +354,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
struct pnfs_layout_segment *lseg);
void pnfs_layout_return_unused_byclid(struct nfs_client *clp,
enum pnfs_iomode iomode);
+int pnfs_layout_handle_reboot(struct nfs_client *clp);
/* nfs4_deviceid_flags */
enum {
@@ -394,8 +407,6 @@ void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data);
void pnfs_generic_rw_release(void *data);
void pnfs_generic_recover_commit_reqs(struct list_head *dst,
struct nfs_commit_info *cinfo);
-struct nfs_page *pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo,
- struct page *page);
int pnfs_generic_commit_pagelist(struct inode *inode,
struct list_head *mds_pages,
int how,
@@ -405,7 +416,8 @@ int pnfs_generic_commit_pagelist(struct inode *inode,
int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max);
void pnfs_generic_write_commit_done(struct rpc_task *task, void *data);
void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds);
-struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs,
+struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(const struct net *net,
+ struct list_head *dsaddrs,
gfp_t gfp_flags);
void nfs4_pnfs_v3_ds_connect_unload(void);
int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
@@ -555,17 +567,6 @@ pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo)
fl_cinfo->ops->recover_commit_reqs(head, cinfo);
}
-static inline struct nfs_page *
-pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
- struct page *page)
-{
- struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
-
- if (!fl_cinfo->ops || !fl_cinfo->ops->search_commit_reqs)
- return NULL;
- return fl_cinfo->ops->search_commit_reqs(cinfo, page);
-}
-
/* Should the pNFS client commit and return the layout upon a setattr */
static inline bool
pnfs_ld_layoutret_on_setattr(struct inode *inode)
@@ -723,6 +724,11 @@ static inline void pnfs_destroy_layout_final(struct nfs_inode *nfsi)
{
}
+static inline int pnfs_layout_handle_reboot(struct nfs_client *clp)
+{
+ return 0;
+}
+
static inline struct pnfs_layout_segment *
pnfs_get_lseg(struct pnfs_layout_segment *lseg)
{
@@ -862,13 +868,6 @@ pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo)
{
}
-static inline struct nfs_page *
-pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
- struct page *page)
-{
- return NULL;
-}
-
static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
{
return 0;
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index ddbbf4fcda86..bf0f2d67e96c 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -110,9 +110,6 @@ nfs4_get_device_info(struct nfs_server *server,
* GETDEVICEINFO's maxcount
*/
max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
- if (server->pnfs_curr_ld->max_deviceinfo_size &&
- server->pnfs_curr_ld->max_deviceinfo_size < max_resp_sz)
- max_resp_sz = server->pnfs_curr_ld->max_deviceinfo_size;
max_pages = nfs_page_array_len(0, max_resp_sz);
dprintk("%s: server %p max_resp_sz %u max_pages %d\n",
__func__, server, max_resp_sz, max_pages);
@@ -154,7 +151,7 @@ nfs4_get_device_info(struct nfs_server *server,
set_bit(NFS_DEVICEID_NOCACHE, &d->flags);
out_free_pages:
- for (i = 0; i < max_pages; i++)
+ while (--i >= 0)
__free_page(pages[i]);
kfree(pages);
out_free_pdev:
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 5d035dd2d7bf..9976cc16b689 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -16,6 +16,8 @@
#include "nfs4session.h"
#include "internal.h"
#include "pnfs.h"
+#include "netns.h"
+#include "nfs4trace.h"
#define NFSDBG_FACILITY NFSDBG_PNFS
@@ -351,53 +353,6 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst,
}
EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs);
-static struct nfs_page *
-pnfs_bucket_search_commit_reqs(struct pnfs_commit_bucket *buckets,
- unsigned int nbuckets, struct page *page)
-{
- struct nfs_page *req;
- struct pnfs_commit_bucket *b;
- unsigned int i;
-
- /* Linearly search the commit lists for each bucket until a matching
- * request is found */
- for (i = 0, b = buckets; i < nbuckets; i++, b++) {
- list_for_each_entry(req, &b->written, wb_list) {
- if (req->wb_page == page)
- return req->wb_head;
- }
- list_for_each_entry(req, &b->committing, wb_list) {
- if (req->wb_page == page)
- return req->wb_head;
- }
- }
- return NULL;
-}
-
-/* pnfs_generic_search_commit_reqs - Search lists in @cinfo for the head request
- * for @page
- * @cinfo - commit info for current inode
- * @page - page to search for matching head request
- *
- * Return: the head request if one is found, otherwise %NULL.
- */
-struct nfs_page *
-pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page)
-{
- struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
- struct pnfs_commit_array *array;
- struct nfs_page *req;
-
- list_for_each_entry(array, &fl_cinfo->commits, cinfo_list) {
- req = pnfs_bucket_search_commit_reqs(array->buckets,
- array->nbuckets, page);
- if (req)
- return req;
- }
- return NULL;
-}
-EXPORT_SYMBOL_GPL(pnfs_generic_search_commit_reqs);
-
static struct pnfs_layout_segment *
pnfs_bucket_get_committing(struct list_head *head,
struct pnfs_commit_bucket *bucket,
@@ -537,7 +492,7 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
nfs_initiate_commit(NFS_CLIENT(inode), data,
NFS_PROTO(data->inode),
data->mds_ops, how,
- RPC_TASK_CRED_NOREF);
+ RPC_TASK_CRED_NOREF, NULL);
} else {
nfs_init_commit(data, NULL, data->lseg, cinfo);
initiate_commit(data, how);
@@ -551,14 +506,14 @@ EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist);
/*
* Data server cache
*
- * Data servers can be mapped to different device ids.
- * nfs4_pnfs_ds reference counting
+ * Data servers can be mapped to different device ids, but should
+ * never be shared between net namespaces.
+ *
+ * nfs4_pnfs_ds reference counting:
* - set to 1 on allocation
* - incremented when a device id maps a data server already in the cache.
* - decremented when deviceid is removed from the cache.
*/
-static DEFINE_SPINLOCK(nfs4_ds_cache_lock);
-static LIST_HEAD(nfs4_data_server_cache);
/* Debug routines */
static void
@@ -651,11 +606,11 @@ _same_data_server_addrs_locked(const struct list_head *dsaddrs1,
* Lookup DS by addresses. nfs4_ds_cache_lock is held
*/
static struct nfs4_pnfs_ds *
-_data_server_lookup_locked(const struct list_head *dsaddrs)
+_data_server_lookup_locked(const struct nfs_net *nn, const struct list_head *dsaddrs)
{
struct nfs4_pnfs_ds *ds;
- list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
+ list_for_each_entry(ds, &nn->nfs4_data_server_cache, ds_node)
if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
return ds;
return NULL;
@@ -700,10 +655,11 @@ static void destroy_ds(struct nfs4_pnfs_ds *ds)
void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds)
{
- if (refcount_dec_and_lock(&ds->ds_count,
- &nfs4_ds_cache_lock)) {
+ struct nfs_net *nn = net_generic(ds->ds_net, nfs_net_id);
+
+ if (refcount_dec_and_lock(&ds->ds_count, &nn->nfs4_data_server_lock)) {
list_del_init(&ds->ds_node);
- spin_unlock(&nfs4_ds_cache_lock);
+ spin_unlock(&nn->nfs4_data_server_lock);
destroy_ds(ds);
}
}
@@ -763,8 +719,9 @@ out_err:
* uncached and return cached struct nfs4_pnfs_ds.
*/
struct nfs4_pnfs_ds *
-nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
+nfs4_pnfs_ds_add(const struct net *net, struct list_head *dsaddrs, gfp_t gfp_flags)
{
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
char *remotestr;
@@ -780,16 +737,17 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
/* this is only used for debugging, so it's ok if its NULL */
remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
- spin_lock(&nfs4_ds_cache_lock);
- tmp_ds = _data_server_lookup_locked(dsaddrs);
+ spin_lock(&nn->nfs4_data_server_lock);
+ tmp_ds = _data_server_lookup_locked(nn, dsaddrs);
if (tmp_ds == NULL) {
INIT_LIST_HEAD(&ds->ds_addrs);
list_splice_init(dsaddrs, &ds->ds_addrs);
ds->ds_remotestr = remotestr;
refcount_set(&ds->ds_count, 1);
INIT_LIST_HEAD(&ds->ds_node);
+ ds->ds_net = net;
ds->ds_clp = NULL;
- list_add(&ds->ds_node, &nfs4_data_server_cache);
+ list_add(&ds->ds_node, &nn->nfs4_data_server_cache);
dprintk("%s add new data server %s\n", __func__,
ds->ds_remotestr);
} else {
@@ -801,7 +759,7 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
refcount_read(&tmp_ds->ds_count));
ds = tmp_ds;
}
- spin_unlock(&nfs4_ds_cache_lock);
+ spin_unlock(&nn->nfs4_data_server_lock);
out:
return ds;
}
@@ -851,7 +809,11 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
unsigned int retrans)
{
struct nfs_client *clp = ERR_PTR(-EIO);
+ struct nfs_client *mds_clp = mds_srv->nfs_client;
+ enum xprtsec_policies xprtsec_policy = mds_clp->cl_xprtsec.policy;
struct nfs4_pnfs_ds_addr *da;
+ unsigned long connect_timeout = timeo * (retrans + 1) * HZ / 10;
+ int ds_proto;
int status = 0;
dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr);
@@ -870,21 +832,33 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
.dstaddr = (struct sockaddr *)&da->da_addr,
.addrlen = da->da_addrlen,
.servername = clp->cl_hostname,
+ .connect_timeout = connect_timeout,
+ .reconnect_timeout = connect_timeout,
+ .xprtsec = clp->cl_xprtsec,
};
- if (da->da_transport != clp->cl_proto)
+ if (xprt_args.ident == XPRT_TRANSPORT_TCP &&
+ clp->cl_proto == XPRT_TRANSPORT_TCP_TLS)
+ xprt_args.ident = XPRT_TRANSPORT_TCP_TLS;
+
+ if (xprt_args.ident != clp->cl_proto)
continue;
- if (da->da_addr.ss_family != clp->cl_addr.ss_family)
+ if (xprt_args.dstaddr->sa_family !=
+ clp->cl_addr.ss_family)
continue;
/* Add this address as an alias */
rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
- rpc_clnt_test_and_add_xprt, NULL);
+ rpc_clnt_test_and_add_xprt, NULL);
continue;
}
- clp = get_v3_ds_connect(mds_srv,
- &da->da_addr,
- da->da_addrlen, da->da_transport,
- timeo, retrans);
+
+ ds_proto = da->da_transport;
+ if (ds_proto == XPRT_TRANSPORT_TCP &&
+ xprtsec_policy != RPC_XPRTSEC_NONE)
+ ds_proto = XPRT_TRANSPORT_TCP_TLS;
+
+ clp = get_v3_ds_connect(mds_srv, &da->da_addr, da->da_addrlen,
+ ds_proto, timeo, retrans);
if (IS_ERR(clp))
continue;
clp->cl_rpcclient->cl_softerr = 0;
@@ -910,12 +884,17 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
u32 minor_version)
{
struct nfs_client *clp = ERR_PTR(-EIO);
+ struct nfs_client *mds_clp = mds_srv->nfs_client;
+ enum xprtsec_policies xprtsec_policy = mds_clp->cl_xprtsec.policy;
struct nfs4_pnfs_ds_addr *da;
+ int ds_proto;
int status = 0;
dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr);
list_for_each_entry(da, &ds->ds_addrs, da_node) {
+ char servername[48];
+
dprintk("%s: DS %s: trying address %s\n",
__func__, ds->ds_remotestr, da->da_remotestr);
@@ -926,6 +905,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
.dstaddr = (struct sockaddr *)&da->da_addr,
.addrlen = da->da_addrlen,
.servername = clp->cl_hostname,
+ .xprtsec = clp->cl_xprtsec,
};
struct nfs4_add_xprt_data xprtdata = {
.clp = clp,
@@ -935,26 +915,63 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
.data = &xprtdata,
};
- if (da->da_transport != clp->cl_proto)
+ if (xprt_args.ident == XPRT_TRANSPORT_TCP &&
+ clp->cl_proto == XPRT_TRANSPORT_TCP_TLS) {
+ struct sockaddr *addr =
+ (struct sockaddr *)&da->da_addr;
+ struct sockaddr_in *sin =
+ (struct sockaddr_in *)&da->da_addr;
+ struct sockaddr_in6 *sin6 =
+ (struct sockaddr_in6 *)&da->da_addr;
+
+ /* for NFS with TLS we need to supply a correct
+ * servername of the trunked transport, not the
+ * servername of the main transport stored in
+ * clp->cl_hostname. And set the protocol to
+ * indicate to use TLS
+ */
+ servername[0] = '\0';
+ switch(addr->sa_family) {
+ case AF_INET:
+ snprintf(servername, sizeof(servername),
+ "%pI4", &sin->sin_addr.s_addr);
+ break;
+ case AF_INET6:
+ snprintf(servername, sizeof(servername),
+ "%pI6", &sin6->sin6_addr);
+ break;
+ default:
+ /* do not consider this address */
+ continue;
+ }
+ xprt_args.ident = XPRT_TRANSPORT_TCP_TLS;
+ xprt_args.servername = servername;
+ }
+ if (xprt_args.ident != clp->cl_proto)
continue;
- if (da->da_addr.ss_family != clp->cl_addr.ss_family)
+ if (xprt_args.dstaddr->sa_family !=
+ clp->cl_addr.ss_family)
continue;
+
/**
* Test this address for session trunking and
* add as an alias
*/
- xprtdata.cred = nfs4_get_clid_cred(clp),
+ xprtdata.cred = nfs4_get_clid_cred(clp);
rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
rpc_clnt_setup_test_and_add_xprt,
&rpcdata);
if (xprtdata.cred)
put_cred(xprtdata.cred);
} else {
- clp = nfs4_set_ds_client(mds_srv,
- &da->da_addr,
- da->da_addrlen,
- da->da_transport, timeo,
- retrans, minor_version);
+ ds_proto = da->da_transport;
+ if (ds_proto == XPRT_TRANSPORT_TCP &&
+ xprtsec_policy != RPC_XPRTSEC_NONE)
+ ds_proto = XPRT_TRANSPORT_TCP_TLS;
+
+ clp = nfs4_set_ds_client(mds_srv, &da->da_addr,
+ da->da_addrlen, ds_proto,
+ timeo, retrans, minor_version);
if (IS_ERR(clp))
continue;
@@ -965,7 +982,6 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
clp = ERR_PTR(-EIO);
continue;
}
-
}
}
@@ -996,8 +1012,10 @@ int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
err = nfs4_wait_ds_connect(ds);
if (err || ds->ds_clp)
goto out;
- if (nfs4_test_deviceid_unavailable(devid))
- return -ENODEV;
+ if (nfs4_test_deviceid_unavailable(devid)) {
+ err = -ENODEV;
+ goto out;
+ }
} while (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) != 0);
if (ds->ds_clp)
@@ -1027,11 +1045,12 @@ out:
if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) {
WARN_ON_ONCE(ds->ds_clp ||
!nfs4_test_deviceid_unavailable(devid));
- return -EINVAL;
- }
- err = nfs_client_init_status(ds->ds_clp);
+ err = -EINVAL;
+ } else
+ err = nfs_client_init_status(ds->ds_clp);
}
+ trace_pnfs_ds_connect(ds->ds_remotestr, err);
return err;
}
EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect);
@@ -1180,7 +1199,7 @@ pnfs_layout_mark_request_commit(struct nfs_page *req,
nfs_request_add_commit_list_locked(req, list, cinfo);
mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
- nfs_mark_page_unstable(req->wb_page, cinfo);
+ nfs_folio_mark_unstable(nfs_page_to_folio(req), cinfo);
return;
out_resched:
mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index e3570c656b0f..63e71310b9f6 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -153,13 +153,13 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
}
static int
-nfs_proc_lookup(struct inode *dir, struct dentry *dentry,
+nfs_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
struct nfs_diropargs arg = {
.fh = NFS_FH(dir),
- .name = dentry->d_name.name,
- .len = dentry->d_name.len
+ .name = name->name,
+ .len = name->len
};
struct nfs_diropok res = {
.fh = fhandle,
@@ -396,9 +396,10 @@ nfs_proc_link(struct inode *inode, struct inode *dir, const struct qstr *name)
}
static int
-nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
+nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct folio *folio,
unsigned int len, struct iattr *sattr)
{
+ struct page *page = &folio->page;
struct nfs_fh *fh;
struct nfs_fattr *fattr;
struct nfs_symlinkargs arg = {
@@ -445,13 +446,14 @@ out:
return status;
}
-static int
+static struct dentry *
nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
{
struct nfs_createdata *data;
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_MKDIR],
};
+ struct dentry *alias = NULL;
int status = -ENOMEM;
dprintk("NFS call mkdir %pd\n", dentry);
@@ -463,12 +465,15 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
- if (status == 0)
- status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
+ if (status == 0) {
+ alias = nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr);
+ status = PTR_ERR_OR_ZERO(alias);
+ } else
+ alias = ERR_PTR(status);
nfs_free_createdata(data);
out:
dprintk("NFS reply mkdir: %d\n", status);
- return status;
+ return alias;
}
static int
@@ -686,14 +691,22 @@ out_einval:
return -EINVAL;
}
-static int nfs_have_delegation(struct inode *inode, fmode_t flags)
+static int nfs_have_delegation(struct inode *inode, fmode_t type, int flags)
+{
+ return 0;
+}
+
+static int nfs_return_delegation(struct inode *inode)
{
+ if (S_ISREG(inode->i_mode))
+ nfs_wb_all(inode);
return 0;
}
static const struct inode_operations nfs_dir_inode_operations = {
.create = nfs_create,
.lookup = nfs_lookup,
+ .atomic_open = nfs_atomic_open_v23,
.link = nfs_link,
.unlink = nfs_unlink,
.symlink = nfs_symlink,
@@ -755,6 +768,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.lock_check_bounds = nfs_lock_check_bounds,
.close_context = nfs_close_context,
.have_delegation = nfs_have_delegation,
+ .return_delegation = nfs_return_delegation,
.alloc_client = nfs_alloc_client,
.init_client = nfs_init_client,
.free_client = nfs_free_client,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 8ae2c8d1219d..3c1fa320b3f1 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -15,6 +15,7 @@
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
+#include <linux/task_io_accounting_ops.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
@@ -27,10 +28,11 @@
#include "fscache.h"
#include "pnfs.h"
#include "nfstrace.h"
+#include "delegation.h"
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
-static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
+const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
static const struct nfs_rw_ops nfs_rw_read_ops;
static struct kmem_cache *nfs_rdata_cachep;
@@ -46,15 +48,16 @@ static struct nfs_pgio_header *nfs_readhdr_alloc(void)
static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
{
+ kfree(rhdr->res.scratch);
kmem_cache_free(nfs_rdata_cachep, rhdr);
}
-static
-int nfs_return_empty_page(struct page *page)
+static int nfs_return_empty_folio(struct folio *folio)
{
- zero_user(page, 0, PAGE_SIZE);
- SetPageUptodate(page);
- unlock_page(page);
+ folio_zero_segment(folio, 0, folio_size(folio));
+ folio_mark_uptodate(folio);
+ if (nfs_netfs_folio_unlock(folio))
+ folio_unlock(folio);
return 0;
}
@@ -74,7 +77,7 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
-static void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio)
+void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio)
{
struct nfs_pgio_mirror *pgm;
unsigned long npages;
@@ -108,34 +111,29 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
+bool nfs_read_alloc_scratch(struct nfs_pgio_header *hdr, size_t size)
+{
+ WARN_ON(hdr->res.scratch != NULL);
+ hdr->res.scratch = kmalloc(size, GFP_KERNEL);
+ return hdr->res.scratch != NULL;
+}
+EXPORT_SYMBOL_GPL(nfs_read_alloc_scratch);
+
static void nfs_readpage_release(struct nfs_page *req, int error)
{
- struct inode *inode = d_inode(nfs_req_openctx(req)->dentry);
- struct page *page = req->wb_page;
-
- dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
- (unsigned long long)NFS_FILEID(inode), req->wb_bytes,
- (long long)req_offset(req));
-
- if (nfs_error_is_fatal_on_server(error) && error != -ETIMEDOUT)
- SetPageError(page);
- if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
- if (PageUptodate(page))
- nfs_fscache_write_page(inode, page);
- unlock_page(page);
- }
+ struct folio *folio = nfs_page_to_folio(req);
+
+ if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE))
+ if (nfs_netfs_folio_unlock(folio))
+ folio_unlock(folio);
+
nfs_release_request(req);
}
-struct nfs_readdesc {
- struct nfs_pageio_descriptor pgio;
- struct nfs_open_context *ctx;
-};
-
static void nfs_page_group_set_uptodate(struct nfs_page *req)
{
if (nfs_page_group_sync_on_bit(req, PG_UPTODATE))
- SetPageUptodate(req->wb_page);
+ folio_mark_uptodate(nfs_page_to_folio(req));
}
static void nfs_read_completion(struct nfs_pgio_header *hdr)
@@ -147,24 +145,25 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr)
goto out;
while (!list_empty(&hdr->pages)) {
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
- struct page *page = req->wb_page;
+ struct folio *folio = nfs_page_to_folio(req);
unsigned long start = req->wb_pgbase;
unsigned long end = req->wb_pgbase + req->wb_bytes;
if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
/* note: regions of the page not covered by a
- * request are zeroed in readpage_async_filler */
+ * request are zeroed in nfs_read_add_folio
+ */
if (bytes > hdr->good_bytes) {
/* nothing in this request was good, so zero
* the full extent of the request */
- zero_user_segment(page, start, end);
+ folio_zero_segment(folio, start, end);
} else if (hdr->good_bytes - bytes < req->wb_bytes) {
/* part of this request has good bytes, but
* not all. zero the bad bytes */
start += hdr->good_bytes - bytes;
WARN_ON(start < req->wb_pgbase);
- zero_user_segment(page, start, end);
+ folio_zero_segment(folio, start, end);
}
}
error = 0;
@@ -181,6 +180,8 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr)
nfs_list_remove_request(req);
nfs_readpage_release(req, error);
}
+ nfs_netfs_read_completion(hdr);
+
out:
hdr->release(hdr);
}
@@ -191,6 +192,7 @@ static void nfs_initiate_read(struct nfs_pgio_header *hdr,
struct rpc_task_setup *task_setup_data, int how)
{
rpc_ops->read_setup(hdr, msg);
+ nfs_netfs_initiate_read(hdr);
trace_nfs_initiate_read(hdr);
}
@@ -206,7 +208,7 @@ nfs_async_read_error(struct list_head *head, int error)
}
}
-static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
+const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
.error_cleanup = nfs_async_read_error,
.completion = nfs_read_completion,
};
@@ -281,151 +283,169 @@ static void nfs_readpage_result(struct rpc_task *task,
nfs_readpage_retry(task, hdr);
}
-static int
-readpage_async_filler(struct nfs_readdesc *desc, struct page *page)
+int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio,
+ struct nfs_open_context *ctx,
+ struct folio *folio)
{
- struct inode *inode = page_file_mapping(page)->host;
- unsigned int rsize = NFS_SERVER(inode)->rsize;
+ struct inode *inode = folio->mapping->host;
+ struct nfs_server *server = NFS_SERVER(inode);
+ size_t fsize = folio_size(folio);
+ unsigned int rsize = server->rsize;
struct nfs_page *new;
unsigned int len, aligned_len;
int error;
- len = nfs_page_length(page);
+ len = nfs_folio_length(folio);
if (len == 0)
- return nfs_return_empty_page(page);
+ return nfs_return_empty_folio(folio);
- aligned_len = min_t(unsigned int, ALIGN(len, rsize), PAGE_SIZE);
+ aligned_len = min_t(unsigned int, ALIGN(len, rsize), fsize);
- if (!IS_SYNC(page->mapping->host)) {
- error = nfs_fscache_read_page(page->mapping->host, page);
- if (error == 0)
- goto out_unlock;
+ new = nfs_page_create_from_folio(ctx, folio, 0, aligned_len);
+ if (IS_ERR(new)) {
+ error = PTR_ERR(new);
+ if (nfs_netfs_folio_unlock(folio))
+ folio_unlock(folio);
+ goto out;
}
- new = nfs_create_request(desc->ctx, page, 0, aligned_len);
- if (IS_ERR(new))
- goto out_error;
-
- if (len < PAGE_SIZE)
- zero_user_segment(page, len, PAGE_SIZE);
- if (!nfs_pageio_add_request(&desc->pgio, new)) {
+ if (len < fsize)
+ folio_zero_segment(folio, len, fsize);
+ if (!nfs_pageio_add_request(pgio, new)) {
nfs_list_remove_request(new);
- error = desc->pgio.pg_error;
+ error = pgio->pg_error;
nfs_readpage_release(new, error);
goto out;
}
return 0;
-out_error:
- error = PTR_ERR(new);
-out_unlock:
- unlock_page(page);
out:
return error;
}
/*
- * Read a page over NFS.
- * We read the page synchronously in the following case:
- * - The error flag is set for this page. This happens only when a
- * previous async read operation failed.
+ * Actually read a folio over the wire.
+ */
+static int nfs_do_read_folio(struct file *file, struct folio *folio)
+{
+ struct inode *inode = file_inode(file);
+ struct nfs_pageio_descriptor pgio;
+ struct nfs_open_context *ctx;
+ int ret;
+
+ ctx = get_nfs_open_context(nfs_file_open_context(file));
+
+ xchg(&ctx->error, 0);
+ nfs_pageio_init_read(&pgio, inode, false,
+ &nfs_async_read_completion_ops);
+
+ ret = nfs_read_add_folio(&pgio, ctx, folio);
+ if (ret)
+ goto out_put;
+
+ nfs_pageio_complete_read(&pgio);
+ nfs_update_delegated_atime(inode);
+ if (pgio.pg_error < 0) {
+ ret = pgio.pg_error;
+ goto out_put;
+ }
+
+ ret = folio_wait_locked_killable(folio);
+ if (!folio_test_uptodate(folio) && !ret)
+ ret = xchg(&ctx->error, 0);
+
+out_put:
+ put_nfs_open_context(ctx);
+ return ret;
+}
+
+/*
+ * Synchronously read a folio.
+ *
+ * This is not heavily used as most users to try an asynchronous
+ * large read through ->readahead first.
*/
int nfs_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
- struct nfs_readdesc desc;
- struct inode *inode = page_file_mapping(page)->host;
+ struct inode *inode = file_inode(file);
+ loff_t pos = folio_pos(folio);
+ size_t len = folio_size(folio);
int ret;
- trace_nfs_aop_readpage(inode, page);
+ trace_nfs_aop_readpage(inode, pos, len);
nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
+ task_io_account_read(len);
/*
* Try to flush any pending writes to the file..
*
- * NOTE! Because we own the page lock, there cannot
+ * NOTE! Because we own the folio lock, there cannot
* be any new pending writes generated at this point
- * for this page (other pages can be written to).
+ * for this folio (other folios can be written to).
*/
- ret = nfs_wb_page(inode, page);
+ ret = nfs_wb_folio(inode, folio);
if (ret)
goto out_unlock;
- if (PageUptodate(page))
+ if (folio_test_uptodate(folio))
goto out_unlock;
ret = -ESTALE;
if (NFS_STALE(inode))
goto out_unlock;
- if (file == NULL) {
- ret = -EBADF;
- desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
- if (desc.ctx == NULL)
- goto out_unlock;
- } else
- desc.ctx = get_nfs_open_context(nfs_file_open_context(file));
-
- xchg(&desc.ctx->error, 0);
- nfs_pageio_init_read(&desc.pgio, inode, false,
- &nfs_async_read_completion_ops);
-
- ret = readpage_async_filler(&desc, page);
+ ret = nfs_netfs_read_folio(file, folio);
if (ret)
- goto out;
-
- nfs_pageio_complete_read(&desc.pgio);
- ret = desc.pgio.pg_error < 0 ? desc.pgio.pg_error : 0;
- if (!ret) {
- ret = wait_on_page_locked_killable(page);
- if (!PageUptodate(page) && !ret)
- ret = xchg(&desc.ctx->error, 0);
- }
+ ret = nfs_do_read_folio(file, folio);
out:
- put_nfs_open_context(desc.ctx);
- trace_nfs_aop_readpage_done(inode, page, ret);
+ trace_nfs_aop_readpage_done(inode, pos, len, ret);
return ret;
out_unlock:
- unlock_page(page);
- trace_nfs_aop_readpage_done(inode, page, ret);
- return ret;
+ folio_unlock(folio);
+ goto out;
}
void nfs_readahead(struct readahead_control *ractl)
{
+ struct nfs_pageio_descriptor pgio;
+ struct nfs_open_context *ctx;
unsigned int nr_pages = readahead_count(ractl);
struct file *file = ractl->file;
- struct nfs_readdesc desc;
struct inode *inode = ractl->mapping->host;
- struct page *page;
+ struct folio *folio;
int ret;
trace_nfs_aop_readahead(inode, readahead_pos(ractl), nr_pages);
nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
+ task_io_account_read(readahead_length(ractl));
ret = -ESTALE;
if (NFS_STALE(inode))
goto out;
+ ret = nfs_netfs_readahead(ractl);
+ if (!ret)
+ goto out;
+
if (file == NULL) {
ret = -EBADF;
- desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
- if (desc.ctx == NULL)
+ ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
+ if (ctx == NULL)
goto out;
} else
- desc.ctx = get_nfs_open_context(nfs_file_open_context(file));
+ ctx = get_nfs_open_context(nfs_file_open_context(file));
- nfs_pageio_init_read(&desc.pgio, inode, false,
+ nfs_pageio_init_read(&pgio, inode, false,
&nfs_async_read_completion_ops);
- while ((page = readahead_page(ractl)) != NULL) {
- ret = readpage_async_filler(&desc, page);
- put_page(page);
+ while ((folio = readahead_folio(ractl)) != NULL) {
+ ret = nfs_read_add_folio(&pgio, ctx, folio);
if (ret)
break;
}
- nfs_pageio_complete_read(&desc.pgio);
+ nfs_pageio_complete_read(&pgio);
+ nfs_update_delegated_atime(inode);
- put_nfs_open_context(desc.ctx);
+ put_nfs_open_context(ctx);
out:
trace_nfs_aop_readahead_done(inode, nr_pages, ret);
}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 05ae23657527..72dee6f3050e 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -47,6 +47,7 @@
#include <linux/vfs.h>
#include <linux/inet.h>
#include <linux/in6.h>
+#include <linux/sched.h>
#include <linux/slab.h>
#include <net/ipv6.h>
#include <linux/netdevice.h>
@@ -59,6 +60,8 @@
#include <linux/uaccess.h>
#include <linux/nfs_ssc.h>
+#include <uapi/linux/tls.h>
+
#include "nfs4_fs.h"
#include "callback.h"
#include "delegation.h"
@@ -68,6 +71,9 @@
#include "nfs4session.h"
#include "pnfs.h"
#include "nfs.h"
+#include "netns.h"
+#include "sysfs.h"
+#include "nfs4idmap.h"
#define NFSDBG_FACILITY NFSDBG_VFS
@@ -125,11 +131,7 @@ static void nfs_ssc_unregister_ops(void)
}
#endif /* CONFIG_NFS_V4_2 */
-static struct shrinker acl_shrinker = {
- .count_objects = nfs_access_cache_count,
- .scan_objects = nfs_access_cache_scan,
- .seeks = DEFAULT_SEEKS,
-};
+static struct shrinker *acl_shrinker;
/*
* Register the NFS filesystems
@@ -149,9 +151,18 @@ int __init register_nfs_fs(void)
ret = nfs_register_sysctl();
if (ret < 0)
goto error_2;
- ret = register_shrinker(&acl_shrinker, "nfs-acl");
- if (ret < 0)
+
+ acl_shrinker = shrinker_alloc(0, "nfs-acl");
+ if (!acl_shrinker) {
+ ret = -ENOMEM;
goto error_3;
+ }
+
+ acl_shrinker->count_objects = nfs_access_cache_count;
+ acl_shrinker->scan_objects = nfs_access_cache_scan;
+
+ shrinker_register(acl_shrinker);
+
#ifdef CONFIG_NFS_V4_2
nfs_ssc_register_ops();
#endif
@@ -171,7 +182,7 @@ error_0:
*/
void __exit unregister_nfs_fs(void)
{
- unregister_shrinker(&acl_shrinker);
+ shrinker_free(acl_shrinker);
nfs_unregister_sysctl();
unregister_nfs4_fs();
#ifdef CONFIG_NFS_V4_2
@@ -219,6 +230,7 @@ static int __nfs_list_for_each_server(struct list_head *head,
ret = fn(server, data);
if (ret)
goto out;
+ cond_resched();
rcu_read_lock();
}
rcu_read_unlock();
@@ -442,8 +454,12 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
{ NFS_MOUNT_NONLM, ",nolock", "" },
{ NFS_MOUNT_NOACL, ",noacl", "" },
{ NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
+ { NFS_MOUNT_FORCE_RDIRPLUS, ",rdirplus=force", "" },
{ NFS_MOUNT_UNSHARED, ",nosharecache", "" },
{ NFS_MOUNT_NORESVPORT, ",noresvport", "" },
+ { NFS_MOUNT_NETUNREACH_FATAL,
+ ",fatal_neterrors=ENETDOWN:ENETUNREACH",
+ ",fatal_neterrors=none" },
{ 0, NULL, NULL }
};
const struct proc_nfs_info *nfs_infop;
@@ -491,14 +507,32 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
seq_printf(m, ",timeo=%lu", 10U * nfss->client->cl_timeout->to_initval / HZ);
seq_printf(m, ",retrans=%u", nfss->client->cl_timeout->to_retries);
seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor));
+ switch (clp->cl_xprtsec.policy) {
+ case RPC_XPRTSEC_TLS_ANON:
+ seq_puts(m, ",xprtsec=tls");
+ break;
+ case RPC_XPRTSEC_TLS_X509:
+ seq_puts(m, ",xprtsec=mtls");
+ break;
+ default:
+ break;
+ }
if (version != 4)
nfs_show_mountd_options(m, nfss, showdefaults);
else
nfs_show_nfsv4_options(m, nfss, showdefaults);
- if (nfss->options & NFS_OPTION_FSCACHE)
+ if (nfss->options & NFS_OPTION_FSCACHE) {
+#ifdef CONFIG_NFS_FSCACHE
+ if (nfss->fscache_uniq)
+ seq_printf(m, ",fsc=%s", nfss->fscache_uniq);
+ else
+ seq_puts(m, ",fsc");
+#else
seq_puts(m, ",fsc");
+#endif
+ }
if (nfss->options & NFS_OPTION_MIGRATION)
seq_puts(m, ",migration");
@@ -522,6 +556,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
else
seq_puts(m, ",local_lock=posix");
+ if (nfss->flags & NFS_MOUNT_NO_ALIGNWRITE)
+ seq_puts(m, ",noalignwrite");
+
if (nfss->flags & NFS_MOUNT_WRITE_EAGER) {
if (nfss->flags & NFS_MOUNT_WRITE_WAIT)
seq_puts(m, ",write=wait");
@@ -692,10 +729,6 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root)
totals.events[i] += stats->events[i];
for (i = 0; i < __NFSIOS_BYTESMAX; i++)
totals.bytes[i] += stats->bytes[i];
-#ifdef CONFIG_NFS_FSCACHE
- for (i = 0; i < __NFSIOS_FSCACHEMAX; i++)
- totals.fscache[i] += stats->fscache[i];
-#endif
preempt_enable();
}
@@ -706,13 +739,6 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root)
seq_puts(m, "\n\tbytes:\t");
for (i = 0; i < __NFSIOS_BYTESMAX; i++)
seq_printf(m, "%Lu ", totals.bytes[i]);
-#ifdef CONFIG_NFS_FSCACHE
- if (nfss->options & NFS_OPTION_FSCACHE) {
- seq_puts(m, "\n\tfsc:\t");
- for (i = 0; i < __NFSIOS_FSCACHEMAX; i++)
- seq_printf(m, "%Lu ", totals.fscache[i]);
- }
-#endif
seq_putc(m, '\n');
rpc_clnt_show_stats(m, nfss->client);
@@ -864,7 +890,15 @@ static int nfs_request_mount(struct fs_context *fc,
* Now ask the mount server to map our export path
* to a file handle.
*/
- status = nfs_mount(&request, ctx->timeo, ctx->retrans);
+ if ((request.protocol == XPRT_TRANSPORT_UDP) ==
+ !(ctx->flags & NFS_MOUNT_TCP))
+ /*
+ * NFS protocol and mount protocol are both UDP or neither UDP
+ * so timeouts are compatible. Use NFS timeouts for MOUNT
+ */
+ status = nfs_mount(&request, ctx->timeo, ctx->retrans);
+ else
+ status = nfs_mount(&request, NFS_UNSPEC_TIMEO, NFS_UNSPEC_RETRANS);
if (status != 0) {
dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n",
request.hostname, status);
@@ -885,6 +919,16 @@ static struct nfs_server *nfs_try_mount_request(struct fs_context *fc)
rpc_authflavor_t authlist[NFS_MAX_SECFLAVORS];
unsigned int authlist_len = ARRAY_SIZE(authlist);
+ /* make sure 'nolock'/'lock' override the 'local_lock' mount option */
+ if (ctx->lock_status) {
+ if (ctx->lock_status == NFS_LOCK_NOLOCK) {
+ ctx->flags |= NFS_MOUNT_NONLM;
+ ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL);
+ } else {
+ ctx->flags &= ~NFS_MOUNT_NONLM;
+ ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL);
+ }
+ }
status = nfs_request_mount(fc, ctx->mntfh, authlist, &authlist_len);
if (status)
return ERR_PTR(status);
@@ -1008,6 +1052,16 @@ int nfs_reconfigure(struct fs_context *fc)
sync_filesystem(sb);
/*
+ * The SB_RDONLY flag has been removed from the superblock during
+ * mounts to prevent interference between different filesystems.
+ * Similarly, it is also necessary to ignore the SB_RDONLY flag
+ * during reconfiguration; otherwise, it may also result in the
+ * creation of redundant superblocks when mounting a directory with
+ * different rw and ro flags multiple times.
+ */
+ fc->sb_flags_mask &= ~SB_RDONLY;
+
+ /*
* Userspace mount programs that send binary options generally send
* them populated with default values. We have no way to know which
* ones were explicitly specified. Fall back to legacy behavior and
@@ -1068,7 +1122,7 @@ static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx)
sb->s_export_op = &nfs_export_ops;
break;
case 4:
- sb->s_flags |= SB_POSIXACL;
+ sb->s_iflags |= SB_I_NOUMASK;
sb->s_time_gran = 1;
sb->s_time_min = S64_MIN;
sb->s_time_max = S64_MAX;
@@ -1088,6 +1142,7 @@ static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx)
&sb->s_blocksize_bits);
nfs_super_set_maxbytes(sb, server->maxfilesize);
+ nfs_sysfs_move_server_to_sb(sb);
server->has_sec_mnt_opts = ctx->has_sec_mnt_opts;
}
@@ -1128,7 +1183,7 @@ static int nfs_set_super(struct super_block *s, struct fs_context *fc)
struct nfs_server *server = fc->s_fs_info;
int ret;
- s->s_d_op = server->nfs_client->rpc_ops->dentry_ops;
+ set_default_d_op(s, server->nfs_client->rpc_ops->dentry_ops);
ret = set_anon_super(s, server);
if (ret == 0)
server->s_dev = s->s_dev;
@@ -1263,8 +1318,17 @@ int nfs_get_tree_common(struct fs_context *fc)
if (IS_ERR(server))
return PTR_ERR(server);
+ /*
+ * When NFS_MOUNT_UNSHARED is not set, NFS forces the sharing of a
+ * superblock among each filesystem that mounts sub-directories
+ * belonging to a single exported root path.
+ * To prevent interference between different filesystems, the
+ * SB_RDONLY flag should be removed from the superblock.
+ */
if (server->flags & NFS_MOUNT_UNSHARED)
compare_super = NULL;
+ else
+ fc->sb_flags &= ~SB_RDONLY;
/* -o noac implies -o sync */
if (server->flags & NFS_MOUNT_NOAC)
@@ -1274,9 +1338,6 @@ int nfs_get_tree_common(struct fs_context *fc)
if (ctx->clone_data.sb->s_flags & SB_SYNCHRONOUS)
fc->sb_flags |= SB_SYNCHRONOUS;
- if (server->caps & NFS_CAP_SECURITY_LABEL)
- fc->lsm_flags |= SECURITY_LSM_NATIVE_LABELS;
-
/* Get a superblock - note that we may end up sharing one that already exists */
fc->s_fs_info = server;
s = sget_fc(fc, compare_super, nfs_set_super);
@@ -1333,19 +1394,18 @@ error_splat_super:
}
/*
- * Destroy an NFS2/3 superblock
+ * Destroy an NFS superblock
*/
void nfs_kill_super(struct super_block *s)
{
struct nfs_server *server = NFS_SB(s);
- dev_t dev = s->s_dev;
- generic_shutdown_super(s);
+ nfs_sysfs_move_sb_to_server(server);
+ kill_anon_super(s);
nfs_fscache_release_super_cookie(s);
nfs_free_server(server);
- free_anon_bdev(dev);
}
EXPORT_SYMBOL_GPL(nfs_kill_super);
@@ -1366,6 +1426,7 @@ unsigned short max_session_cb_slots = NFS4_DEF_CB_SLOT_TABLE_SIZE;
unsigned short send_implementation_id = 1;
char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN] = "";
bool recover_lost_locks = false;
+short nfs_delay_retrans = -1;
EXPORT_SYMBOL_GPL(nfs_callback_nr_threads);
EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport);
@@ -1376,6 +1437,7 @@ EXPORT_SYMBOL_GPL(max_session_cb_slots);
EXPORT_SYMBOL_GPL(send_implementation_id);
EXPORT_SYMBOL_GPL(nfs4_client_id_uniquifier);
EXPORT_SYMBOL_GPL(recover_lost_locks);
+EXPORT_SYMBOL_GPL(nfs_delay_retrans);
#define NFS_CALLBACK_MAXPORTNR (65535U)
@@ -1424,5 +1486,9 @@ MODULE_PARM_DESC(recover_lost_locks,
"If the server reports that a lock might be lost, "
"try to recover it risking data corruption.");
-
+module_param_named(delay_retrans, nfs_delay_retrans, short, 0644);
+MODULE_PARM_DESC(delay_retrans,
+ "Unless negative, specifies the number of times the NFSv4 "
+ "client retries a request before returning an EAGAIN error, "
+ "after a reply of NFS4ERR_DELAY from the server.");
#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 0e27a2e4e68b..58146e935402 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -32,47 +32,39 @@ static int nfs_symlink_filler(struct file *file, struct folio *folio)
int error;
error = NFS_PROTO(inode)->readlink(inode, &folio->page, 0, PAGE_SIZE);
- if (error < 0)
- goto error;
- folio_mark_uptodate(folio);
- folio_unlock(folio);
- return 0;
-
-error:
- folio_set_error(folio);
- folio_unlock(folio);
- return -EIO;
+ folio_end_read(folio, error == 0);
+ return error;
}
static const char *nfs_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
- struct page *page;
+ struct folio *folio;
void *err;
if (!dentry) {
err = ERR_PTR(nfs_revalidate_mapping_rcu(inode));
if (err)
return err;
- page = find_get_page(inode->i_mapping, 0);
- if (!page)
+ folio = filemap_get_folio(inode->i_mapping, 0);
+ if (IS_ERR(folio))
return ERR_PTR(-ECHILD);
- if (!PageUptodate(page)) {
- put_page(page);
+ if (!folio_test_uptodate(folio)) {
+ folio_put(folio);
return ERR_PTR(-ECHILD);
}
} else {
err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
if (err)
return err;
- page = read_cache_page(&inode->i_data, 0, nfs_symlink_filler,
+ folio = read_cache_folio(&inode->i_data, 0, nfs_symlink_filler,
NULL);
- if (IS_ERR(page))
- return ERR_CAST(page);
+ if (IS_ERR(folio))
+ return ERR_CAST(folio);
}
- set_delayed_call(done, page_put_link, page);
- return page_address(page);
+ set_delayed_call(done, page_put_link, folio);
+ return folio_address(folio);
}
/*
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index 7aea195ddb35..f579df0e8d67 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -14,7 +14,7 @@
static struct ctl_table_header *nfs_callback_sysctl_table;
-static struct ctl_table nfs_cb_sysctls[] = {
+static const struct ctl_table nfs_cb_sysctls[] = {
{
.procname = "nfs_mountpoint_timeout",
.data = &nfs_mountpoint_expiry_timeout,
@@ -29,30 +29,11 @@ static struct ctl_table nfs_cb_sysctls[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
-};
-
-static struct ctl_table nfs_cb_sysctl_dir[] = {
- {
- .procname = "nfs",
- .mode = 0555,
- .child = nfs_cb_sysctls,
- },
- { }
-};
-
-static struct ctl_table nfs_cb_sysctl_root[] = {
- {
- .procname = "fs",
- .mode = 0555,
- .child = nfs_cb_sysctl_dir,
- },
- { }
};
int nfs_register_sysctl(void)
{
- nfs_callback_sysctl_table = register_sysctl_table(nfs_cb_sysctl_root);
+ nfs_callback_sysctl_table = register_sysctl("fs/nfs", nfs_cb_sysctls);
if (nfs_callback_sysctl_table == NULL)
return -ENOMEM;
return 0;
diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
index 0cbcd2dfa732..ea6e6168092b 100644
--- a/fs/nfs/sysfs.c
+++ b/fs/nfs/sysfs.c
@@ -12,17 +12,19 @@
#include <linux/string.h>
#include <linux/nfs_fs.h>
#include <linux/rcupdate.h>
+#include <linux/lockd/lockd.h>
+#include "internal.h"
#include "nfs4_fs.h"
#include "netns.h"
#include "sysfs.h"
-struct kobject *nfs_client_kobj;
-static struct kset *nfs_client_kset;
+static struct kset *nfs_kset;
-static void nfs_netns_object_release(struct kobject *kobj)
+static void nfs_kset_release(struct kobject *kobj)
{
- kfree(kobj);
+ struct kset *kset = container_of(kobj, struct kset, kobj);
+ kfree(kset);
}
static const struct kobj_ns_type_operations *nfs_netns_object_child_ns_type(
@@ -31,46 +33,42 @@ static const struct kobj_ns_type_operations *nfs_netns_object_child_ns_type(
return &net_ns_type_operations;
}
-static struct kobj_type nfs_netns_object_type = {
- .release = nfs_netns_object_release,
+static struct kobj_type nfs_kset_type = {
+ .release = nfs_kset_release,
.sysfs_ops = &kobj_sysfs_ops,
.child_ns_type = nfs_netns_object_child_ns_type,
};
-static struct kobject *nfs_netns_object_alloc(const char *name,
- struct kset *kset, struct kobject *parent)
+int nfs_sysfs_init(void)
{
- struct kobject *kobj;
+ int ret;
- kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);
- if (kobj) {
- kobj->kset = kset;
- if (kobject_init_and_add(kobj, &nfs_netns_object_type,
- parent, "%s", name) == 0)
- return kobj;
- kobject_put(kobj);
+ nfs_kset = kzalloc(sizeof(*nfs_kset), GFP_KERNEL);
+ if (!nfs_kset)
+ return -ENOMEM;
+
+ ret = kobject_set_name(&nfs_kset->kobj, "nfs");
+ if (ret) {
+ kfree(nfs_kset);
+ return ret;
}
- return NULL;
-}
-int nfs_sysfs_init(void)
-{
- nfs_client_kset = kset_create_and_add("nfs", NULL, fs_kobj);
- if (!nfs_client_kset)
- return -ENOMEM;
- nfs_client_kobj = nfs_netns_object_alloc("net", nfs_client_kset, NULL);
- if (!nfs_client_kobj) {
- kset_unregister(nfs_client_kset);
- nfs_client_kset = NULL;
- return -ENOMEM;
+ nfs_kset->kobj.parent = fs_kobj;
+ nfs_kset->kobj.ktype = &nfs_kset_type;
+ nfs_kset->kobj.kset = NULL;
+
+ ret = kset_register(nfs_kset);
+ if (ret) {
+ kfree(nfs_kset);
+ return ret;
}
+
return 0;
}
void nfs_sysfs_exit(void)
{
- kobject_put(nfs_client_kobj);
- kset_unregister(nfs_client_kset);
+ kset_unregister(nfs_kset);
}
static ssize_t nfs_netns_identifier_show(struct kobject *kobj,
@@ -127,7 +125,6 @@ static void nfs_netns_client_release(struct kobject *kobj)
kobject);
kfree(rcu_dereference_raw(c->identifier));
- kfree(c);
}
static const void *nfs_netns_client_namespace(const struct kobject *kobj)
@@ -151,6 +148,25 @@ static struct kobj_type nfs_netns_client_type = {
.namespace = nfs_netns_client_namespace,
};
+static void nfs_netns_object_release(struct kobject *kobj)
+{
+ struct nfs_netns_client *c = container_of(kobj,
+ struct nfs_netns_client,
+ nfs_net_kobj);
+ kfree(c);
+}
+
+static const void *nfs_netns_namespace(const struct kobject *kobj)
+{
+ return container_of(kobj, struct nfs_netns_client, nfs_net_kobj)->net;
+}
+
+static struct kobj_type nfs_netns_object_type = {
+ .release = nfs_netns_object_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .namespace = nfs_netns_namespace,
+};
+
static struct nfs_netns_client *nfs_netns_client_alloc(struct kobject *parent,
struct net *net)
{
@@ -159,11 +175,21 @@ static struct nfs_netns_client *nfs_netns_client_alloc(struct kobject *parent,
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (p) {
p->net = net;
- p->kobject.kset = nfs_client_kset;
+ p->kobject.kset = nfs_kset;
+ p->nfs_net_kobj.kset = nfs_kset;
+
+ if (kobject_init_and_add(&p->nfs_net_kobj, &nfs_netns_object_type,
+ parent, "net") != 0) {
+ kobject_put(&p->nfs_net_kobj);
+ return NULL;
+ }
+
if (kobject_init_and_add(&p->kobject, &nfs_netns_client_type,
- parent, "nfs_client") == 0)
+ &p->nfs_net_kobj, "nfs_client") == 0)
return p;
+
kobject_put(&p->kobject);
+ kobject_put(&p->nfs_net_kobj);
}
return NULL;
}
@@ -172,7 +198,7 @@ void nfs_netns_sysfs_setup(struct nfs_net *netns, struct net *net)
{
struct nfs_netns_client *clp;
- clp = nfs_netns_client_alloc(nfs_client_kobj, net);
+ clp = nfs_netns_client_alloc(&nfs_kset->kobj, net);
if (clp) {
netns->nfs_client = clp;
kobject_uevent(&clp->kobject, KOBJ_ADD);
@@ -187,6 +213,258 @@ void nfs_netns_sysfs_destroy(struct nfs_net *netns)
kobject_uevent(&clp->kobject, KOBJ_REMOVE);
kobject_del(&clp->kobject);
kobject_put(&clp->kobject);
+ kobject_del(&clp->nfs_net_kobj);
+ kobject_put(&clp->nfs_net_kobj);
netns->nfs_client = NULL;
}
}
+
+static bool shutdown_match_client(const struct rpc_task *task, const void *data)
+{
+ return true;
+}
+
+static void shutdown_client(struct rpc_clnt *clnt)
+{
+ clnt->cl_shutdown = 1;
+ rpc_cancel_tasks(clnt, -EIO, shutdown_match_client, NULL);
+}
+
+/*
+ * Shut down the nfs_client only once all the superblocks
+ * have been shut down.
+ */
+static void shutdown_nfs_client(struct nfs_client *clp)
+{
+ struct nfs_server *server;
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ if (!(server->flags & NFS_MOUNT_SHUTDOWN)) {
+ rcu_read_unlock();
+ return;
+ }
+ }
+ rcu_read_unlock();
+ nfs_mark_client_ready(clp, -EIO);
+ shutdown_client(clp->cl_rpcclient);
+}
+
+static ssize_t
+shutdown_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct nfs_server *server = container_of(kobj, struct nfs_server, kobj);
+ bool shutdown = server->flags & NFS_MOUNT_SHUTDOWN;
+ return sysfs_emit(buf, "%d\n", shutdown);
+}
+
+static ssize_t
+shutdown_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct nfs_server *server;
+ int ret, val;
+
+ server = container_of(kobj, struct nfs_server, kobj);
+
+ ret = kstrtoint(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ if (val != 1)
+ return -EINVAL;
+
+ /* already shut down? */
+ if (server->flags & NFS_MOUNT_SHUTDOWN)
+ goto out;
+
+ server->flags |= NFS_MOUNT_SHUTDOWN;
+ shutdown_client(server->client);
+
+ if (!IS_ERR(server->client_acl))
+ shutdown_client(server->client_acl);
+
+ if (server->nlm_host)
+ shutdown_client(server->nlm_host->h_rpcclnt);
+out:
+ shutdown_nfs_client(server->nfs_client);
+ return count;
+}
+
+static struct kobj_attribute nfs_sysfs_attr_shutdown = __ATTR_RW(shutdown);
+
+#if IS_ENABLED(CONFIG_NFS_V4_1)
+static ssize_t
+implid_domain_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct nfs_server *server = container_of(kobj, struct nfs_server, kobj);
+ struct nfs41_impl_id *impl_id = server->nfs_client->cl_implid;
+
+ if (!impl_id || strlen(impl_id->domain) == 0)
+ return 0; //sysfs_emit(buf, "");
+ return sysfs_emit(buf, "%s\n", impl_id->domain);
+}
+
+static struct kobj_attribute nfs_sysfs_attr_implid_domain = __ATTR_RO(implid_domain);
+
+
+static ssize_t
+implid_name_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct nfs_server *server = container_of(kobj, struct nfs_server, kobj);
+ struct nfs41_impl_id *impl_id = server->nfs_client->cl_implid;
+
+ if (!impl_id || strlen(impl_id->name) == 0)
+ return 0; //sysfs_emit(buf, "");
+ return sysfs_emit(buf, "%s\n", impl_id->name);
+}
+
+static struct kobj_attribute nfs_sysfs_attr_implid_name = __ATTR_RO(implid_name);
+
+#endif /* IS_ENABLED(CONFIG_NFS_V4_1) */
+
+#define RPC_CLIENT_NAME_SIZE 64
+
+void nfs_sysfs_link_rpc_client(struct nfs_server *server,
+ struct rpc_clnt *clnt, const char *uniq)
+{
+ char name[RPC_CLIENT_NAME_SIZE];
+ int ret;
+
+ strscpy(name, clnt->cl_program->name, sizeof(name));
+ strncat(name, uniq ? uniq : "", sizeof(name) - strlen(name) - 1);
+ strncat(name, "_client", sizeof(name) - strlen(name) - 1);
+
+ ret = sysfs_create_link_nowarn(&server->kobj,
+ &clnt->cl_sysfs->kobject, name);
+ if (ret < 0)
+ pr_warn("NFS: can't create link to %s in sysfs (%d)\n",
+ name, ret);
+}
+EXPORT_SYMBOL_GPL(nfs_sysfs_link_rpc_client);
+
+static void nfs_sysfs_sb_release(struct kobject *kobj)
+{
+ /* no-op: why? see lib/kobject.c kobject_cleanup() */
+}
+
+static const void *nfs_netns_server_namespace(const struct kobject *kobj)
+{
+ return container_of(kobj, struct nfs_server, kobj)->nfs_client->cl_net;
+}
+
+static struct kobj_type nfs_sb_ktype = {
+ .release = nfs_sysfs_sb_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .namespace = nfs_netns_server_namespace,
+ .child_ns_type = nfs_netns_object_child_ns_type,
+};
+
+#if IS_ENABLED(CONFIG_NFS_V4_1)
+static void nfs_sysfs_add_nfsv41_server(struct nfs_server *server)
+{
+ int ret;
+
+ if (!server->nfs_client->cl_implid)
+ return;
+
+ ret = sysfs_create_file_ns(&server->kobj, &nfs_sysfs_attr_implid_domain.attr,
+ nfs_netns_server_namespace(&server->kobj));
+ if (ret < 0)
+ pr_warn("NFS: sysfs_create_file_ns for server-%d failed (%d)\n",
+ server->s_sysfs_id, ret);
+
+ ret = sysfs_create_file_ns(&server->kobj, &nfs_sysfs_attr_implid_name.attr,
+ nfs_netns_server_namespace(&server->kobj));
+ if (ret < 0)
+ pr_warn("NFS: sysfs_create_file_ns for server-%d failed (%d)\n",
+ server->s_sysfs_id, ret);
+}
+#else /* CONFIG_NFS_V4_1 */
+static inline void nfs_sysfs_add_nfsv41_server(struct nfs_server *server)
+{
+}
+#endif /* CONFIG_NFS_V4_1 */
+
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+
+static ssize_t
+localio_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct nfs_server *server = container_of(kobj, struct nfs_server, kobj);
+ bool localio = nfs_server_is_local(server->nfs_client);
+ return sysfs_emit(buf, "%d\n", localio);
+}
+
+static struct kobj_attribute nfs_sysfs_attr_localio = __ATTR_RO(localio);
+
+static void nfs_sysfs_add_nfs_localio_server(struct nfs_server *server)
+{
+ int ret = sysfs_create_file_ns(&server->kobj, &nfs_sysfs_attr_localio.attr,
+ nfs_netns_server_namespace(&server->kobj));
+ if (ret < 0)
+ pr_warn("NFS: sysfs_create_file_ns for server-%d failed (%d)\n",
+ server->s_sysfs_id, ret);
+}
+#else
+static inline void nfs_sysfs_add_nfs_localio_server(struct nfs_server *server)
+{
+}
+#endif /* IS_ENABLED(CONFIG_NFS_LOCALIO) */
+
+void nfs_sysfs_add_server(struct nfs_server *server)
+{
+ int ret;
+
+ ret = kobject_init_and_add(&server->kobj, &nfs_sb_ktype,
+ &nfs_kset->kobj, "server-%d", server->s_sysfs_id);
+ if (ret < 0) {
+ pr_warn("NFS: nfs sysfs add server-%d failed (%d)\n",
+ server->s_sysfs_id, ret);
+ return;
+ }
+ ret = sysfs_create_file_ns(&server->kobj, &nfs_sysfs_attr_shutdown.attr,
+ nfs_netns_server_namespace(&server->kobj));
+ if (ret < 0)
+ pr_warn("NFS: sysfs_create_file_ns for server-%d failed (%d)\n",
+ server->s_sysfs_id, ret);
+
+ nfs_sysfs_add_nfsv41_server(server);
+ nfs_sysfs_add_nfs_localio_server(server);
+}
+EXPORT_SYMBOL_GPL(nfs_sysfs_add_server);
+
+void nfs_sysfs_move_server_to_sb(struct super_block *s)
+{
+ struct nfs_server *server = s->s_fs_info;
+ int ret;
+
+ ret = kobject_rename(&server->kobj, s->s_id);
+ if (ret < 0)
+ pr_warn("NFS: rename sysfs %s failed (%d)\n",
+ server->kobj.name, ret);
+}
+
+void nfs_sysfs_move_sb_to_server(struct nfs_server *server)
+{
+ const char *s;
+ int ret = -ENOMEM;
+
+ s = kasprintf(GFP_KERNEL, "server-%d", server->s_sysfs_id);
+ if (s) {
+ ret = kobject_rename(&server->kobj, s);
+ kfree(s);
+ }
+ if (ret < 0)
+ pr_warn("NFS: rename sysfs %s failed (%d)\n",
+ server->kobj.name, ret);
+}
+
+/* unlink, not dec-ref */
+void nfs_sysfs_remove_server(struct nfs_server *server)
+{
+ kobject_del(&server->kobj);
+}
diff --git a/fs/nfs/sysfs.h b/fs/nfs/sysfs.h
index 5501ef573c32..c5d1990cade5 100644
--- a/fs/nfs/sysfs.h
+++ b/fs/nfs/sysfs.h
@@ -10,11 +10,12 @@
struct nfs_netns_client {
struct kobject kobject;
+ struct kobject nfs_net_kobj;
struct net *net;
const char __rcu *identifier;
};
-extern struct kobject *nfs_client_kobj;
+extern struct kobject *nfs_net_kobj;
extern int nfs_sysfs_init(void);
extern void nfs_sysfs_exit(void);
@@ -22,4 +23,11 @@ extern void nfs_sysfs_exit(void);
void nfs_netns_sysfs_setup(struct nfs_net *netns, struct net *net);
void nfs_netns_sysfs_destroy(struct nfs_net *netns);
+void nfs_sysfs_link_rpc_client(struct nfs_server *server,
+ struct rpc_clnt *clnt, const char *sysfs_prefix);
+void nfs_sysfs_add_server(struct nfs_server *s);
+void nfs_sysfs_move_server_to_sb(struct super_block *s);
+void nfs_sysfs_move_sb_to_server(struct nfs_server *s);
+void nfs_sysfs_remove_server(struct nfs_server *s);
+
#endif
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 150a953a8be9..b55467911648 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -232,6 +232,8 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode)
dentry->d_fsdata = NULL;
spin_unlock(&dentry->d_lock);
+ NFS_PROTO(inode)->return_delegation(inode);
+
if (NFS_STALE(inode) || !nfs_call_unlink(dentry, inode, data))
nfs_free_unlinkdata(data);
}
@@ -267,7 +269,7 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata)
struct inode *new_dir = data->new_dir;
struct dentry *old_dentry = data->old_dentry;
- trace_nfs_sillyrename_rename(old_dir, old_dentry,
+ trace_nfs_async_rename_done(old_dir, old_dentry,
new_dir, data->new_dentry, task->tk_status);
if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) {
rpc_restart_call_prepare(task);
@@ -462,18 +464,17 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
sdentry = NULL;
do {
- int slen;
dput(sdentry);
sillycounter++;
- slen = scnprintf(silly, sizeof(silly),
- SILLYNAME_PREFIX "%0*llx%0*x",
- SILLYNAME_FILEID_LEN, fileid,
- SILLYNAME_COUNTER_LEN, sillycounter);
+ scnprintf(silly, sizeof(silly),
+ SILLYNAME_PREFIX "%0*llx%0*x",
+ SILLYNAME_FILEID_LEN, fileid,
+ SILLYNAME_COUNTER_LEN, sillycounter);
dfprintk(VFS, "NFS: trying to rename %pd to %s\n",
dentry, silly);
- sdentry = lookup_one_len(silly, dentry->d_parent, slen);
+ sdentry = lookup_noperm(&QSTR(silly), dentry->d_parent);
/*
* N.B. Better to return EBUSY here ... it could be
* dangerous to delete the file while it's in use.
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 80c240e50952..336c510f3750 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -25,6 +25,7 @@
#include <linux/freezer.h>
#include <linux/wait.h>
#include <linux/iversion.h>
+#include <linux/filelock.h>
#include <linux/uaccess.h>
#include <linux/sched/mm.h>
@@ -58,12 +59,10 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
static const struct nfs_rw_ops nfs_rw_write_ops;
static void nfs_inode_remove_request(struct nfs_page *req);
-static void nfs_clear_request_commit(struct nfs_page *req);
+static void nfs_clear_request_commit(struct nfs_commit_info *cinfo,
+ struct nfs_page *req);
static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
struct inode *inode);
-static struct nfs_page *
-nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
- struct page *page);
static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
@@ -154,134 +153,51 @@ nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode)
}
}
-static int
-nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
+static void nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
{
- int ret;
-
- if (!test_bit(PG_REMOVE, &req->wb_flags))
- return 0;
- ret = nfs_page_group_lock(req);
- if (ret)
- return ret;
if (test_and_clear_bit(PG_REMOVE, &req->wb_flags))
nfs_page_set_inode_ref(req, inode);
- nfs_page_group_unlock(req);
- return 0;
}
-static struct nfs_page *
-nfs_page_private_request(struct page *page)
-{
- if (!PagePrivate(page))
- return NULL;
- return (struct nfs_page *)page_private(page);
-}
-
-/*
- * nfs_page_find_head_request_locked - find head request associated with @page
+/**
+ * nfs_folio_find_head_request - find head request associated with a folio
+ * @folio: pointer to folio
*
* must be called while holding the inode lock.
*
* returns matching head request with reference held, or NULL if not found.
*/
-static struct nfs_page *
-nfs_page_find_private_request(struct page *page)
+static struct nfs_page *nfs_folio_find_head_request(struct folio *folio)
{
- struct address_space *mapping = page_file_mapping(page);
+ struct address_space *mapping = folio->mapping;
struct nfs_page *req;
- if (!PagePrivate(page))
+ if (!folio_test_private(folio))
return NULL;
- spin_lock(&mapping->private_lock);
- req = nfs_page_private_request(page);
+ spin_lock(&mapping->i_private_lock);
+ req = folio->private;
if (req) {
WARN_ON_ONCE(req->wb_head != req);
kref_get(&req->wb_kref);
}
- spin_unlock(&mapping->private_lock);
- return req;
-}
-
-static struct nfs_page *
-nfs_page_find_swap_request(struct page *page)
-{
- struct inode *inode = page_file_mapping(page)->host;
- struct nfs_inode *nfsi = NFS_I(inode);
- struct nfs_page *req = NULL;
- if (!PageSwapCache(page))
- return NULL;
- mutex_lock(&nfsi->commit_mutex);
- if (PageSwapCache(page)) {
- req = nfs_page_search_commits_for_head_request_locked(nfsi,
- page);
- if (req) {
- WARN_ON_ONCE(req->wb_head != req);
- kref_get(&req->wb_kref);
- }
- }
- mutex_unlock(&nfsi->commit_mutex);
- return req;
-}
-
-/*
- * nfs_page_find_head_request - find head request associated with @page
- *
- * returns matching head request with reference held, or NULL if not found.
- */
-static struct nfs_page *nfs_page_find_head_request(struct page *page)
-{
- struct nfs_page *req;
-
- req = nfs_page_find_private_request(page);
- if (!req)
- req = nfs_page_find_swap_request(page);
+ spin_unlock(&mapping->i_private_lock);
return req;
}
-static struct nfs_page *nfs_find_and_lock_page_request(struct page *page)
-{
- struct inode *inode = page_file_mapping(page)->host;
- struct nfs_page *req, *head;
- int ret;
-
- for (;;) {
- req = nfs_page_find_head_request(page);
- if (!req)
- return req;
- head = nfs_page_group_lock_head(req);
- if (head != req)
- nfs_release_request(req);
- if (IS_ERR(head))
- return head;
- ret = nfs_cancel_remove_inode(head, inode);
- if (ret < 0) {
- nfs_unlock_and_release_request(head);
- return ERR_PTR(ret);
- }
- /* Ensure that nobody removed the request before we locked it */
- if (head == nfs_page_private_request(page))
- break;
- if (PageSwapCache(page))
- break;
- nfs_unlock_and_release_request(head);
- }
- return head;
-}
-
/* Adjust the file length if we're writing beyond the end */
-static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
+static void nfs_grow_file(struct folio *folio, unsigned int offset,
+ unsigned int count)
{
- struct inode *inode = page_file_mapping(page)->host;
+ struct inode *inode = folio->mapping->host;
loff_t end, i_size;
pgoff_t end_index;
spin_lock(&inode->i_lock);
i_size = i_size_read(inode);
- end_index = (i_size - 1) >> PAGE_SHIFT;
- if (i_size > 0 && page_index(page) < end_index)
+ end_index = ((i_size - 1) >> folio_shift(folio)) << folio_order(folio);
+ if (i_size > 0 && folio->index < end_index)
goto out;
- end = page_file_offset(page) + ((loff_t)offset+count);
+ end = folio_pos(folio) + (loff_t)offset + (loff_t)count;
if (i_size >= end)
goto out;
trace_nfs_size_grow(inode, end);
@@ -289,6 +205,8 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c
NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE;
nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
out:
+ /* Atomically update timestamps if they are delegated to us. */
+ nfs_update_delegated_mtime_locked(inode);
spin_unlock(&inode->i_lock);
nfs_fscache_invalidate(inode, 0);
}
@@ -307,11 +225,10 @@ static void nfs_set_pageerror(struct address_space *mapping)
spin_unlock(&inode->i_lock);
}
-static void nfs_mapping_set_error(struct page *page, int error)
+static void nfs_mapping_set_error(struct folio *folio, int error)
{
- struct address_space *mapping = page_file_mapping(page);
+ struct address_space *mapping = folio->mapping;
- SetPageError(page);
filemap_set_wb_err(mapping, error);
if (mapping->host)
errseq_set(&mapping->host->i_sb->s_wb_err,
@@ -320,59 +237,17 @@ static void nfs_mapping_set_error(struct page *page, int error)
}
/*
- * nfs_page_group_search_locked
- * @head - head request of page group
- * @page_offset - offset into page
- *
- * Search page group with head @head to find a request that contains the
- * page offset @page_offset.
- *
- * Returns a pointer to the first matching nfs request, or NULL if no
- * match is found.
- *
- * Must be called with the page group lock held
- */
-static struct nfs_page *
-nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset)
-{
- struct nfs_page *req;
-
- req = head;
- do {
- if (page_offset >= req->wb_pgbase &&
- page_offset < (req->wb_pgbase + req->wb_bytes))
- return req;
-
- req = req->wb_this_page;
- } while (req != head);
-
- return NULL;
-}
-
-/*
- * nfs_page_group_covers_page
- * @head - head request of page group
+ * nfs_page_covers_folio
+ * @req: struct nfs_page
*
- * Return true if the page group with head @head covers the whole page,
- * returns false otherwise
+ * Return true if the request covers the whole folio.
+ * Note that the caller should ensure all subrequests have been joined
*/
static bool nfs_page_group_covers_page(struct nfs_page *req)
{
- struct nfs_page *tmp;
- unsigned int pos = 0;
- unsigned int len = nfs_page_length(req->wb_page);
+ unsigned int len = nfs_folio_length(nfs_page_to_folio(req));
- nfs_page_group_lock(req);
-
- for (;;) {
- tmp = nfs_page_group_search_locked(req->wb_head, pos);
- if (!tmp)
- break;
- pos = tmp->wb_pgbase + tmp->wb_bytes;
- }
-
- nfs_page_group_unlock(req);
- return pos >= len;
+ return req->wb_pgbase == 0 && req->wb_bytes == len;
}
/* We can set the PG_uptodate flag if we see that a write request
@@ -380,11 +255,13 @@ static bool nfs_page_group_covers_page(struct nfs_page *req)
*/
static void nfs_mark_uptodate(struct nfs_page *req)
{
- if (PageUptodate(req->wb_page))
+ struct folio *folio = nfs_page_to_folio(req);
+
+ if (folio_test_uptodate(folio))
return;
if (!nfs_page_group_covers_page(req))
return;
- SetPageUptodate(req->wb_page);
+ folio_mark_uptodate(folio);
}
static int wb_priority(struct writeback_control *wbc)
@@ -406,33 +283,34 @@ int nfs_congestion_kb;
#define NFS_CONGESTION_OFF_THRESH \
(NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))
-static void nfs_set_page_writeback(struct page *page)
+static void nfs_folio_set_writeback(struct folio *folio)
{
- struct inode *inode = page_file_mapping(page)->host;
- struct nfs_server *nfss = NFS_SERVER(inode);
- int ret = test_set_page_writeback(page);
-
- WARN_ON_ONCE(ret != 0);
+ struct nfs_server *nfss = NFS_SERVER(folio->mapping->host);
- if (atomic_long_inc_return(&nfss->writeback) >
- NFS_CONGESTION_ON_THRESH)
+ folio_start_writeback(folio);
+ if (atomic_long_inc_return(&nfss->writeback) > NFS_CONGESTION_ON_THRESH)
nfss->write_congested = 1;
}
-static void nfs_end_page_writeback(struct nfs_page *req)
+static void nfs_folio_end_writeback(struct folio *folio)
{
- struct inode *inode = page_file_mapping(req->wb_page)->host;
- struct nfs_server *nfss = NFS_SERVER(inode);
- bool is_done;
+ struct nfs_server *nfss = NFS_SERVER(folio->mapping->host);
- is_done = nfs_page_group_sync_on_bit(req, PG_WB_END);
- nfs_unlock_request(req);
- if (!is_done)
- return;
-
- end_page_writeback(req->wb_page);
- if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
+ folio_end_writeback_no_dropbehind(folio);
+ if (atomic_long_dec_return(&nfss->writeback) <
+ NFS_CONGESTION_OFF_THRESH) {
nfss->write_congested = 0;
+ wake_up_all(&nfss->write_congestion_wait);
+ }
+}
+
+static void nfs_page_end_writeback(struct nfs_page *req)
+{
+ if (nfs_page_group_sync_on_bit(req, PG_WB_END)) {
+ nfs_unlock_request(req);
+ nfs_folio_end_writeback(nfs_page_to_folio(req));
+ } else
+ nfs_unlock_request(req);
}
/*
@@ -502,8 +380,8 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
* the (former) group. All subrequests are removed from any write or commit
* lists, unlinked from the group and destroyed.
*/
-void
-nfs_join_page_group(struct nfs_page *head, struct inode *inode)
+void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo,
+ struct inode *inode)
{
struct nfs_page *subreq;
struct nfs_page *destroy_list = NULL;
@@ -533,7 +411,7 @@ nfs_join_page_group(struct nfs_page *head, struct inode *inode)
* Commit list removal accounting is done after locks are dropped */
subreq = head;
do {
- nfs_clear_request_commit(subreq);
+ nfs_clear_request_commit(cinfo, subreq);
subreq = subreq->wb_this_page;
} while (subreq != head);
@@ -547,9 +425,77 @@ nfs_join_page_group(struct nfs_page *head, struct inode *inode)
nfs_destroy_unlinked_subrequests(destroy_list, head, inode);
}
+/**
+ * nfs_wait_on_request - Wait for a request to complete.
+ * @req: request to wait upon.
+ *
+ * Interruptible by fatal signals only.
+ * The user is responsible for holding a count on the request.
+ */
+static int nfs_wait_on_request(struct nfs_page *req)
+{
+ if (!test_bit(PG_BUSY, &req->wb_flags))
+ return 0;
+ set_bit(PG_CONTENDED2, &req->wb_flags);
+ smp_mb__after_atomic();
+ return wait_on_bit_io(&req->wb_flags, PG_BUSY,
+ TASK_UNINTERRUPTIBLE);
+}
+
+/*
+ * nfs_unroll_locks - unlock all newly locked reqs and wait on @req
+ * @head: head request of page group, must be holding head lock
+ * @req: request that couldn't lock and needs to wait on the req bit lock
+ *
+ * This is a helper function for nfs_lock_and_join_requests
+ * returns 0 on success, < 0 on error.
+ */
+static void
+nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req)
+{
+ struct nfs_page *tmp;
+
+ /* relinquish all the locks successfully grabbed this run */
+ for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) {
+ if (!kref_read(&tmp->wb_kref))
+ continue;
+ nfs_unlock_and_release_request(tmp);
+ }
+}
+
+/*
+ * nfs_page_group_lock_subreq - try to lock a subrequest
+ * @head: head request of page group
+ * @subreq: request to lock
+ *
+ * This is a helper function for nfs_lock_and_join_requests which
+ * must be called with the head request and page group both locked.
+ * On error, it returns with the page group unlocked.
+ */
+static int
+nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq)
+{
+ int ret;
+
+ if (!kref_get_unless_zero(&subreq->wb_kref))
+ return 0;
+ while (!nfs_lock_request(subreq)) {
+ nfs_page_group_unlock(head);
+ ret = nfs_wait_on_request(subreq);
+ if (!ret)
+ ret = nfs_page_group_lock(head);
+ if (ret < 0) {
+ nfs_unroll_locks(head, subreq);
+ nfs_release_request(subreq);
+ return ret;
+ }
+ }
+ return 0;
+}
+
/*
* nfs_lock_and_join_requests - join all subreqs to the head req
- * @page: the page used to lookup the "page group" of nfs_page structures
+ * @folio: the folio used to lookup the "page group" of nfs_page structures
*
* This function joins all sub requests to the head request by first
* locking all requests in the group, cancelling any pending operations
@@ -559,14 +505,14 @@ nfs_join_page_group(struct nfs_page *head, struct inode *inode)
*
* Returns a locked, referenced pointer to the head request - which after
* this call is guaranteed to be the only request associated with the page.
- * Returns NULL if no requests are found for @page, or a ERR_PTR if an
+ * Returns NULL if no requests are found for @folio, or a ERR_PTR if an
* error was encountered.
*/
-static struct nfs_page *
-nfs_lock_and_join_requests(struct page *page)
+static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio)
{
- struct inode *inode = page_file_mapping(page)->host;
- struct nfs_page *head;
+ struct inode *inode = folio->mapping->host;
+ struct nfs_page *head, *subreq;
+ struct nfs_commit_info cinfo;
int ret;
/*
@@ -574,29 +520,58 @@ nfs_lock_and_join_requests(struct page *page)
* reference to the whole page group - the group will not be destroyed
* until the head reference is released.
*/
- head = nfs_find_and_lock_page_request(page);
- if (IS_ERR_OR_NULL(head))
- return head;
+retry:
+ head = nfs_folio_find_head_request(folio);
+ if (!head)
+ return NULL;
- /* lock each request in the page group */
- ret = nfs_page_group_lock_subrequests(head);
- if (ret < 0) {
+ while (!nfs_lock_request(head)) {
+ ret = nfs_wait_on_request(head);
+ if (ret < 0) {
+ nfs_release_request(head);
+ return ERR_PTR(ret);
+ }
+ }
+
+ ret = nfs_page_group_lock(head);
+ if (ret < 0)
+ goto out_unlock;
+
+ /* Ensure that nobody removed the request before we locked it */
+ if (head != folio->private) {
+ nfs_page_group_unlock(head);
nfs_unlock_and_release_request(head);
- return ERR_PTR(ret);
+ goto retry;
}
- nfs_join_page_group(head, inode);
+ nfs_cancel_remove_inode(head, inode);
+ /* lock each request in the page group */
+ for (subreq = head->wb_this_page;
+ subreq != head;
+ subreq = subreq->wb_this_page) {
+ ret = nfs_page_group_lock_subreq(head, subreq);
+ if (ret < 0)
+ goto out_unlock;
+ }
+
+ nfs_page_group_unlock(head);
+
+ nfs_init_cinfo_from_inode(&cinfo, inode);
+ nfs_join_page_group(head, &cinfo, inode);
return head;
+
+out_unlock:
+ nfs_unlock_and_release_request(head);
+ return ERR_PTR(ret);
}
static void nfs_write_error(struct nfs_page *req, int error)
{
- trace_nfs_write_error(page_file_mapping(req->wb_page)->host, req,
- error);
- nfs_mapping_set_error(req->wb_page, error);
+ trace_nfs_write_error(nfs_page_to_inode(req), req, error);
+ nfs_mapping_set_error(nfs_page_to_folio(req), error);
nfs_inode_remove_request(req);
- nfs_end_page_writeback(req);
+ nfs_page_end_writeback(req);
nfs_release_request(req);
}
@@ -604,21 +579,22 @@ static void nfs_write_error(struct nfs_page *req, int error)
* Find an associated nfs write request, and prepare to flush it out
* May return an error if the user signalled nfs_wait_on_request().
*/
-static int nfs_page_async_flush(struct page *page,
- struct writeback_control *wbc,
- struct nfs_pageio_descriptor *pgio)
+static int nfs_do_writepage(struct folio *folio, struct writeback_control *wbc,
+ struct nfs_pageio_descriptor *pgio)
{
struct nfs_page *req;
- int ret = 0;
+ int ret;
- req = nfs_lock_and_join_requests(page);
+ nfs_pageio_cond_complete(pgio, folio->index);
+
+ req = nfs_lock_and_join_requests(folio);
if (!req)
- goto out;
- ret = PTR_ERR(req);
+ return 0;
if (IS_ERR(req))
- goto out;
+ return PTR_ERR(req);
- nfs_set_page_writeback(page);
+ trace_nfs_do_writepage(req);
+ nfs_folio_set_writeback(folio);
WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));
/* If there is a fatal error that covers this write, just exit */
@@ -626,7 +602,6 @@ static int nfs_page_async_flush(struct page *page,
if (nfs_error_is_fatal_on_server(ret))
goto out_launder;
- ret = 0;
if (!nfs_pageio_add_request(pgio, req)) {
ret = pgio->pg_error;
/*
@@ -634,71 +609,39 @@ static int nfs_page_async_flush(struct page *page,
*/
if (nfs_error_is_fatal_on_server(ret))
goto out_launder;
- if (wbc->sync_mode == WB_SYNC_NONE)
- ret = AOP_WRITEPAGE_ACTIVATE;
- redirty_page_for_writepage(wbc, page);
+ folio_redirty_for_writepage(wbc, folio);
nfs_redirty_request(req);
pgio->pg_error = 0;
- } else
- nfs_add_stats(page_file_mapping(page)->host,
- NFSIOS_WRITEPAGES, 1);
-out:
- return ret;
+ return ret;
+ }
+
+ nfs_add_stats(folio->mapping->host, NFSIOS_WRITEPAGES, 1);
+ return 0;
+
out_launder:
nfs_write_error(req, ret);
return 0;
}
-static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
- struct nfs_pageio_descriptor *pgio)
-{
- nfs_pageio_cond_complete(pgio, page_index(page));
- return nfs_page_async_flush(page, wbc, pgio);
-}
-
/*
* Write an mmapped page to the server.
*/
-static int nfs_writepage_locked(struct page *page,
+static int nfs_writepage_locked(struct folio *folio,
struct writeback_control *wbc)
{
struct nfs_pageio_descriptor pgio;
- struct inode *inode = page_file_mapping(page)->host;
+ struct inode *inode = folio->mapping->host;
int err;
- if (wbc->sync_mode == WB_SYNC_NONE &&
- NFS_SERVER(inode)->write_congested)
- return AOP_WRITEPAGE_ACTIVATE;
-
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
- nfs_pageio_init_write(&pgio, inode, 0,
- false, &nfs_async_write_completion_ops);
- err = nfs_do_writepage(page, wbc, &pgio);
+ nfs_pageio_init_write(&pgio, inode, 0, false,
+ &nfs_async_write_completion_ops);
+ err = nfs_do_writepage(folio, wbc, &pgio);
pgio.pg_error = 0;
nfs_pageio_complete(&pgio);
return err;
}
-int nfs_writepage(struct page *page, struct writeback_control *wbc)
-{
- int ret;
-
- ret = nfs_writepage_locked(page, wbc);
- if (ret != AOP_WRITEPAGE_ACTIVATE)
- unlock_page(page);
- return ret;
-}
-
-static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
-{
- int ret;
-
- ret = nfs_do_writepage(page, wbc, data);
- if (ret != AOP_WRITEPAGE_ACTIVATE)
- unlock_page(page);
- return ret;
-}
-
static void nfs_io_completion_commit(void *inode)
{
nfs_commit_inode(inode, 0);
@@ -710,17 +653,24 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
struct nfs_pageio_descriptor pgio;
struct nfs_io_completion *ioc = NULL;
unsigned int mntflags = NFS_SERVER(inode)->flags;
+ struct nfs_server *nfss = NFS_SERVER(inode);
int priority = 0;
int err;
- if (wbc->sync_mode == WB_SYNC_NONE &&
- NFS_SERVER(inode)->write_congested)
- return 0;
+ trace_nfs_writepages(inode, wbc->range_start, wbc->range_end - wbc->range_start);
+
+ /* Wait with writeback until write congestion eases */
+ if (wbc->sync_mode == WB_SYNC_NONE && nfss->write_congested) {
+ err = wait_event_killable(nfss->write_congestion_wait,
+ nfss->write_congested == 0);
+ if (err)
+ goto out_err;
+ }
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
if (!(mntflags & NFS_MOUNT_WRITE_EAGER) || wbc->for_kupdate ||
- wbc->for_background || wbc->for_sync || wbc->for_reclaim) {
+ wbc->for_background || wbc->for_sync) {
ioc = nfs_io_completion_alloc(GFP_KERNEL);
if (ioc)
nfs_io_completion_init(ioc, nfs_io_completion_commit,
@@ -729,47 +679,47 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
}
do {
+ struct folio *folio = NULL;
+
nfs_pageio_init_write(&pgio, inode, priority, false,
&nfs_async_write_completion_ops);
pgio.pg_io_completion = ioc;
- err = write_cache_pages(mapping, wbc, nfs_writepages_callback,
- &pgio);
+ while ((folio = writeback_iter(mapping, wbc, folio, &err))) {
+ err = nfs_do_writepage(folio, wbc, &pgio);
+ folio_unlock(folio);
+ }
pgio.pg_error = 0;
nfs_pageio_complete(&pgio);
+ if (err == -EAGAIN && mntflags & NFS_MOUNT_SOFTERR)
+ break;
} while (err < 0 && !nfs_error_is_fatal(err));
nfs_io_completion_put(ioc);
- if (err < 0)
- goto out_err;
- return 0;
+ if (err > 0)
+ err = 0;
out_err:
+ trace_nfs_writepages_done(inode, wbc->range_start, wbc->range_end - wbc->range_start, err);
return err;
}
/*
* Insert a write request into an inode
*/
-static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
+static void nfs_inode_add_request(struct nfs_page *req)
{
- struct address_space *mapping = page_file_mapping(req->wb_page);
- struct nfs_inode *nfsi = NFS_I(inode);
+ struct folio *folio = nfs_page_to_folio(req);
+ struct address_space *mapping = folio->mapping;
+ struct nfs_inode *nfsi = NFS_I(mapping->host);
WARN_ON_ONCE(req->wb_this_page != req);
/* Lock the request! */
nfs_lock_request(req);
-
- /*
- * Swap-space should not get truncated. Hence no need to plug the race
- * with invalidate/truncate.
- */
- spin_lock(&mapping->private_lock);
- if (likely(!PageSwapCache(req->wb_page))) {
- set_bit(PG_MAPPED, &req->wb_flags);
- SetPagePrivate(req->wb_page);
- set_page_private(req->wb_page, (unsigned long)req);
- }
- spin_unlock(&mapping->private_lock);
+ spin_lock(&mapping->i_private_lock);
+ set_bit(PG_MAPPED, &req->wb_flags);
+ folio_set_private(folio);
+ folio->private = req;
+ spin_unlock(&mapping->i_private_lock);
atomic_long_inc(&nfsi->nrequests);
/* this a head request for a page group - mark it as having an
* extra reference so sub groups can follow suit.
@@ -784,66 +734,36 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
*/
static void nfs_inode_remove_request(struct nfs_page *req)
{
- struct address_space *mapping = page_file_mapping(req->wb_page);
- struct inode *inode = mapping->host;
- struct nfs_inode *nfsi = NFS_I(inode);
- struct nfs_page *head;
+ struct nfs_inode *nfsi = NFS_I(nfs_page_to_inode(req));
- if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
- head = req->wb_head;
-
- spin_lock(&mapping->private_lock);
- if (likely(head->wb_page && !PageSwapCache(head->wb_page))) {
- set_page_private(head->wb_page, 0);
- ClearPagePrivate(head->wb_page);
- clear_bit(PG_MAPPED, &head->wb_flags);
+ nfs_page_group_lock(req);
+ if (nfs_page_group_sync_on_bit_locked(req, PG_REMOVE)) {
+ struct folio *folio = nfs_page_to_folio(req->wb_head);
+ struct address_space *mapping = folio->mapping;
+
+ spin_lock(&mapping->i_private_lock);
+ if (likely(folio)) {
+ folio->private = NULL;
+ folio_clear_private(folio);
+ clear_bit(PG_MAPPED, &req->wb_head->wb_flags);
}
- spin_unlock(&mapping->private_lock);
+ spin_unlock(&mapping->i_private_lock);
+
+ folio_end_dropbehind(folio);
}
+ nfs_page_group_unlock(req);
if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) {
- nfs_release_request(req);
atomic_long_dec(&nfsi->nrequests);
+ nfs_release_request(req);
}
}
-static void
-nfs_mark_request_dirty(struct nfs_page *req)
+static void nfs_mark_request_dirty(struct nfs_page *req)
{
- if (req->wb_page)
- __set_page_dirty_nobuffers(req->wb_page);
-}
-
-/*
- * nfs_page_search_commits_for_head_request_locked
- *
- * Search through commit lists on @inode for the head request for @page.
- * Must be called while holding the inode (which is cinfo) lock.
- *
- * Returns the head request if found, or NULL if not found.
- */
-static struct nfs_page *
-nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
- struct page *page)
-{
- struct nfs_page *freq, *t;
- struct nfs_commit_info cinfo;
- struct inode *inode = &nfsi->vfs_inode;
-
- nfs_init_cinfo_from_inode(&cinfo, inode);
-
- /* search through pnfs commit lists */
- freq = pnfs_search_commit_reqs(inode, &cinfo, page);
- if (freq)
- return freq->wb_head;
-
- /* Linearly search the commit list for the correct request */
- list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) {
- if (freq->wb_page == page)
- return freq->wb_head;
- }
-
- return NULL;
+ struct folio *folio = nfs_page_to_folio(req);
+ if (folio)
+ filemap_dirty_folio(folio_mapping(folio), folio);
}
/**
@@ -887,8 +807,7 @@ nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo)
mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo);
mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
- if (req->wb_page)
- nfs_mark_page_unstable(req->wb_page, cinfo);
+ nfs_folio_mark_unstable(nfs_page_to_folio(req), cinfo);
}
EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);
@@ -947,30 +866,31 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
nfs_request_add_commit_list(req, cinfo);
}
-static void
-nfs_clear_page_commit(struct page *page)
+static void nfs_folio_clear_commit(struct folio *folio)
{
- dec_node_page_state(page, NR_WRITEBACK);
- dec_wb_stat(&inode_to_bdi(page_file_mapping(page)->host)->wb,
- WB_WRITEBACK);
+ if (folio) {
+ long nr = folio_nr_pages(folio);
+
+ node_stat_mod_folio(folio, NR_WRITEBACK, -nr);
+ wb_stat_mod(&inode_to_bdi(folio->mapping->host)->wb,
+ WB_WRITEBACK, -nr);
+ }
}
/* Called holding the request lock on @req */
-static void
-nfs_clear_request_commit(struct nfs_page *req)
+static void nfs_clear_request_commit(struct nfs_commit_info *cinfo,
+ struct nfs_page *req)
{
if (test_bit(PG_CLEAN, &req->wb_flags)) {
struct nfs_open_context *ctx = nfs_req_openctx(req);
struct inode *inode = d_inode(ctx->dentry);
- struct nfs_commit_info cinfo;
- nfs_init_cinfo_from_inode(&cinfo, inode);
mutex_lock(&NFS_I(inode)->commit_mutex);
- if (!pnfs_clear_request_commit(req, &cinfo)) {
- nfs_request_remove_commit_list(req, &cinfo);
+ if (!pnfs_clear_request_commit(req, cinfo)) {
+ nfs_request_remove_commit_list(req, cinfo);
}
mutex_unlock(&NFS_I(inode)->commit_mutex);
- nfs_clear_page_commit(req->wb_page);
+ nfs_folio_clear_commit(nfs_page_to_folio(req));
}
}
@@ -1002,7 +922,8 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
(hdr->good_bytes < bytes)) {
trace_nfs_comp_error(hdr->inode, req, hdr->error);
- nfs_mapping_set_error(req->wb_page, hdr->error);
+ nfs_mapping_set_error(nfs_page_to_folio(req),
+ hdr->error);
goto remove_req;
}
if (nfs_write_need_commit(hdr)) {
@@ -1010,13 +931,13 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
req->wb_nio = 0;
memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
nfs_mark_request_commit(req, hdr->lseg, &cinfo,
- hdr->pgio_mirror_idx);
+ hdr->ds_commit_idx);
goto next;
}
remove_req:
nfs_inode_remove_request(req);
next:
- nfs_end_page_writeback(req);
+ nfs_page_end_writeback(req);
nfs_release_request(req);
}
out:
@@ -1092,21 +1013,21 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst,
* If the attempt fails, then the existing request is flushed out
* to disk.
*/
-static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
- struct page *page,
- unsigned int offset,
- unsigned int bytes)
+static struct nfs_page *nfs_try_to_update_request(struct folio *folio,
+ unsigned int offset,
+ unsigned int bytes)
{
struct nfs_page *req;
unsigned int rqend;
unsigned int end;
int error;
+ trace_nfs_try_to_update_request(folio_inode(folio), offset, bytes);
end = offset + bytes;
- req = nfs_lock_and_join_requests(page);
+ req = nfs_lock_and_join_requests(folio);
if (IS_ERR_OR_NULL(req))
- return req;
+ goto out;
rqend = req->wb_offset + req->wb_bytes;
/*
@@ -1128,6 +1049,9 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
else
req->wb_bytes = rqend - req->wb_offset;
req->wb_nio = 0;
+out:
+ trace_nfs_try_to_update_request_done(folio_inode(folio), offset, bytes,
+ PTR_ERR_OR_ZERO(req));
return req;
out_flushme:
/*
@@ -1137,7 +1061,8 @@ out_flushme:
*/
nfs_mark_request_dirty(req);
nfs_unlock_and_release_request(req);
- error = nfs_wb_page(inode, page);
+ error = nfs_wb_folio(folio->mapping->host, folio);
+ trace_nfs_try_to_update_request_done(folio_inode(folio), offset, bytes, error);
return (error < 0) ? ERR_PTR(error) : NULL;
}
@@ -1148,40 +1073,43 @@ out_flushme:
* if we have to add a new request. Also assumes that the caller has
* already called nfs_flush_incompatible() if necessary.
*/
-static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
- struct page *page, unsigned int offset, unsigned int bytes)
+static struct nfs_page *nfs_setup_write_request(struct nfs_open_context *ctx,
+ struct folio *folio,
+ unsigned int offset,
+ unsigned int bytes)
{
- struct inode *inode = page_file_mapping(page)->host;
- struct nfs_page *req;
+ struct nfs_page *req;
- req = nfs_try_to_update_request(inode, page, offset, bytes);
+ req = nfs_try_to_update_request(folio, offset, bytes);
if (req != NULL)
goto out;
- req = nfs_create_request(ctx, page, offset, bytes);
+ req = nfs_page_create_from_folio(ctx, folio, offset, bytes);
if (IS_ERR(req))
goto out;
- nfs_inode_add_request(inode, req);
+ nfs_inode_add_request(req);
out:
return req;
}
-static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
- unsigned int offset, unsigned int count)
+static int nfs_writepage_setup(struct nfs_open_context *ctx,
+ struct folio *folio, unsigned int offset,
+ unsigned int count)
{
- struct nfs_page *req;
+ struct nfs_page *req;
- req = nfs_setup_write_request(ctx, page, offset, count);
+ req = nfs_setup_write_request(ctx, folio, offset, count);
if (IS_ERR(req))
return PTR_ERR(req);
+ trace_nfs_writepage_setup(req);
/* Update file length */
- nfs_grow_file(page, offset, count);
+ nfs_grow_file(folio, offset, count);
nfs_mark_uptodate(req);
nfs_mark_request_dirty(req);
nfs_unlock_and_release_request(req);
return 0;
}
-int nfs_flush_incompatible(struct file *file, struct page *page)
+int nfs_flush_incompatible(struct file *file, struct folio *folio)
{
struct nfs_open_context *ctx = nfs_file_open_context(file);
struct nfs_lock_context *l_ctx;
@@ -1197,12 +1125,12 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
* dropped page.
*/
do {
- req = nfs_page_find_head_request(page);
+ req = nfs_folio_find_head_request(folio);
if (req == NULL)
return 0;
l_ctx = req->wb_lock_context;
- do_flush = req->wb_page != page ||
- !nfs_match_open_context(nfs_req_openctx(req), ctx);
+ do_flush = nfs_page_to_folio(req) != folio ||
+ !nfs_match_open_context(nfs_req_openctx(req), ctx);
if (l_ctx && flctx &&
!(list_empty_careful(&flctx->flc_posix) &&
list_empty_careful(&flctx->flc_flock))) {
@@ -1211,7 +1139,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
nfs_release_request(req);
if (!do_flush)
return 0;
- status = nfs_wb_page(page_file_mapping(page)->host, page);
+ status = nfs_wb_folio(folio->mapping->host, folio);
} while (status == 0);
return status;
}
@@ -1283,9 +1211,9 @@ out:
* the PageUptodate() flag. In this case, we will need to turn off
* write optimisations that depend on the page contents being correct.
*/
-static bool nfs_write_pageuptodate(struct page *page, struct inode *inode,
- unsigned int pagelen)
+static bool nfs_folio_write_uptodate(struct folio *folio, unsigned int pagelen)
{
+ struct inode *inode = folio->mapping->host;
struct nfs_inode *nfsi = NFS_I(inode);
if (nfs_have_delegated_attributes(inode))
@@ -1299,14 +1227,14 @@ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode,
out:
if (nfsi->cache_validity & NFS_INO_INVALID_DATA && pagelen != 0)
return false;
- return PageUptodate(page) != 0;
+ return folio_test_uptodate(folio) != 0;
}
static bool
is_whole_file_wrlock(struct file_lock *fl)
{
return fl->fl_start == 0 && fl->fl_end == OFFSET_MAX &&
- fl->fl_type == F_WRLCK;
+ lock_is_write(fl);
}
/* If we know the page is up to date, and we're not using byte range locks (or
@@ -1317,18 +1245,22 @@ is_whole_file_wrlock(struct file_lock *fl)
* If the file is opened for synchronous writes then we can just skip the rest
* of the checks.
*/
-static int nfs_can_extend_write(struct file *file, struct page *page,
- struct inode *inode, unsigned int pagelen)
+static int nfs_can_extend_write(struct file *file, struct folio *folio,
+ unsigned int pagelen)
{
- int ret;
+ struct inode *inode = file_inode(file);
struct file_lock_context *flctx = locks_inode_context(inode);
struct file_lock *fl;
+ int ret;
+ unsigned int mntflags = NFS_SERVER(inode)->flags;
+ if (mntflags & NFS_MOUNT_NO_ALIGNWRITE)
+ return 0;
if (file->f_flags & O_DSYNC)
return 0;
- if (!nfs_write_pageuptodate(page, inode, pagelen))
+ if (!nfs_folio_write_uptodate(folio, pagelen))
return 0;
- if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
+ if (nfs_have_write_delegation(inode))
return 1;
if (!flctx || (list_empty_careful(&flctx->flc_flock) &&
list_empty_careful(&flctx->flc_posix)))
@@ -1339,13 +1271,13 @@ static int nfs_can_extend_write(struct file *file, struct page *page,
spin_lock(&flctx->flc_lock);
if (!list_empty(&flctx->flc_posix)) {
fl = list_first_entry(&flctx->flc_posix, struct file_lock,
- fl_list);
+ c.flc_list);
if (is_whole_file_wrlock(fl))
ret = 1;
} else if (!list_empty(&flctx->flc_flock)) {
fl = list_first_entry(&flctx->flc_flock, struct file_lock,
- fl_list);
- if (fl->fl_type == F_WRLCK)
+ c.flc_list);
+ if (lock_is_write(fl))
ret = 1;
}
spin_unlock(&flctx->flc_lock);
@@ -1358,33 +1290,40 @@ static int nfs_can_extend_write(struct file *file, struct page *page,
* XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
* things with a page scheduled for an RPC call (e.g. invalidate it).
*/
-int nfs_updatepage(struct file *file, struct page *page,
- unsigned int offset, unsigned int count)
+int nfs_update_folio(struct file *file, struct folio *folio,
+ unsigned int offset, unsigned int count)
{
struct nfs_open_context *ctx = nfs_file_open_context(file);
- struct address_space *mapping = page_file_mapping(page);
- struct inode *inode = mapping->host;
- unsigned int pagelen = nfs_page_length(page);
+ struct address_space *mapping = folio->mapping;
+ struct inode *inode = mapping->host;
+ unsigned int pagelen = nfs_folio_length(folio);
int status = 0;
nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
- dprintk("NFS: nfs_updatepage(%pD2 %d@%lld)\n",
- file, count, (long long)(page_file_offset(page) + offset));
+ trace_nfs_update_folio(inode, offset, count);
+
+ dprintk("NFS: nfs_update_folio(%pD2 %d@%lld)\n", file, count,
+ (long long)(folio_pos(folio) + offset));
if (!count)
goto out;
- if (nfs_can_extend_write(file, page, inode, pagelen)) {
- count = max(count + offset, pagelen);
- offset = 0;
+ if (nfs_can_extend_write(file, folio, pagelen)) {
+ unsigned int end = count + offset;
+
+ offset = round_down(offset, PAGE_SIZE);
+ if (end < pagelen)
+ end = min(round_up(end, PAGE_SIZE), pagelen);
+ count = end - offset;
}
- status = nfs_writepage_setup(ctx, page, offset, count);
+ status = nfs_writepage_setup(ctx, folio, offset, count);
if (status < 0)
nfs_set_pageerror(mapping);
out:
- dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n",
+ trace_nfs_update_folio_done(inode, offset, count, status);
+ dprintk("NFS: nfs_update_folio returns %d (isize %lld)\n",
status, (long long)i_size_read(inode));
return status;
}
@@ -1420,13 +1359,13 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
*/
static void nfs_redirty_request(struct nfs_page *req)
{
- struct nfs_inode *nfsi = NFS_I(page_file_mapping(req->wb_page)->host);
+ struct nfs_inode *nfsi = NFS_I(nfs_page_to_inode(req));
/* Bump the transmission count */
req->wb_nio++;
nfs_mark_request_dirty(req);
atomic_long_inc(&nfsi->redirtied_pages);
- nfs_end_page_writeback(req);
+ nfs_page_end_writeback(req);
nfs_release_request(req);
}
@@ -1522,6 +1461,13 @@ void nfs_writeback_update_inode(struct nfs_pgio_header *hdr)
struct nfs_fattr *fattr = &hdr->fattr;
struct inode *inode = hdr->inode;
+ if (nfs_have_delegated_mtime(inode)) {
+ spin_lock(&inode->i_lock);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
+ spin_unlock(&inode->i_lock);
+ return;
+ }
+
spin_lock(&inode->i_lock);
nfs_writeback_check_extend(hdr, fattr);
nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
@@ -1589,7 +1535,8 @@ static int nfs_writeback_done(struct rpc_task *task,
/* Deal with the suid/sgid bit corner case */
if (nfs_should_remove_suid(inode)) {
spin_lock(&inode->i_lock);
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE
+ | NFS_INO_REVAL_FORCED);
spin_unlock(&inode->i_lock);
}
return 0;
@@ -1654,7 +1601,7 @@ static int wait_on_commit(struct nfs_mds_commit_info *cinfo)
!atomic_read(&cinfo->rpcs_out));
}
-static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
+void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
{
atomic_inc(&cinfo->rpcs_out);
}
@@ -1678,7 +1625,8 @@ EXPORT_SYMBOL_GPL(nfs_commitdata_release);
int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
const struct nfs_rpc_ops *nfs_ops,
const struct rpc_call_ops *call_ops,
- int how, int flags)
+ int how, int flags,
+ struct nfsd_file *localio)
{
struct rpc_task *task;
int priority = flush_task_priority(how);
@@ -1707,6 +1655,9 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
dprintk("NFS: initiated commit call\n");
+ if (localio)
+ return nfs_local_commit(localio, data, call_ops, how);
+
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
@@ -1784,18 +1735,18 @@ void nfs_retry_commit(struct list_head *page_list,
req = nfs_list_entry(page_list->next);
nfs_list_remove_request(req);
nfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx);
- if (!cinfo->dreq)
- nfs_clear_page_commit(req->wb_page);
+ nfs_folio_clear_commit(nfs_page_to_folio(req));
nfs_unlock_and_release_request(req);
}
}
EXPORT_SYMBOL_GPL(nfs_retry_commit);
-static void
-nfs_commit_resched_write(struct nfs_commit_info *cinfo,
- struct nfs_page *req)
+static void nfs_commit_resched_write(struct nfs_commit_info *cinfo,
+ struct nfs_page *req)
{
- __set_page_dirty_nobuffers(req->wb_page);
+ struct folio *folio = nfs_page_to_folio(req);
+
+ filemap_dirty_folio(folio_mapping(folio), folio);
}
/*
@@ -1806,6 +1757,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
struct nfs_commit_info *cinfo)
{
struct nfs_commit_data *data;
+ struct nfsd_file *localio;
unsigned short task_flags = 0;
/* another commit raced with us */
@@ -1822,9 +1774,13 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
nfs_init_commit(data, head, NULL, cinfo);
if (NFS_SERVER(inode)->nfs_client->cl_minorversion)
task_flags = RPC_TASK_MOVEABLE;
+
+ localio = nfs_local_open_fh(NFS_SERVER(inode)->nfs_client, data->cred,
+ data->args.fh, &data->context->nfl,
+ data->context->mode);
return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode),
data->mds_ops, how,
- RPC_TASK_CRED_NOREF | task_flags);
+ RPC_TASK_CRED_NOREF | task_flags, localio);
}
/*
@@ -1845,13 +1801,13 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
struct nfs_page *req;
int status = data->task.tk_status;
struct nfs_commit_info cinfo;
- struct nfs_server *nfss;
+ struct folio *folio;
while (!list_empty(&data->pages)) {
req = nfs_list_entry(data->pages.next);
nfs_list_remove_request(req);
- if (req->wb_page)
- nfs_clear_page_commit(req->wb_page);
+ folio = nfs_page_to_folio(req);
+ nfs_folio_clear_commit(folio);
dprintk("NFS: commit (%s/%llu %d@%lld)",
nfs_req_openctx(req)->dentry->d_sb->s_id,
@@ -1859,13 +1815,13 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
req->wb_bytes,
(long long)req_offset(req));
if (status < 0) {
- if (req->wb_page) {
+ if (folio) {
trace_nfs_commit_error(data->inode, req,
status);
- nfs_mapping_set_error(req->wb_page, status);
+ nfs_mapping_set_error(folio, status);
nfs_inode_remove_request(req);
}
- dprintk_cont(", error = %d\n", status);
+ dprintk(", error = %d\n", status);
goto next;
}
@@ -1873,13 +1829,13 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
* returned by the server against all stored verfs. */
if (nfs_write_match_verf(verf, req)) {
/* We have a match */
- if (req->wb_page)
+ if (folio)
nfs_inode_remove_request(req);
- dprintk_cont(" OK\n");
+ dprintk(" OK\n");
goto next;
}
/* We have a mismatch. Write the page again */
- dprintk_cont(" mismatch\n");
+ dprintk(" mismatch\n");
nfs_mark_request_dirty(req);
atomic_long_inc(&NFS_I(data->inode)->redirtied_pages);
next:
@@ -1887,9 +1843,6 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
/* Latency breaker */
cond_resched();
}
- nfss = NFS_SERVER(data->inode);
- if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
- nfss->write_congested = 0;
nfs_init_cinfo(&cinfo, data->inode, data->dreq);
nfs_commit_end(cinfo.mds);
@@ -2054,7 +2007,7 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio)
/* blocking call to cancel all requests and join to a single (head)
* request */
- req = nfs_lock_and_join_requests(&folio->page);
+ req = nfs_lock_and_join_requests(folio);
if (IS_ERR(req)) {
ret = PTR_ERR(req);
@@ -2065,45 +2018,51 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio)
* release it */
nfs_inode_remove_request(req);
nfs_unlock_and_release_request(req);
+ folio_cancel_dirty(folio);
}
return ret;
}
-/*
- * Write back all requests on one page - we do this before reading it.
+/**
+ * nfs_wb_folio - Write back all requests on one page
+ * @inode: pointer to page
+ * @folio: pointer to folio
+ *
+ * Assumes that the folio has been locked by the caller, and will
+ * not unlock it.
*/
-int nfs_wb_page(struct inode *inode, struct page *page)
+int nfs_wb_folio(struct inode *inode, struct folio *folio)
{
- loff_t range_start = page_file_offset(page);
- loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
+ loff_t range_start = folio_pos(folio);
+ size_t len = folio_size(folio);
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = 0,
.range_start = range_start,
- .range_end = range_end,
+ .range_end = range_start + len - 1,
};
int ret;
- trace_nfs_writeback_page_enter(inode);
+ trace_nfs_writeback_folio(inode, range_start, len);
for (;;) {
- wait_on_page_writeback(page);
- if (clear_page_dirty_for_io(page)) {
- ret = nfs_writepage_locked(page, &wbc);
+ folio_wait_writeback(folio);
+ if (folio_clear_dirty_for_io(folio)) {
+ ret = nfs_writepage_locked(folio, &wbc);
if (ret < 0)
goto out_error;
continue;
}
ret = 0;
- if (!PagePrivate(page))
+ if (!folio_test_private(folio))
break;
ret = nfs_commit_inode(inode, FLUSH_SYNC);
if (ret < 0)
goto out_error;
}
out_error:
- trace_nfs_writeback_page_exit(inode, ret);
+ trace_nfs_writeback_folio_done(inode, range_start, len, ret);
return ret;
}
@@ -2119,13 +2078,17 @@ int nfs_migrate_folio(struct address_space *mapping, struct folio *dst,
* that we can safely release the inode reference while holding
* the folio lock.
*/
- if (folio_test_private(src))
- return -EBUSY;
+ if (folio_test_private(src)) {
+ if (mode == MIGRATE_SYNC)
+ nfs_wb_folio(src->mapping->host, src);
+ if (folio_test_private(src))
+ return -EBUSY;
+ }
- if (folio_test_fscache(src)) {
+ if (folio_test_private_2(src)) { /* [DEPRECATED] */
if (mode == MIGRATE_ASYNC)
return -EBUSY;
- folio_wait_fscache(src);
+ folio_wait_private_2(src);
}
return migrate_folio(mapping, dst, src, mode);