Diffstat (limited to 'fs')
31 files changed, 1359 insertions, 491 deletions
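For context, the bulk of this series adds per-mirror data-server striping to the flexfiles layout driver: each nfs4_ff_layout_mirror now carries a dss[] array of nfs4_ff_layout_ds_stripe entries, and I/O selects a stripe index (dss_id) from the file offset via nfs4_ff_layout_calc_dss_id() — the offset divided by the stripe unit, taken modulo the stripe count. Below is a minimal userspace sketch of that calculation (not the kernel code itself, which uses do_div(); the example values are hypothetical):

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative userspace version of the stripe selection added for
 * flexfiles layouts: pick a data-server stripe (dss_id) for a file
 * offset.  Mirrors the logic of nfs4_ff_layout_calc_dss_id() in
 * fs/nfs/flexfilelayout/flexfilelayout.h from this series.
 */
static uint32_t calc_dss_id(uint64_t stripe_unit, uint32_t dss_count,
			    uint64_t offset)
{
	if (dss_count == 1 || stripe_unit == 0)
		return 0;	/* unstriped mirror: everything goes to stripe 0 */

	/* which stripe unit the offset falls in, round-robined over the stripes */
	return (uint32_t)((offset / stripe_unit) % dss_count);
}

int main(void)
{
	/* hypothetical example: 1 MiB stripe unit spread across 4 data servers */
	uint64_t stripe_unit = 1 << 20;
	uint32_t dss_count = 4;
	uint64_t offsets[] = { 0, 512 << 10, 1 << 20, 3 << 20, 4 << 20 };

	for (size_t i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++)
		printf("offset %10llu -> dss_id %u\n",
		       (unsigned long long)offsets[i],
		       (unsigned)calc_dss_id(stripe_unit, dss_count, offsets[i]));
	return 0;
}

With these values, offsets 0 and 512 KiB map to stripe 0, 1 MiB to stripe 1, 3 MiB to stripe 3, and 4 MiB wraps back to stripe 0, which is the round-robin placement the per-stripe ds_commit_idx and pg_test changes in the diff below rely on.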
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index e80262a51884..d68afa196535 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -216,8 +216,7 @@ out_err: if (warned++ == 0) printk(KERN_WARNING "lockd_up: makesock failed, error=%d\n", err); - svc_xprt_destroy_all(serv, net); - svc_rpcb_cleanup(serv, net); + svc_xprt_destroy_all(serv, net, true); return err; } @@ -255,8 +254,7 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net) nlm_shutdown_hosts_net(net); cancel_delayed_work_sync(&ln->grace_period_end); locks_end_grace(&ln->lockd_manager); - svc_xprt_destroy_all(serv, net); - svc_rpcb_cleanup(serv, net); + svc_xprt_destroy_all(serv, net, true); } } else { pr_err("%s: no users! net=%x\n", diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 5d6edafbed20..0e4c67373e4f 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -676,7 +676,7 @@ bl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr, struct pnfs_layout_segment *lseg; struct xdr_buf buf; struct xdr_stream xdr; - struct page *scratch; + struct folio *scratch; int status, i; uint32_t count; __be32 *p; @@ -689,13 +689,13 @@ bl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr, return ERR_PTR(-ENOMEM); status = -ENOMEM; - scratch = alloc_page(gfp_mask); + scratch = folio_alloc(gfp_mask, 0); if (!scratch) goto out; xdr_init_decode_pages(&xdr, &buf, lgr->layoutp->pages, lgr->layoutp->len); - xdr_set_scratch_page(&xdr, scratch); + xdr_set_scratch_folio(&xdr, scratch); status = -EIO; p = xdr_inline_decode(&xdr, 4); @@ -744,7 +744,7 @@ process_extents: } out_free_scratch: - __free_page(scratch); + folio_put(scratch); out: dprintk("%s returns %d\n", __func__, status); switch (status) { diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index 44306ac22353..ab76120705e2 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -541,16 +541,16 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, struct pnfs_block_dev *top; struct xdr_stream xdr; struct xdr_buf buf; - struct page *scratch; + struct folio *scratch; int nr_volumes, ret, i; __be32 *p; - scratch = alloc_page(gfp_mask); + scratch = folio_alloc(gfp_mask, 0); if (!scratch) goto out; xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen); - xdr_set_scratch_page(&xdr, scratch); + xdr_set_scratch_folio(&xdr, scratch); p = xdr_inline_decode(&xdr, sizeof(__be32)); if (!p) @@ -582,7 +582,7 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, out_free_volumes: kfree(volumes); out_free_scratch: - __free_page(scratch); + folio_put(scratch); out: return node; } diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 86bdc7d23fb9..c8b837006bb2 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -136,7 +136,7 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc return; dprintk("NFS: destroy per-net callback data; net=%x\n", net->ns.inum); - svc_xprt_destroy_all(serv, net); + svc_xprt_destroy_all(serv, net, false); } static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, @@ -153,7 +153,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, ret = svc_bind(serv, net); if (ret < 0) { printk(KERN_WARNING "NFS: bind callback service failed\n"); - goto err_bind; + goto err; } ret = 0; @@ -166,13 +166,11 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, if (ret < 0) { printk(KERN_ERR "NFS: callback 
service start failed\n"); - goto err_socks; + goto err; } return 0; -err_socks: - svc_rpcb_cleanup(serv, net); -err_bind: +err: nn->cb_users[minorversion]--; dprintk("NFS: Couldn't create callback socket: err = %d; " "net = %x\n", ret, net->ns.inum); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5f7d9be6f022..46d9c65d50f8 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -829,17 +829,17 @@ static int nfs_readdir_folio_filler(struct nfs_readdir_descriptor *desc, struct address_space *mapping = desc->file->f_mapping; struct folio *new, *folio = *arrays; struct xdr_stream stream; - struct page *scratch; + struct folio *scratch; struct xdr_buf buf; u64 cookie; int status; - scratch = alloc_page(GFP_KERNEL); + scratch = folio_alloc(GFP_KERNEL, 0); if (scratch == NULL) return -ENOMEM; xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen); - xdr_set_scratch_page(&stream, scratch); + xdr_set_scratch_folio(&stream, scratch); do { status = nfs_readdir_entry_decode(desc, entry, &stream); @@ -891,7 +891,7 @@ static int nfs_readdir_folio_filler(struct nfs_readdir_descriptor *desc, if (folio != *arrays) nfs_readdir_folio_unlock_and_put(folio); - put_page(scratch); + folio_put(scratch); return status; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 8059ece82468..d020aab40c64 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -161,6 +161,8 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(iocb->ki_filp); ssize_t result; + trace_nfs_file_read(iocb, to); + if (iocb->ki_flags & IOCB_DIRECT) return nfs_file_direct_read(iocb, to, false); @@ -361,6 +363,8 @@ static bool nfs_want_read_modify_write(struct file *file, struct folio *folio, if (pnfs_ld_read_whole_page(file_inode(file))) return true; + if (folio_test_dropbehind(folio)) + return false; /* Open for reading too? 
*/ if (file->f_mode & FMODE_READ) return true; @@ -380,22 +384,23 @@ static int nfs_write_begin(const struct kiocb *iocb, loff_t pos, unsigned len, struct folio **foliop, void **fsdata) { - fgf_t fgp = FGP_WRITEBEGIN; struct folio *folio; struct file *file = iocb->ki_filp; int once_thru = 0; int ret; + trace_nfs_write_begin(file_inode(file), pos, len); + dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n", file, mapping->host->i_ino, len, (long long) pos); nfs_truncate_last_folio(mapping, i_size_read(mapping->host), pos); - fgp |= fgf_set_order(len); start: - folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp, - mapping_gfp_mask(mapping)); - if (IS_ERR(folio)) - return PTR_ERR(folio); + folio = write_begin_get_folio(iocb, mapping, pos >> PAGE_SHIFT, len); + if (IS_ERR(folio)) { + ret = PTR_ERR(folio); + goto out; + } *foliop = folio; ret = nfs_flush_incompatible(file, folio); @@ -405,11 +410,14 @@ start: } else if (!once_thru && nfs_want_read_modify_write(file, folio, pos, len)) { once_thru = 1; + folio_clear_dropbehind(folio); ret = nfs_read_folio(file, folio); folio_put(folio); if (!ret) goto start; } +out: + trace_nfs_write_begin_done(file_inode(file), pos, len, ret); return ret; } @@ -423,6 +431,7 @@ static int nfs_write_end(const struct kiocb *iocb, unsigned offset = offset_in_folio(folio, pos); int status; + trace_nfs_write_end(file_inode(file), pos, len); dfprintk(PAGECACHE, "NFS: write_end(%pD2(%lu), %u@%lld)\n", file, mapping->host->i_ino, len, (long long) pos); @@ -451,13 +460,16 @@ static int nfs_write_end(const struct kiocb *iocb, folio_unlock(folio); folio_put(folio); - if (status < 0) + if (status < 0) { + trace_nfs_write_end_done(file_inode(file), pos, len, status); return status; + } NFS_I(mapping->host)->write_io += copied; if (nfs_ctx_key_to_expire(ctx, mapping->host)) nfs_wb_all(mapping->host); + trace_nfs_write_end_done(file_inode(file), pos, len, copied); return copied; } @@ -690,6 +702,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) errseq_t since; int error; + trace_nfs_file_write(iocb, from); + result = nfs_key_timeout_notify(file, inode); if (result) return result; @@ -949,5 +963,6 @@ const struct file_operations nfs_file_operations = { .splice_write = iter_file_splice_write, .check_flags = nfs_check_flags, .setlease = simple_nosetlease, + .fop_flags = FOP_DONTCACHE, }; EXPORT_SYMBOL_GPL(nfs_file_operations); diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index d39a1f58e18d..5c4551117c58 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -646,19 +646,19 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, { struct xdr_stream stream; struct xdr_buf buf; - struct page *scratch; + struct folio *scratch; __be32 *p; uint32_t nfl_util; int i; dprintk("%s: set_layout_map Begin\n", __func__); - scratch = alloc_page(gfp_flags); + scratch = folio_alloc(gfp_flags, 0); if (!scratch) return -ENOMEM; xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len); - xdr_set_scratch_page(&stream, scratch); + xdr_set_scratch_folio(&stream, scratch); /* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8), * num_fh (4) */ @@ -724,11 +724,11 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, fl->fh_array[i]->size); } - __free_page(scratch); + folio_put(scratch); return 0; out_err: - __free_page(scratch); + folio_put(scratch); return -EIO; } diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 
29d9234d5c08..df79aeb68db4 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -73,18 +73,18 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, struct nfs4_file_layout_dsaddr *dsaddr = NULL; struct xdr_stream stream; struct xdr_buf buf; - struct page *scratch; + struct folio *scratch; struct list_head dsaddrs; struct nfs4_pnfs_ds_addr *da; struct net *net = server->nfs_client->cl_net; /* set up xdr stream */ - scratch = alloc_page(gfp_flags); + scratch = folio_alloc(gfp_flags, 0); if (!scratch) goto out_err; xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); - xdr_set_scratch_page(&stream, scratch); + xdr_set_scratch_folio(&stream, scratch); /* Get the stripe count (number of stripe index) */ p = xdr_inline_decode(&stream, 4); @@ -186,7 +186,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, } } - __free_page(scratch); + folio_put(scratch); return dsaddr; out_err_drain_dsaddrs: @@ -204,7 +204,7 @@ out_err_free_deviceid: out_err_free_stripe_indices: kfree(stripe_indices); out_err_free_scratch: - __free_page(scratch); + folio_put(scratch); out_err: dprintk("%s ERROR: returning NULL\n", __func__); return NULL; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 9edb5f9b0c4e..df01d2876b68 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -47,7 +47,7 @@ ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, int dev_limit, enum nfs4_ff_op_type type); static void ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr, const struct nfs42_layoutstat_devinfo *devinfo, - struct nfs4_ff_layout_mirror *mirror); + struct nfs4_ff_layout_ds_stripe *dss_info); static struct pnfs_layout_hdr * ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) @@ -164,31 +164,32 @@ decode_name(struct xdr_stream *xdr, u32 *id) } static struct nfsd_file * -ff_local_open_fh(struct pnfs_layout_segment *lseg, u32 ds_idx, +ff_local_open_fh(struct pnfs_layout_segment *lseg, u32 ds_idx, u32 dss_id, struct nfs_client *clp, const struct cred *cred, struct nfs_fh *fh, fmode_t mode) { #if IS_ENABLED(CONFIG_NFS_LOCALIO) struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); - return nfs_local_open_fh(clp, cred, fh, &mirror->nfl, mode); + return nfs_local_open_fh(clp, cred, fh, &mirror->dss[dss_id].nfl, mode); #else return NULL; #endif } -static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1, - const struct nfs4_ff_layout_mirror *m2) +static bool ff_dss_match_fh(const struct nfs4_ff_layout_ds_stripe *dss1, + const struct nfs4_ff_layout_ds_stripe *dss2) { int i, j; - if (m1->fh_versions_cnt != m2->fh_versions_cnt) + if (dss1->fh_versions_cnt != dss2->fh_versions_cnt) return false; - for (i = 0; i < m1->fh_versions_cnt; i++) { + + for (i = 0; i < dss1->fh_versions_cnt; i++) { bool found_fh = false; - for (j = 0; j < m2->fh_versions_cnt; j++) { - if (nfs_compare_fh(&m1->fh_versions[i], - &m2->fh_versions[j]) == 0) { + for (j = 0; j < dss2->fh_versions_cnt; j++) { + if (nfs_compare_fh(&dss1->fh_versions[i], + &dss2->fh_versions[j]) == 0) { found_fh = true; break; } @@ -199,6 +200,38 @@ static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1, return true; } +static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1, + const struct nfs4_ff_layout_mirror *m2) +{ + u32 dss_id; + + if (m1->dss_count != m2->dss_count) + return false; + + for (dss_id = 0; dss_id < 
m1->dss_count; dss_id++) + if (!ff_dss_match_fh(&m1->dss[dss_id], &m2->dss[dss_id])) + return false; + + return true; +} + +static bool ff_mirror_match_devid(const struct nfs4_ff_layout_mirror *m1, + const struct nfs4_ff_layout_mirror *m2) +{ + u32 dss_id; + + if (m1->dss_count != m2->dss_count) + return false; + + for (dss_id = 0; dss_id < m1->dss_count; dss_id++) + if (memcmp(&m1->dss[dss_id].devid, + &m2->dss[dss_id].devid, + sizeof(m1->dss[dss_id].devid)) != 0) + return false; + + return true; +} + static struct nfs4_ff_layout_mirror * ff_layout_add_mirror(struct pnfs_layout_hdr *lo, struct nfs4_ff_layout_mirror *mirror) @@ -209,7 +242,7 @@ ff_layout_add_mirror(struct pnfs_layout_hdr *lo, spin_lock(&inode->i_lock); list_for_each_entry(pos, &ff_layout->mirrors, mirrors) { - if (memcmp(&mirror->devid, &pos->devid, sizeof(pos->devid)) != 0) + if (!ff_mirror_match_devid(mirror, pos)) continue; if (!ff_mirror_match_fh(mirror, pos)) continue; @@ -240,29 +273,37 @@ ff_layout_remove_mirror(struct nfs4_ff_layout_mirror *mirror) static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags) { struct nfs4_ff_layout_mirror *mirror; + u32 dss_id; mirror = kzalloc(sizeof(*mirror), gfp_flags); if (mirror != NULL) { spin_lock_init(&mirror->lock); refcount_set(&mirror->ref, 1); INIT_LIST_HEAD(&mirror->mirrors); - nfs_localio_file_init(&mirror->nfl); + for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) + nfs_localio_file_init(&mirror->dss[dss_id].nfl); } return mirror; } static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror) { - const struct cred *cred; + const struct cred *cred; + u32 dss_id; ff_layout_remove_mirror(mirror); - kfree(mirror->fh_versions); - nfs_close_local_fh(&mirror->nfl); - cred = rcu_access_pointer(mirror->ro_cred); - put_cred(cred); - cred = rcu_access_pointer(mirror->rw_cred); - put_cred(cred); - nfs4_ff_layout_put_deviceid(mirror->mirror_ds); + + for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) { + kfree(mirror->dss[dss_id].fh_versions); + cred = rcu_access_pointer(mirror->dss[dss_id].ro_cred); + put_cred(cred); + cred = rcu_access_pointer(mirror->dss[dss_id].rw_cred); + put_cred(cred); + nfs_close_local_fh(&mirror->dss[dss_id].nfl); + nfs4_ff_layout_put_deviceid(mirror->dss[dss_id].mirror_ds); + } + + kfree(mirror->dss); kfree(mirror); } @@ -366,14 +407,24 @@ ff_layout_add_lseg(struct pnfs_layout_hdr *lo, free_me); } +static u32 ff_mirror_efficiency_sum(const struct nfs4_ff_layout_mirror *mirror) +{ + u32 dss_id, sum = 0; + + for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) + sum += mirror->dss[dss_id].efficiency; + + return sum; +} + static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls) { int i, j; for (i = 0; i < fls->mirror_array_cnt - 1; i++) { for (j = i + 1; j < fls->mirror_array_cnt; j++) - if (fls->mirror_array[i]->efficiency < - fls->mirror_array[j]->efficiency) + if (ff_mirror_efficiency_sum(fls->mirror_array[i]) < + ff_mirror_efficiency_sum(fls->mirror_array[j])) swap(fls->mirror_array[i], fls->mirror_array[j]); } @@ -388,20 +439,21 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, struct nfs4_ff_layout_segment *fls = NULL; struct xdr_stream stream; struct xdr_buf buf; - struct page *scratch; + struct folio *scratch; u64 stripe_unit; u32 mirror_array_cnt; __be32 *p; int i, rc; + struct nfs4_ff_layout_ds_stripe *dss_info; dprintk("--> %s\n", __func__); - scratch = alloc_page(gfp_flags); + scratch = folio_alloc(gfp_flags, 0); if (!scratch) return ERR_PTR(-ENOMEM); xdr_init_decode_pages(&stream, &buf, 
lgr->layoutp->pages, lgr->layoutp->len); - xdr_set_scratch_page(&stream, scratch); + xdr_set_scratch_folio(&stream, scratch); /* stripe unit and mirror_array_cnt */ rc = -EIO; @@ -427,23 +479,32 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, fls->mirror_array_cnt = mirror_array_cnt; fls->stripe_unit = stripe_unit; + u32 dss_count = 0; for (i = 0; i < fls->mirror_array_cnt; i++) { struct nfs4_ff_layout_mirror *mirror; struct cred *kcred; const struct cred __rcu *cred; kuid_t uid; kgid_t gid; - u32 ds_count, fh_count, id; - int j; + u32 fh_count, id; + int j, dss_id; rc = -EIO; p = xdr_inline_decode(&stream, 4); if (!p) goto out_err_free; - ds_count = be32_to_cpup(p); - /* FIXME: allow for striping? */ - if (ds_count != 1) + // Ensure all mirrors have same stripe count. + if (dss_count == 0) + dss_count = be32_to_cpup(p); + else if (dss_count != be32_to_cpup(p)) + goto out_err_free; + + if (dss_count > NFS4_FLEXFILE_LAYOUT_MAX_STRIPE_CNT || + dss_count == 0) + goto out_err_free; + + if (dss_count > 1 && stripe_unit == 0) goto out_err_free; fls->mirror_array[i] = ff_layout_alloc_mirror(gfp_flags); @@ -452,91 +513,105 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, goto out_err_free; } - fls->mirror_array[i]->ds_count = ds_count; + fls->mirror_array[i]->dss_count = dss_count; + fls->mirror_array[i]->dss = + kcalloc(dss_count, sizeof(struct nfs4_ff_layout_ds_stripe), + gfp_flags); - /* deviceid */ - rc = decode_deviceid(&stream, &fls->mirror_array[i]->devid); - if (rc) - goto out_err_free; + for (dss_id = 0; dss_id < dss_count; dss_id++) { + dss_info = &fls->mirror_array[i]->dss[dss_id]; + dss_info->mirror = fls->mirror_array[i]; - /* efficiency */ - rc = -EIO; - p = xdr_inline_decode(&stream, 4); - if (!p) - goto out_err_free; - fls->mirror_array[i]->efficiency = be32_to_cpup(p); + /* deviceid */ + rc = decode_deviceid(&stream, &dss_info->devid); + if (rc) + goto out_err_free; - /* stateid */ - rc = decode_pnfs_stateid(&stream, &fls->mirror_array[i]->stateid); - if (rc) - goto out_err_free; + /* efficiency */ + rc = -EIO; + p = xdr_inline_decode(&stream, 4); + if (!p) + goto out_err_free; + dss_info->efficiency = be32_to_cpup(p); - /* fh */ - rc = -EIO; - p = xdr_inline_decode(&stream, 4); - if (!p) - goto out_err_free; - fh_count = be32_to_cpup(p); + /* stateid */ + rc = decode_pnfs_stateid(&stream, &dss_info->stateid); + if (rc) + goto out_err_free; - fls->mirror_array[i]->fh_versions = - kcalloc(fh_count, sizeof(struct nfs_fh), - gfp_flags); - if (fls->mirror_array[i]->fh_versions == NULL) { - rc = -ENOMEM; - goto out_err_free; - } + /* fh */ + rc = -EIO; + p = xdr_inline_decode(&stream, 4); + if (!p) + goto out_err_free; + fh_count = be32_to_cpup(p); - for (j = 0; j < fh_count; j++) { - rc = decode_nfs_fh(&stream, - &fls->mirror_array[i]->fh_versions[j]); + dss_info->fh_versions = + kcalloc(fh_count, sizeof(struct nfs_fh), + gfp_flags); + if (dss_info->fh_versions == NULL) { + rc = -ENOMEM; + goto out_err_free; + } + + for (j = 0; j < fh_count; j++) { + rc = decode_nfs_fh(&stream, + &dss_info->fh_versions[j]); + if (rc) + goto out_err_free; + } + + dss_info->fh_versions_cnt = fh_count; + + /* user */ + rc = decode_name(&stream, &id); if (rc) goto out_err_free; - } - fls->mirror_array[i]->fh_versions_cnt = fh_count; + uid = make_kuid(&init_user_ns, id); - /* user */ - rc = decode_name(&stream, &id); - if (rc) - goto out_err_free; + /* group */ + rc = decode_name(&stream, &id); + if (rc) + goto out_err_free; - uid = make_kuid(&init_user_ns, id); + gid = make_kgid(&init_user_ns, id); 
- /* group */ - rc = decode_name(&stream, &id); - if (rc) - goto out_err_free; + if (gfp_flags & __GFP_FS) + kcred = prepare_kernel_cred(&init_task); + else { + unsigned int nofs_flags = memalloc_nofs_save(); - gid = make_kgid(&init_user_ns, id); + kcred = prepare_kernel_cred(&init_task); + memalloc_nofs_restore(nofs_flags); + } + rc = -ENOMEM; + if (!kcred) + goto out_err_free; + kcred->fsuid = uid; + kcred->fsgid = gid; + cred = RCU_INITIALIZER(kcred); - if (gfp_flags & __GFP_FS) - kcred = prepare_kernel_cred(&init_task); - else { - unsigned int nofs_flags = memalloc_nofs_save(); - kcred = prepare_kernel_cred(&init_task); - memalloc_nofs_restore(nofs_flags); + if (lgr->range.iomode == IOMODE_READ) + rcu_assign_pointer(dss_info->ro_cred, cred); + else + rcu_assign_pointer(dss_info->rw_cred, cred); } - rc = -ENOMEM; - if (!kcred) - goto out_err_free; - kcred->fsuid = uid; - kcred->fsgid = gid; - cred = RCU_INITIALIZER(kcred); - - if (lgr->range.iomode == IOMODE_READ) - rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred); - else - rcu_assign_pointer(fls->mirror_array[i]->rw_cred, cred); mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]); if (mirror != fls->mirror_array[i]) { - /* swap cred ptrs so free_mirror will clean up old */ - if (lgr->range.iomode == IOMODE_READ) { - cred = xchg(&mirror->ro_cred, cred); - rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred); - } else { - cred = xchg(&mirror->rw_cred, cred); - rcu_assign_pointer(fls->mirror_array[i]->rw_cred, cred); + for (dss_id = 0; dss_id < dss_count; dss_id++) { + dss_info = &fls->mirror_array[i]->dss[dss_id]; + /* swap cred ptrs so free_mirror will clean up old */ + if (lgr->range.iomode == IOMODE_READ) { + cred = xchg(&mirror->dss[dss_id].ro_cred, + dss_info->ro_cred); + rcu_assign_pointer(dss_info->ro_cred, cred); + } else { + cred = xchg(&mirror->dss[dss_id].rw_cred, + dss_info->rw_cred); + rcu_assign_pointer(dss_info->rw_cred, cred); + } } ff_layout_free_mirror(fls->mirror_array[i]); fls->mirror_array[i] = mirror; @@ -564,7 +639,7 @@ out_sort_mirrors: ret = &fls->generic_hdr; dprintk("<-- %s (success)\n", __func__); out_free_page: - __free_page(scratch); + folio_put(scratch); return ret; out_err_free: _ff_layout_free_lseg(fls); @@ -593,6 +668,26 @@ ff_layout_free_lseg(struct pnfs_layout_segment *lseg) _ff_layout_free_lseg(fls); } +static u32 calc_commit_idx(struct pnfs_layout_segment *lseg, + u32 mirror_idx, u32 dss_id) +{ + struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg); + + return (mirror_idx * flseg->mirror_array[0]->dss_count) + dss_id; +} + +static u32 calc_mirror_idx_from_commit(struct pnfs_layout_segment *lseg, + u32 commit_index) +{ + return commit_index / FF_LAYOUT_LSEG(lseg)->mirror_array[0]->dss_count; +} + +static u32 calc_dss_id_from_commit(struct pnfs_layout_segment *lseg, + u32 commit_index) +{ + return commit_index % FF_LAYOUT_LSEG(lseg)->mirror_array[0]->dss_count; +} + static void nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer, ktime_t now) { @@ -617,6 +712,7 @@ nfs4_ff_end_busy_timer(struct nfs4_ff_busy_timer *timer, ktime_t now) static bool nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror, + u32 dss_id, struct nfs4_ff_layoutstat *layoutstat, ktime_t now) { @@ -624,8 +720,8 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror, struct nfs4_flexfile_layout *ffl = FF_LAYOUT_FROM_HDR(mirror->layout); nfs4_ff_start_busy_timer(&layoutstat->busy_timer, now); - if (!mirror->start_time) - mirror->start_time = now; + if 
(!mirror->dss[dss_id].start_time) + mirror->dss[dss_id].start_time = now; if (mirror->report_interval != 0) report_interval = (s64)mirror->report_interval * 1000LL; else if (layoutstats_timer != 0) @@ -675,13 +771,16 @@ nfs4_ff_layout_stat_io_update_completed(struct nfs4_ff_layoutstat *layoutstat, static void nfs4_ff_layout_stat_io_start_read(struct inode *inode, struct nfs4_ff_layout_mirror *mirror, + u32 dss_id, __u64 requested, ktime_t now) { bool report; spin_lock(&mirror->lock); - report = nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat, now); - nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested); + report = nfs4_ff_layoutstat_start_io( + mirror, dss_id, &mirror->dss[dss_id].read_stat, now); + nfs4_ff_layout_stat_io_update_requested( + &mirror->dss[dss_id].read_stat, requested); set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags); spin_unlock(&mirror->lock); @@ -692,11 +791,12 @@ nfs4_ff_layout_stat_io_start_read(struct inode *inode, static void nfs4_ff_layout_stat_io_end_read(struct rpc_task *task, struct nfs4_ff_layout_mirror *mirror, + u32 dss_id, __u64 requested, __u64 completed) { spin_lock(&mirror->lock); - nfs4_ff_layout_stat_io_update_completed(&mirror->read_stat, + nfs4_ff_layout_stat_io_update_completed(&mirror->dss[dss_id].read_stat, requested, completed, ktime_get(), task->tk_start); set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags); @@ -706,13 +806,20 @@ nfs4_ff_layout_stat_io_end_read(struct rpc_task *task, static void nfs4_ff_layout_stat_io_start_write(struct inode *inode, struct nfs4_ff_layout_mirror *mirror, + u32 dss_id, __u64 requested, ktime_t now) { bool report; spin_lock(&mirror->lock); - report = nfs4_ff_layoutstat_start_io(mirror , &mirror->write_stat, now); - nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested); + report = nfs4_ff_layoutstat_start_io( + mirror, + dss_id, + &mirror->dss[dss_id].write_stat, + now); + nfs4_ff_layout_stat_io_update_requested( + &mirror->dss[dss_id].write_stat, + requested); set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags); spin_unlock(&mirror->lock); @@ -723,6 +830,7 @@ nfs4_ff_layout_stat_io_start_write(struct inode *inode, static void nfs4_ff_layout_stat_io_end_write(struct rpc_task *task, struct nfs4_ff_layout_mirror *mirror, + u32 dss_id, __u64 requested, __u64 completed, enum nfs3_stable_how committed) @@ -731,25 +839,25 @@ nfs4_ff_layout_stat_io_end_write(struct rpc_task *task, requested = completed = 0; spin_lock(&mirror->lock); - nfs4_ff_layout_stat_io_update_completed(&mirror->write_stat, + nfs4_ff_layout_stat_io_update_completed(&mirror->dss[dss_id].write_stat, requested, completed, ktime_get(), task->tk_start); set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags); spin_unlock(&mirror->lock); } static void -ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, u32 idx) +ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, u32 idx, u32 dss_id) { - struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); + struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx, dss_id); if (devid) nfs4_mark_deviceid_unavailable(devid); } static void -ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, u32 idx) +ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, u32 idx, u32 dss_id) { - struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); + struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx, dss_id); if (devid) nfs4_mark_deviceid_available(devid); @@ -758,6 +866,7 @@ ff_layout_mark_ds_reachable(struct 
pnfs_layout_segment *lseg, u32 idx) static struct nfs4_pnfs_ds * ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, u32 start_idx, u32 *best_idx, + u32 offset, u32 *dss_id, bool check_device) { struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); @@ -768,12 +877,16 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, /* mirrors are initially sorted by efficiency */ for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) { mirror = FF_LAYOUT_COMP(lseg, idx); - ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false); + *dss_id = nfs4_ff_layout_calc_dss_id( + fls->stripe_unit, + fls->mirror_array[idx]->dss_count, + offset); + ds = nfs4_ff_layout_prepare_ds(lseg, mirror, *dss_id, false); if (IS_ERR(ds)) continue; if (check_device && - nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node)) { + nfs4_test_deviceid_unavailable(&mirror->dss[*dss_id].mirror_ds->id_node)) { // reinitialize the error state in case if this is the last iteration ds = ERR_PTR(-EINVAL); continue; @@ -788,42 +901,52 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, static struct nfs4_pnfs_ds * ff_layout_choose_any_ds_for_read(struct pnfs_layout_segment *lseg, - u32 start_idx, u32 *best_idx) + u32 start_idx, u32 *best_idx, + u32 offset, u32 *dss_id) { - return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, false); + return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, + offset, dss_id, false); } static struct nfs4_pnfs_ds * ff_layout_choose_valid_ds_for_read(struct pnfs_layout_segment *lseg, - u32 start_idx, u32 *best_idx) + u32 start_idx, u32 *best_idx, + u32 offset, u32 *dss_id) { - return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, true); + return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, + offset, dss_id, true); } static struct nfs4_pnfs_ds * ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, - u32 start_idx, u32 *best_idx) + u32 start_idx, u32 *best_idx, + u32 offset, u32 *dss_id) { struct nfs4_pnfs_ds *ds; - ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx); + ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx, + offset, dss_id); if (!IS_ERR(ds)) return ds; - return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx); + return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx, + offset, dss_id); } static struct nfs4_pnfs_ds * ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, - u32 *best_idx) + u32 *best_idx, + u32 offset, + u32 *dss_id) { struct pnfs_layout_segment *lseg = pgio->pg_lseg; struct nfs4_pnfs_ds *ds; ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx, - best_idx); + best_idx, offset, dss_id); if (!IS_ERR(ds) || !pgio->pg_mirror_idx) return ds; - return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx); + return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx, + offset, dss_id); } static void @@ -842,6 +965,56 @@ ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio, } } +static bool +ff_layout_lseg_is_striped(const struct nfs4_ff_layout_segment *fls) +{ + return fls->mirror_array[0]->dss_count > 1; +} + +/* + * ff_layout_pg_test(). Called by nfs_can_coalesce_requests() + * + * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number + * of bytes (maximum @req->wb_bytes) that can be coalesced. 
+ */ +static size_t +ff_layout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, + struct nfs_page *req) +{ + unsigned int size; + u64 p_stripe, r_stripe; + u32 stripe_offset; + u64 segment_offset = pgio->pg_lseg->pls_range.offset; + u32 stripe_unit = FF_LAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; + + /* calls nfs_generic_pg_test */ + size = pnfs_generic_pg_test(pgio, prev, req); + if (!size) + return 0; + else if (!ff_layout_lseg_is_striped(FF_LAYOUT_LSEG(pgio->pg_lseg))) + return size; + + /* see if req and prev are in the same stripe */ + if (prev) { + p_stripe = (u64)req_offset(prev) - segment_offset; + r_stripe = (u64)req_offset(req) - segment_offset; + do_div(p_stripe, stripe_unit); + do_div(r_stripe, stripe_unit); + + if (p_stripe != r_stripe) + return 0; + } + + /* calculate remaining bytes in the current stripe */ + div_u64_rem((u64)req_offset(req) - segment_offset, + stripe_unit, + &stripe_offset); + WARN_ON_ONCE(stripe_offset > stripe_unit); + if (stripe_offset >= stripe_unit) + return 0; + return min(stripe_unit - (unsigned int)stripe_offset, size); +} + static void ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) @@ -849,7 +1022,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_pgio_mirror *pgm; struct nfs4_ff_layout_mirror *mirror; struct nfs4_pnfs_ds *ds; - u32 ds_idx; + u32 ds_idx, dss_id; if (NFS_SERVER(pgio->pg_inode)->flags & (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR)) @@ -870,7 +1043,8 @@ retry: /* Reset wb_nio, since getting layout segment was successful */ req->wb_nio = 0; - ds = ff_layout_get_ds_for_read(pgio, &ds_idx); + ds = ff_layout_get_ds_for_read(pgio, &ds_idx, + req_offset(req), &dss_id); if (IS_ERR(ds)) { if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) goto out_mds; @@ -882,7 +1056,7 @@ retry: mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx); pgm = &pgio->pg_mirrors[0]; - pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; + pgm->pg_bsize = mirror->dss[dss_id].mirror_ds->ds_versions[0].rsize; pgio->pg_mirror_idx = ds_idx; return; @@ -919,7 +1093,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs4_ff_layout_mirror *mirror; struct nfs_pgio_mirror *pgm; struct nfs4_pnfs_ds *ds; - u32 i; + u32 i, dss_id; retry: pnfs_generic_pg_check_layout(pgio, req); @@ -944,7 +1118,12 @@ retry: for (i = 0; i < pgio->pg_mirror_count; i++) { mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); - ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror, true); + dss_id = nfs4_ff_layout_calc_dss_id( + FF_LAYOUT_LSEG(pgio->pg_lseg)->stripe_unit, + mirror->dss_count, + req_offset(req)); + ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror, + dss_id, true); if (IS_ERR(ds)) { if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) goto out_mds; @@ -954,7 +1133,7 @@ retry: goto retry; } pgm = &pgio->pg_mirrors[i]; - pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize; + pgm->pg_bsize = mirror->dss[dss_id].mirror_ds->ds_versions[0].wsize; } if (NFS_SERVER(pgio->pg_inode)->flags & @@ -1020,14 +1199,14 @@ ff_layout_pg_get_mirror_write(struct nfs_pageio_descriptor *desc, u32 idx) static const struct nfs_pageio_ops ff_layout_pg_read_ops = { .pg_init = ff_layout_pg_init_read, - .pg_test = pnfs_generic_pg_test, + .pg_test = ff_layout_pg_test, .pg_doio = pnfs_generic_pg_readpages, .pg_cleanup = pnfs_generic_pg_cleanup, }; static const struct nfs_pageio_ops ff_layout_pg_write_ops = { .pg_init = ff_layout_pg_init_write, - .pg_test = pnfs_generic_pg_test, + .pg_test = ff_layout_pg_test, .pg_doio = 
pnfs_generic_pg_writepages, .pg_get_mirror_count = ff_layout_pg_get_mirror_count_write, .pg_cleanup = pnfs_generic_pg_cleanup, @@ -1075,9 +1254,11 @@ static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr) { u32 idx = hdr->pgio_mirror_idx + 1; u32 new_idx = 0; + u32 dss_id = 0; struct nfs4_pnfs_ds *ds; - ds = ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx); + ds = ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx, + hdr->args.offset, &dss_id); if (IS_ERR(ds)) pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg); else @@ -1114,11 +1295,11 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg, - u32 idx) + u32 idx, u32 dss_id) { struct pnfs_layout_hdr *lo = lseg->pls_layout; struct inode *inode = lo->plh_inode; - struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); + struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx, dss_id); struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; switch (op_status) { @@ -1215,9 +1396,9 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, u32 op_status, struct nfs_client *clp, struct pnfs_layout_segment *lseg, - u32 idx) + u32 idx, u32 dss_id) { - struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); + struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx, dss_id); switch (op_status) { case NFS_OK: @@ -1281,12 +1462,12 @@ static int ff_layout_async_handle_error(struct rpc_task *task, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg, - u32 idx) + u32 idx, u32 dss_id) { int vers = clp->cl_nfs_mod->rpc_vers->number; if (task->tk_status >= 0) { - ff_layout_mark_ds_reachable(lseg, idx); + ff_layout_mark_ds_reachable(lseg, idx, dss_id); return 0; } @@ -1297,10 +1478,10 @@ static int ff_layout_async_handle_error(struct rpc_task *task, switch (vers) { case 3: return ff_layout_async_handle_error_v3(task, op_status, clp, - lseg, idx); + lseg, idx, dss_id); case 4: return ff_layout_async_handle_error_v4(task, op_status, state, - clp, lseg, idx); + clp, lseg, idx, dss_id); default: /* should never happen */ WARN_ON_ONCE(1); @@ -1309,7 +1490,7 @@ static int ff_layout_async_handle_error(struct rpc_task *task, } static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, - u32 idx, u64 offset, u64 length, + u32 idx, u32 dss_id, u64 offset, u64 length, u32 *op_status, int opnum, int error) { struct nfs4_ff_layout_mirror *mirror; @@ -1347,7 +1528,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, mirror = FF_LAYOUT_COMP(lseg, idx); err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), - mirror, offset, length, status, opnum, + mirror, dss_id, offset, length, status, opnum, nfs_io_gfp_mask()); switch (status) { @@ -1356,7 +1537,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, case NFS4ERR_PERM: break; case NFS4ERR_NXIO: - ff_layout_mark_ds_unreachable(lseg, idx); + ff_layout_mark_ds_unreachable(lseg, idx, dss_id); /* * Don't return the layout if this is a read and we still * have layouts to try @@ -1376,10 +1557,16 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, static int ff_layout_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) { + struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(hdr->lseg); + u32 dss_id = nfs4_ff_layout_calc_dss_id( + flseg->stripe_unit, + 
flseg->mirror_array[hdr->pgio_mirror_idx]->dss_count, + hdr->args.offset); int err; if (task->tk_status < 0) { - ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx, + ff_layout_io_track_ds_error(hdr->lseg, + hdr->pgio_mirror_idx, dss_id, hdr->args.offset, hdr->args.count, &hdr->res.op_status, OP_READ, task->tk_status); @@ -1389,7 +1576,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task, err = ff_layout_async_handle_error(task, hdr->res.op_status, hdr->args.context->state, hdr->ds_clp, hdr->lseg, - hdr->pgio_mirror_idx); + hdr->pgio_mirror_idx, + dss_id); trace_nfs4_pnfs_read(hdr, err); clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); @@ -1445,23 +1633,47 @@ ff_layout_set_layoutcommit(struct inode *inode, static void ff_layout_read_record_layoutstats_start(struct rpc_task *task, struct nfs_pgio_header *hdr) { + struct nfs4_ff_layout_mirror *mirror; + u32 dss_id; + if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags)) return; - nfs4_ff_layout_stat_io_start_read(hdr->inode, - FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), - hdr->args.count, - task->tk_start); + + mirror = FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx); + dss_id = nfs4_ff_layout_calc_dss_id( + FF_LAYOUT_LSEG(hdr->lseg)->stripe_unit, + mirror->dss_count, + hdr->args.offset); + + nfs4_ff_layout_stat_io_start_read( + hdr->inode, + mirror, + dss_id, + hdr->args.count, + task->tk_start); } static void ff_layout_read_record_layoutstats_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { + struct nfs4_ff_layout_mirror *mirror; + u32 dss_id; + if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags)) return; - nfs4_ff_layout_stat_io_end_read(task, - FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), - hdr->args.count, - hdr->res.count); + + mirror = FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx); + dss_id = nfs4_ff_layout_calc_dss_id( + FF_LAYOUT_LSEG(hdr->lseg)->stripe_unit, + mirror->dss_count, + hdr->args.offset); + + nfs4_ff_layout_stat_io_end_read( + task, + mirror, + dss_id, + hdr->args.count, + hdr->res.count); set_bit(NFS_LSEG_LAYOUTRETURN, &hdr->lseg->pls_flags); } @@ -1549,11 +1761,17 @@ static void ff_layout_read_release(void *data) static int ff_layout_write_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) { + struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(hdr->lseg); + u32 dss_id = nfs4_ff_layout_calc_dss_id( + flseg->stripe_unit, + flseg->mirror_array[hdr->pgio_mirror_idx]->dss_count, + hdr->args.offset); loff_t end_offs = 0; int err; if (task->tk_status < 0) { - ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx, + ff_layout_io_track_ds_error(hdr->lseg, + hdr->pgio_mirror_idx, dss_id, hdr->args.offset, hdr->args.count, &hdr->res.op_status, OP_WRITE, task->tk_status); @@ -1563,7 +1781,8 @@ static int ff_layout_write_done_cb(struct rpc_task *task, err = ff_layout_async_handle_error(task, hdr->res.op_status, hdr->args.context->state, hdr->ds_clp, hdr->lseg, - hdr->pgio_mirror_idx); + hdr->pgio_mirror_idx, + dss_id); trace_nfs4_pnfs_write(hdr, err); clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); @@ -1601,9 +1820,11 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *data) { int err; + u32 idx = calc_mirror_idx_from_commit(data->lseg, data->ds_commit_index); + u32 dss_id = calc_dss_id_from_commit(data->lseg, data->ds_commit_index); if (task->tk_status < 0) { - ff_layout_io_track_ds_error(data->lseg, data->ds_commit_index, + ff_layout_io_track_ds_error(data->lseg, idx, dss_id, data->args.offset, data->args.count, &data->res.op_status, OP_COMMIT, 
task->tk_status); @@ -1611,8 +1832,8 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, } err = ff_layout_async_handle_error(task, data->res.op_status, - NULL, data->ds_clp, data->lseg, - data->ds_commit_index); + NULL, data->ds_clp, data->lseg, idx, + dss_id); trace_nfs4_pnfs_commit_ds(data, err); switch (err) { @@ -1631,30 +1852,54 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, } ff_layout_set_layoutcommit(data->inode, data->lseg, data->lwb); - return 0; } static void ff_layout_write_record_layoutstats_start(struct rpc_task *task, struct nfs_pgio_header *hdr) { + struct nfs4_ff_layout_mirror *mirror; + u32 dss_id; + if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags)) return; - nfs4_ff_layout_stat_io_start_write(hdr->inode, - FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), - hdr->args.count, - task->tk_start); + + mirror = FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx); + dss_id = nfs4_ff_layout_calc_dss_id( + FF_LAYOUT_LSEG(hdr->lseg)->stripe_unit, + mirror->dss_count, + hdr->args.offset); + + nfs4_ff_layout_stat_io_start_write( + hdr->inode, + mirror, + dss_id, + hdr->args.count, + task->tk_start); } static void ff_layout_write_record_layoutstats_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { + struct nfs4_ff_layout_mirror *mirror; + u32 dss_id; + if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags)) return; - nfs4_ff_layout_stat_io_end_write(task, - FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), - hdr->args.count, hdr->res.count, - hdr->res.verf->committed); + + mirror = FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx); + dss_id = nfs4_ff_layout_calc_dss_id( + FF_LAYOUT_LSEG(hdr->lseg)->stripe_unit, + mirror->dss_count, + hdr->args.offset); + + nfs4_ff_layout_stat_io_end_write( + task, + mirror, + dss_id, + hdr->args.count, + hdr->res.count, + hdr->res.verf->committed); set_bit(NFS_LSEG_LAYOUTRETURN, &hdr->lseg->pls_flags); } @@ -1737,10 +1982,16 @@ static void ff_layout_write_release(void *data) static void ff_layout_commit_record_layoutstats_start(struct rpc_task *task, struct nfs_commit_data *cdata) { + u32 idx, dss_id; + if (test_and_set_bit(NFS_IOHDR_STAT, &cdata->flags)) return; + + idx = calc_mirror_idx_from_commit(cdata->lseg, cdata->ds_commit_index); + dss_id = calc_dss_id_from_commit(cdata->lseg, cdata->ds_commit_index); nfs4_ff_layout_stat_io_start_write(cdata->inode, - FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), + FF_LAYOUT_COMP(cdata->lseg, idx), + dss_id, 0, task->tk_start); } @@ -1749,6 +2000,7 @@ static void ff_layout_commit_record_layoutstats_done(struct rpc_task *task, { struct nfs_page *req; __u64 count = 0; + u32 idx, dss_id; if (!test_and_clear_bit(NFS_IOHDR_STAT, &cdata->flags)) return; @@ -1757,8 +2009,12 @@ static void ff_layout_commit_record_layoutstats_done(struct rpc_task *task, list_for_each_entry(req, &cdata->pages, wb_list) count += req->wb_bytes; } + + idx = calc_mirror_idx_from_commit(cdata->lseg, cdata->ds_commit_index); + dss_id = calc_dss_id_from_commit(cdata->lseg, cdata->ds_commit_index); nfs4_ff_layout_stat_io_end_write(task, - FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), + FF_LAYOUT_COMP(cdata->lseg, idx), + dss_id, count, count, NFS_FILE_SYNC); set_bit(NFS_LSEG_LAYOUTRETURN, &cdata->lseg->pls_flags); } @@ -1872,6 +2128,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) u32 idx = hdr->pgio_mirror_idx; int vers; struct nfs_fh *fh; + u32 dss_id; bool ds_fatal_error = false; dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n", @@ -1879,22 +2136,26 @@ ff_layout_read_pagelist(struct 
nfs_pgio_header *hdr) hdr->args.pgbase, (size_t)hdr->args.count, offset); mirror = FF_LAYOUT_COMP(lseg, idx); - ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false); + dss_id = nfs4_ff_layout_calc_dss_id( + FF_LAYOUT_LSEG(lseg)->stripe_unit, + mirror->dss_count, + offset); + ds = nfs4_ff_layout_prepare_ds(lseg, mirror, dss_id, false); if (IS_ERR(ds)) { ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds)); goto out_failed; } ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp, - hdr->inode); + hdr->inode, dss_id); if (IS_ERR(ds_clnt)) goto out_failed; - ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred); + ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred, dss_id); if (!ds_cred) goto out_failed; - vers = nfs4_ff_layout_ds_version(mirror); + vers = nfs4_ff_layout_ds_version(mirror, dss_id); dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__, ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers); @@ -1902,11 +2163,11 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) hdr->pgio_done_cb = ff_layout_read_done_cb; refcount_inc(&ds->ds_clp->cl_count); hdr->ds_clp = ds->ds_clp; - fh = nfs4_ff_layout_select_ds_fh(mirror); + fh = nfs4_ff_layout_select_ds_fh(mirror, dss_id); if (fh) hdr->args.fh = fh; - nfs4_ff_layout_select_ds_stateid(mirror, &hdr->args.stateid); + nfs4_ff_layout_select_ds_stateid(mirror, dss_id, &hdr->args.stateid); /* * Note that if we ever decide to split across DSes, @@ -1916,7 +2177,8 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) hdr->mds_offset = offset; /* Start IO accounting for local read */ - localio = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh, FMODE_READ); + localio = ff_local_open_fh(lseg, idx, dss_id, ds->ds_clp, ds_cred, fh, + FMODE_READ); if (localio) { hdr->task.tk_start = ktime_get(); ff_layout_read_record_layoutstats_start(&hdr->task, hdr); @@ -1953,25 +2215,30 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) int vers; struct nfs_fh *fh; u32 idx = hdr->pgio_mirror_idx; + u32 dss_id; bool ds_fatal_error = false; mirror = FF_LAYOUT_COMP(lseg, idx); - ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true); + dss_id = nfs4_ff_layout_calc_dss_id( + FF_LAYOUT_LSEG(lseg)->stripe_unit, + mirror->dss_count, + offset); + ds = nfs4_ff_layout_prepare_ds(lseg, mirror, dss_id, true); if (IS_ERR(ds)) { ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds)); goto out_failed; } ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp, - hdr->inode); + hdr->inode, dss_id); if (IS_ERR(ds_clnt)) goto out_failed; - ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred); + ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred, dss_id); if (!ds_cred) goto out_failed; - vers = nfs4_ff_layout_ds_version(mirror); + vers = nfs4_ff_layout_ds_version(mirror, dss_id); dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n", __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count, @@ -1981,12 +2248,12 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) hdr->pgio_done_cb = ff_layout_write_done_cb; refcount_inc(&ds->ds_clp->cl_count); hdr->ds_clp = ds->ds_clp; - hdr->ds_commit_idx = idx; - fh = nfs4_ff_layout_select_ds_fh(mirror); + hdr->ds_commit_idx = calc_commit_idx(lseg, idx, dss_id); + fh = nfs4_ff_layout_select_ds_fh(mirror, dss_id); if (fh) hdr->args.fh = fh; - nfs4_ff_layout_select_ds_stateid(mirror, &hdr->args.stateid); + nfs4_ff_layout_select_ds_stateid(mirror, dss_id, &hdr->args.stateid); /* * Note that if we ever decide to split across 
DSes, @@ -1995,7 +2262,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) hdr->args.offset = offset; /* Start IO accounting for local write */ - localio = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh, + localio = ff_local_open_fh(lseg, idx, dss_id, ds->ds_clp, ds_cred, fh, FMODE_READ|FMODE_WRITE); if (localio) { hdr->task.tk_start = ktime_get(); @@ -2019,20 +2286,15 @@ out_failed: return PNFS_NOT_ATTEMPTED; } -static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) -{ - return i; -} - static struct nfs_fh * -select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i) +select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i, u32 dss_id) { struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg); /* FIXME: Assume that there is only one NFS version available * for the DS. */ - return &flseg->mirror_array[i]->fh_versions[0]; + return &flseg->mirror_array[i]->dss[dss_id].fh_versions[0]; } static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) @@ -2043,7 +2305,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) struct nfsd_file *localio; struct nfs4_ff_layout_mirror *mirror; const struct cred *ds_cred; - u32 idx; + u32 idx, dss_id; int vers, ret; struct nfs_fh *fh; @@ -2051,22 +2313,23 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))) goto out_err; - idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); + idx = calc_mirror_idx_from_commit(lseg, data->ds_commit_index); mirror = FF_LAYOUT_COMP(lseg, idx); - ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true); + dss_id = calc_dss_id_from_commit(lseg, data->ds_commit_index); + ds = nfs4_ff_layout_prepare_ds(lseg, mirror, dss_id, true); if (IS_ERR(ds)) goto out_err; ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp, - data->inode); + data->inode, dss_id); if (IS_ERR(ds_clnt)) goto out_err; - ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, data->cred); + ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, data->cred, dss_id); if (!ds_cred) goto out_err; - vers = nfs4_ff_layout_ds_version(mirror); + vers = nfs4_ff_layout_ds_version(mirror, dss_id); dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__, data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count), @@ -2075,12 +2338,12 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) data->cred = ds_cred; refcount_inc(&ds->ds_clp->cl_count); data->ds_clp = ds->ds_clp; - fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); + fh = select_ds_fh_from_commit(lseg, idx, dss_id); if (fh) data->args.fh = fh; /* Start IO accounting for local commit */ - localio = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh, + localio = ff_local_open_fh(lseg, idx, dss_id, ds->ds_clp, ds_cred, fh, FMODE_READ|FMODE_WRITE); if (localio) { data->task.tk_start = ktime_get(); @@ -2144,25 +2407,28 @@ static void ff_layout_cancel_io(struct pnfs_layout_segment *lseg) struct nfs4_pnfs_ds *ds; struct nfs_client *ds_clp; struct rpc_clnt *clnt; - u32 idx; + u32 idx, dss_id; for (idx = 0; idx < flseg->mirror_array_cnt; idx++) { mirror = flseg->mirror_array[idx]; - mirror_ds = mirror->mirror_ds; - if (IS_ERR_OR_NULL(mirror_ds)) - continue; - ds = mirror->mirror_ds->ds; - if (!ds) - continue; - ds_clp = ds->ds_clp; - if (!ds_clp) - continue; - clnt = ds_clp->cl_rpcclient; - if (!clnt) - continue; - if (!rpc_cancel_tasks(clnt, -EAGAIN, ff_layout_match_io, lseg)) - 
continue; - rpc_clnt_disconnect(clnt); + for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) { + mirror_ds = mirror->dss[dss_id].mirror_ds; + if (IS_ERR_OR_NULL(mirror_ds)) + continue; + ds = mirror->dss[dss_id].mirror_ds->ds; + if (!ds) + continue; + ds_clp = ds->ds_clp; + if (!ds_clp) + continue; + clnt = ds_clp->cl_rpcclient; + if (!clnt) + continue; + if (!rpc_cancel_tasks(clnt, -EAGAIN, + ff_layout_match_io, lseg)) + continue; + rpc_clnt_disconnect(clnt); + } } } @@ -2184,8 +2450,9 @@ ff_layout_setup_ds_info(struct pnfs_ds_commit_info *fl_cinfo, struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg); struct inode *inode = lseg->pls_layout->plh_inode; struct pnfs_commit_array *array, *new; + u32 size = flseg->mirror_array_cnt * flseg->mirror_array[0]->dss_count; - new = pnfs_alloc_commit_array(flseg->mirror_array_cnt, + new = pnfs_alloc_commit_array(size, nfs_io_gfp_mask()); if (new) { spin_lock(&inode->i_lock); @@ -2549,11 +2816,11 @@ ff_layout_encode_io_latency(struct xdr_stream *xdr, static void ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr, const struct nfs42_layoutstat_devinfo *devinfo, - struct nfs4_ff_layout_mirror *mirror) + struct nfs4_ff_layout_ds_stripe *dss_info) { struct nfs4_pnfs_ds_addr *da; - struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds; - struct nfs_fh *fh = &mirror->fh_versions[0]; + struct nfs4_pnfs_ds *ds = dss_info->mirror_ds->ds; + struct nfs_fh *fh = &dss_info->fh_versions[0]; __be32 *p; da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node); @@ -2565,13 +2832,17 @@ ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr, p = xdr_reserve_space(xdr, 4 + fh->size); xdr_encode_opaque(p, fh->data, fh->size); /* ff_io_latency4 read */ - spin_lock(&mirror->lock); - ff_layout_encode_io_latency(xdr, &mirror->read_stat.io_stat); + spin_lock(&dss_info->mirror->lock); + ff_layout_encode_io_latency(xdr, + &dss_info->read_stat.io_stat); /* ff_io_latency4 write */ - ff_layout_encode_io_latency(xdr, &mirror->write_stat.io_stat); - spin_unlock(&mirror->lock); + ff_layout_encode_io_latency(xdr, + &dss_info->write_stat.io_stat); + spin_unlock(&dss_info->mirror->lock); /* nfstime4 */ - ff_layout_encode_nfstime(xdr, ktime_sub(ktime_get(), mirror->start_time)); + ff_layout_encode_nfstime(xdr, + ktime_sub(ktime_get(), + dss_info->start_time)); /* bool */ p = xdr_reserve_space(xdr, 4); *p = cpu_to_be32(false); @@ -2595,7 +2866,8 @@ ff_layout_encode_layoutstats(struct xdr_stream *xdr, const void *args, static void ff_layout_free_layoutstats(struct nfs4_xdr_opaque_data *opaque) { - struct nfs4_ff_layout_mirror *mirror = opaque->data; + struct nfs4_ff_layout_ds_stripe *dss_info = opaque->data; + struct nfs4_ff_layout_mirror *mirror = dss_info->mirror; ff_layout_put_mirror(mirror); } @@ -2612,37 +2884,47 @@ ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, { struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(lo); struct nfs4_ff_layout_mirror *mirror; + struct nfs4_ff_layout_ds_stripe *dss_info; struct nfs4_deviceid_node *dev; - int i = 0; + int i = 0, dss_id; list_for_each_entry(mirror, &ff_layout->mirrors, mirrors) { - if (i >= dev_limit) - break; - if (IS_ERR_OR_NULL(mirror->mirror_ds)) - continue; - if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, - &mirror->flags) && - type != NFS4_FF_OP_LAYOUTRETURN) - continue; - /* mirror refcount put in cleanup_layoutstats */ - if (!refcount_inc_not_zero(&mirror->ref)) - continue; - dev = &mirror->mirror_ds->id_node; - memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE); - 
devinfo->offset = 0; - devinfo->length = NFS4_MAX_UINT64; - spin_lock(&mirror->lock); - devinfo->read_count = mirror->read_stat.io_stat.ops_completed; - devinfo->read_bytes = mirror->read_stat.io_stat.bytes_completed; - devinfo->write_count = mirror->write_stat.io_stat.ops_completed; - devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed; - spin_unlock(&mirror->lock); - devinfo->layout_type = LAYOUT_FLEX_FILES; - devinfo->ld_private.ops = &layoutstat_ops; - devinfo->ld_private.data = mirror; - - devinfo++; - i++; + for (dss_id = 0; dss_id < mirror->dss_count; ++dss_id) { + dss_info = &mirror->dss[dss_id]; + if (i >= dev_limit) + break; + if (IS_ERR_OR_NULL(dss_info->mirror_ds)) + continue; + if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, + &mirror->flags) && + type != NFS4_FF_OP_LAYOUTRETURN) + continue; + /* mirror refcount put in cleanup_layoutstats */ + if (!refcount_inc_not_zero(&mirror->ref)) + continue; + dev = &dss_info->mirror_ds->id_node; + memcpy(&devinfo->dev_id, + &dev->deviceid, + NFS4_DEVICEID4_SIZE); + devinfo->offset = 0; + devinfo->length = NFS4_MAX_UINT64; + spin_lock(&mirror->lock); + devinfo->read_count = + dss_info->read_stat.io_stat.ops_completed; + devinfo->read_bytes = + dss_info->read_stat.io_stat.bytes_completed; + devinfo->write_count = + dss_info->write_stat.io_stat.ops_completed; + devinfo->write_bytes = + dss_info->write_stat.io_stat.bytes_completed; + spin_unlock(&mirror->lock); + devinfo->layout_type = LAYOUT_FLEX_FILES; + devinfo->ld_private.ops = &layoutstat_ops; + devinfo->ld_private.data = &mirror->dss[dss_id]; + + devinfo++; + i++; + } } return i; } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 095df09017a5..17a008c8e97c 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -21,6 +21,8 @@ * due to network error etc. 
*/ #define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096 +#define NFS4_FLEXFILE_LAYOUT_MAX_STRIPE_CNT 4096 + /* LAYOUTSTATS report interval in ms */ #define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L) #define FF_LAYOUTSTATS_MAXDEV 4 @@ -71,12 +73,12 @@ struct nfs4_ff_layoutstat { struct nfs4_ff_busy_timer busy_timer; }; -struct nfs4_ff_layout_mirror { - struct pnfs_layout_hdr *layout; - struct list_head mirrors; - u32 ds_count; - u32 efficiency; +struct nfs4_ff_layout_mirror; + +struct nfs4_ff_layout_ds_stripe { + struct nfs4_ff_layout_mirror *mirror; struct nfs4_deviceid devid; + u32 efficiency; struct nfs4_ff_layout_ds *mirror_ds; u32 fh_versions_cnt; struct nfs_fh *fh_versions; @@ -84,12 +86,19 @@ struct nfs4_ff_layout_mirror { const struct cred __rcu *ro_cred; const struct cred __rcu *rw_cred; struct nfs_file_localio nfl; - refcount_t ref; - spinlock_t lock; - unsigned long flags; struct nfs4_ff_layoutstat read_stat; struct nfs4_ff_layoutstat write_stat; ktime_t start_time; +}; + +struct nfs4_ff_layout_mirror { + struct pnfs_layout_hdr *layout; + struct list_head mirrors; + u32 dss_count; + struct nfs4_ff_layout_ds_stripe *dss; + refcount_t ref; + spinlock_t lock; + unsigned long flags; u32 report_interval; }; @@ -150,12 +159,12 @@ FF_LAYOUT_COMP(struct pnfs_layout_segment *lseg, u32 idx) } static inline struct nfs4_deviceid_node * -FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx) +FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx, u32 dss_id) { struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, idx); if (mirror != NULL) { - struct nfs4_ff_layout_ds *mirror_ds = mirror->mirror_ds; + struct nfs4_ff_layout_ds *mirror_ds = mirror->dss[dss_id].mirror_ds; if (!IS_ERR_OR_NULL(mirror_ds)) return &mirror_ds->id_node; @@ -182,9 +191,22 @@ ff_layout_no_read_on_rw(struct pnfs_layout_segment *lseg) } static inline int -nfs4_ff_layout_ds_version(const struct nfs4_ff_layout_mirror *mirror) +nfs4_ff_layout_ds_version(const struct nfs4_ff_layout_mirror *mirror, u32 dss_id) +{ + return mirror->dss[dss_id].mirror_ds->ds_versions[0].version; +} + +static inline u32 +nfs4_ff_layout_calc_dss_id(const u64 stripe_unit, const u32 dss_count, const loff_t offset) { - return mirror->mirror_ds->ds_versions[0].version; + u64 tmp = offset; + + if (dss_count == 1 || stripe_unit == 0) + return 0; + + do_div(tmp, stripe_unit); + + return do_div(tmp, dss_count); } struct nfs4_ff_layout_ds * @@ -193,9 +215,9 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds); void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds); int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, - struct nfs4_ff_layout_mirror *mirror, u64 offset, - u64 length, int status, enum nfs_opnum4 opnum, - gfp_t gfp_flags); + struct nfs4_ff_layout_mirror *mirror, + u32 dss_id, u64 offset, u64 length, int status, + enum nfs_opnum4 opnum, gfp_t gfp_flags); void ff_layout_send_layouterror(struct pnfs_layout_segment *lseg); int ff_layout_encode_ds_ioerr(struct xdr_stream *xdr, const struct list_head *head); void ff_layout_free_ds_ioerr(struct list_head *head); @@ -204,23 +226,27 @@ unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, struct list_head *head, unsigned int maxnum); struct nfs_fh * -nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror); +nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror, u32 dss_id); void nfs4_ff_layout_select_ds_stateid(const struct 
nfs4_ff_layout_mirror *mirror, - nfs4_stateid *stateid); + u32 dss_id, + nfs4_stateid *stateid); struct nfs4_pnfs_ds * nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, struct nfs4_ff_layout_mirror *mirror, + u32 dss_id, bool fail_return); struct rpc_clnt * nfs4_ff_find_or_create_ds_client(struct nfs4_ff_layout_mirror *mirror, struct nfs_client *ds_clp, - struct inode *inode); + struct inode *inode, + u32 dss_id); const struct cred *ff_layout_get_ds_cred(struct nfs4_ff_layout_mirror *mirror, const struct pnfs_layout_range *range, - const struct cred *mdscred); + const struct cred *mdscred, + u32 dss_id); bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg); bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg); diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 30365ec782bb..c55ea8fa3bfa 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -44,7 +44,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, { struct xdr_stream stream; struct xdr_buf buf; - struct page *scratch; + struct folio *scratch; struct list_head dsaddrs; struct nfs4_pnfs_ds_addr *da; struct nfs4_ff_layout_ds *new_ds = NULL; @@ -56,7 +56,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, int i, ret = -ENOMEM; /* set up xdr stream */ - scratch = alloc_page(gfp_flags); + scratch = folio_alloc(gfp_flags, 0); if (!scratch) goto out_err; @@ -70,7 +70,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, INIT_LIST_HEAD(&dsaddrs); xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); - xdr_set_scratch_page(&stream, scratch); + xdr_set_scratch_folio(&stream, scratch); /* multipath count */ p = xdr_inline_decode(&stream, 4); @@ -163,7 +163,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, kfree(da); } - __free_page(scratch); + folio_put(scratch); return new_ds; out_err_drain_dsaddrs: @@ -177,7 +177,7 @@ out_err_drain_dsaddrs: kfree(ds_versions); out_scratch: - __free_page(scratch); + folio_put(scratch); out_err: kfree(new_ds); @@ -250,16 +250,16 @@ ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo, } int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, - struct nfs4_ff_layout_mirror *mirror, u64 offset, - u64 length, int status, enum nfs_opnum4 opnum, - gfp_t gfp_flags) + struct nfs4_ff_layout_mirror *mirror, + u32 dss_id, u64 offset, u64 length, int status, + enum nfs_opnum4 opnum, gfp_t gfp_flags) { struct nfs4_ff_layout_ds_err *dserr; if (status == 0) return 0; - if (IS_ERR_OR_NULL(mirror->mirror_ds)) + if (IS_ERR_OR_NULL(mirror->dss[dss_id].mirror_ds)) return -EINVAL; dserr = kmalloc(sizeof(*dserr), gfp_flags); @@ -271,8 +271,8 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, dserr->length = length; dserr->status = status; dserr->opnum = opnum; - nfs4_stateid_copy(&dserr->stateid, &mirror->stateid); - memcpy(&dserr->deviceid, &mirror->mirror_ds->id_node.deviceid, + nfs4_stateid_copy(&dserr->stateid, &mirror->dss[dss_id].stateid); + memcpy(&dserr->deviceid, &mirror->dss[dss_id].mirror_ds->id_node.deviceid, NFS4_DEVICEID4_SIZE); spin_lock(&flo->generic_hdr.plh_inode->i_lock); @@ -282,14 +282,14 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, } static const struct cred * -ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode) +ff_layout_get_mirror_cred(struct 
nfs4_ff_layout_mirror *mirror, u32 iomode, u32 dss_id) { const struct cred *cred, __rcu **pcred; if (iomode == IOMODE_READ) - pcred = &mirror->ro_cred; + pcred = &mirror->dss[dss_id].ro_cred; else - pcred = &mirror->rw_cred; + pcred = &mirror->dss[dss_id].rw_cred; rcu_read_lock(); do { @@ -304,43 +304,45 @@ ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode) } struct nfs_fh * -nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror) +nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror, u32 dss_id) { /* FIXME: For now assume there is only 1 version available for the DS */ - return &mirror->fh_versions[0]; + return &mirror->dss[dss_id].fh_versions[0]; } void nfs4_ff_layout_select_ds_stateid(const struct nfs4_ff_layout_mirror *mirror, - nfs4_stateid *stateid) + u32 dss_id, + nfs4_stateid *stateid) { - if (nfs4_ff_layout_ds_version(mirror) == 4) - nfs4_stateid_copy(stateid, &mirror->stateid); + if (nfs4_ff_layout_ds_version(mirror, dss_id) == 4) + nfs4_stateid_copy(stateid, &mirror->dss[dss_id].stateid); } static bool ff_layout_init_mirror_ds(struct pnfs_layout_hdr *lo, - struct nfs4_ff_layout_mirror *mirror) + struct nfs4_ff_layout_mirror *mirror, + u32 dss_id) { if (mirror == NULL) goto outerr; - if (mirror->mirror_ds == NULL) { + if (mirror->dss[dss_id].mirror_ds == NULL) { struct nfs4_deviceid_node *node; struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV); node = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), - &mirror->devid, lo->plh_lc_cred, + &mirror->dss[dss_id].devid, lo->plh_lc_cred, GFP_KERNEL); if (node) mirror_ds = FF_LAYOUT_MIRROR_DS(node); /* check for race with another call to this function */ - if (cmpxchg(&mirror->mirror_ds, NULL, mirror_ds) && + if (cmpxchg(&mirror->dss[dss_id].mirror_ds, NULL, mirror_ds) && mirror_ds != ERR_PTR(-ENODEV)) nfs4_put_deviceid_node(node); } - if (IS_ERR(mirror->mirror_ds)) + if (IS_ERR(mirror->dss[dss_id].mirror_ds)) goto outerr; return true; @@ -352,6 +354,7 @@ outerr: * nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call * @lseg: the layout segment we're operating on * @mirror: layout mirror describing the DS to use + * @dss_id: DS stripe id to select stripe to use * @fail_return: return layout on connect failure? * * Try to prepare a DS connection to accept an RPC call. This involves @@ -368,6 +371,7 @@ outerr: struct nfs4_pnfs_ds * nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, struct nfs4_ff_layout_mirror *mirror, + u32 dss_id, bool fail_return) { struct nfs4_pnfs_ds *ds; @@ -376,10 +380,10 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, unsigned int max_payload; int status = -EAGAIN; - if (!ff_layout_init_mirror_ds(lseg->pls_layout, mirror)) + if (!ff_layout_init_mirror_ds(lseg->pls_layout, mirror, dss_id)) goto noconnect; - ds = mirror->mirror_ds->ds; + ds = mirror->dss[dss_id].mirror_ds->ds; if (READ_ONCE(ds->ds_clp)) goto out; /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ @@ -388,10 +392,10 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, /* FIXME: For now we assume the server sent only one version of NFS * to use for the DS. 
*/ - status = nfs4_pnfs_ds_connect(s, ds, &mirror->mirror_ds->id_node, + status = nfs4_pnfs_ds_connect(s, ds, &mirror->dss[dss_id].mirror_ds->id_node, dataserver_timeo, dataserver_retrans, - mirror->mirror_ds->ds_versions[0].version, - mirror->mirror_ds->ds_versions[0].minor_version); + mirror->dss[dss_id].mirror_ds->ds_versions[0].version, + mirror->dss[dss_id].mirror_ds->ds_versions[0].minor_version); /* connect success, check rsize/wsize limit */ if (!status) { @@ -404,15 +408,15 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, max_payload = nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient), NULL); - if (mirror->mirror_ds->ds_versions[0].rsize > max_payload) - mirror->mirror_ds->ds_versions[0].rsize = max_payload; - if (mirror->mirror_ds->ds_versions[0].wsize > max_payload) - mirror->mirror_ds->ds_versions[0].wsize = max_payload; + if (mirror->dss[dss_id].mirror_ds->ds_versions[0].rsize > max_payload) + mirror->dss[dss_id].mirror_ds->ds_versions[0].rsize = max_payload; + if (mirror->dss[dss_id].mirror_ds->ds_versions[0].wsize > max_payload) + mirror->dss[dss_id].mirror_ds->ds_versions[0].wsize = max_payload; goto out; } noconnect: ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), - mirror, lseg->pls_range.offset, + mirror, dss_id, lseg->pls_range.offset, lseg->pls_range.length, NFS4ERR_NXIO, OP_ILLEGAL, GFP_NOIO); ff_layout_send_layouterror(lseg); @@ -426,12 +430,13 @@ out: const struct cred * ff_layout_get_ds_cred(struct nfs4_ff_layout_mirror *mirror, const struct pnfs_layout_range *range, - const struct cred *mdscred) + const struct cred *mdscred, + u32 dss_id) { const struct cred *cred; - if (mirror && !mirror->mirror_ds->ds_versions[0].tightly_coupled) { - cred = ff_layout_get_mirror_cred(mirror, range->iomode); + if (mirror && !mirror->dss[dss_id].mirror_ds->ds_versions[0].tightly_coupled) { + cred = ff_layout_get_mirror_cred(mirror, range->iomode, dss_id); if (!cred) cred = get_cred(mdscred); } else { @@ -445,15 +450,17 @@ ff_layout_get_ds_cred(struct nfs4_ff_layout_mirror *mirror, * @mirror: pointer to the mirror * @ds_clp: nfs_client for the DS * @inode: pointer to inode + * @dss_id: DS stripe id * * Find or create a DS rpc client with th MDS server rpc client auth flavor * in the nfs_client cl_ds_clients list. 
*/ struct rpc_clnt * nfs4_ff_find_or_create_ds_client(struct nfs4_ff_layout_mirror *mirror, - struct nfs_client *ds_clp, struct inode *inode) + struct nfs_client *ds_clp, struct inode *inode, + u32 dss_id) { - switch (mirror->mirror_ds->ds_versions[0].version) { + switch (mirror->dss[dss_id].mirror_ds->ds_versions[0].version) { case 3: /* For NFSv3 DS, flavor is set when creating DS connections */ return ds_clp->cl_rpcclient; @@ -559,16 +566,18 @@ static bool ff_read_layout_has_available_ds(struct pnfs_layout_segment *lseg) { struct nfs4_ff_layout_mirror *mirror; struct nfs4_deviceid_node *devid; - u32 idx; + u32 idx, dss_id; for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { mirror = FF_LAYOUT_COMP(lseg, idx); - if (mirror) { - if (!mirror->mirror_ds) + if (!mirror) + continue; + for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) { + if (!mirror->dss[dss_id].mirror_ds) return true; - if (IS_ERR(mirror->mirror_ds)) + if (IS_ERR(mirror->dss[dss_id].mirror_ds)) continue; - devid = &mirror->mirror_ds->id_node; + devid = &mirror->dss[dss_id].mirror_ds->id_node; if (!nfs4_test_deviceid_unavailable(devid)) return true; } @@ -581,17 +590,21 @@ static bool ff_rw_layout_has_available_ds(struct pnfs_layout_segment *lseg) { struct nfs4_ff_layout_mirror *mirror; struct nfs4_deviceid_node *devid; - u32 idx; + u32 idx, dss_id; for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { mirror = FF_LAYOUT_COMP(lseg, idx); - if (!mirror || IS_ERR(mirror->mirror_ds)) - return false; - if (!mirror->mirror_ds) - continue; - devid = &mirror->mirror_ds->id_node; - if (nfs4_test_deviceid_unavailable(devid)) + if (!mirror) return false; + for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) { + if (IS_ERR(mirror->dss[dss_id].mirror_ds)) + return false; + if (!mirror->dss[dss_id].mirror_ds) + continue; + devid = &mirror->dss[dss_id].mirror_ds->id_node; + if (nfs4_test_deviceid_unavailable(devid)) + return false; + } } return FF_LAYOUT_MIRROR_COUNT(lseg) != 0; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9bdaf7f38bed..18b57c7c2f97 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1073,6 +1073,21 @@ out_no_revalidate: if (S_ISDIR(inode->i_mode)) stat->blksize = NFS_SERVER(inode)->dtsize; stat->btime = NFS_I(inode)->btime; + + /* Special handling for STATX_DIOALIGN and STATX_DIO_READ_ALIGN + * - NFS doesn't have DIO alignment constraints, avoid getting + * these DIO attrs from remote and just respond with most + * accommodating limits (so client will issue supported DIO). + * - this is unintuitive, but the most coarse-grained + * dio_offset_align is the most accommodating. 
+ */ + if ((request_mask & (STATX_DIOALIGN | STATX_DIO_READ_ALIGN)) && + S_ISREG(inode->i_mode)) { + stat->result_mask |= STATX_DIOALIGN | STATX_DIO_READ_ALIGN; + stat->dio_mem_align = 4; /* 4-byte alignment */ + stat->dio_offset_align = PAGE_SIZE; + stat->dio_read_offset_align = stat->dio_offset_align; + } out: trace_nfs_getattr_exit(inode, err); return err; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index c0a44f389f8f..2ecd38e1d17a 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -456,6 +456,16 @@ extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode); #if IS_ENABLED(CONFIG_NFS_LOCALIO) /* localio.c */ +struct nfs_local_dio { + u32 mem_align; + u32 offset_align; + loff_t middle_offset; + loff_t end_offset; + ssize_t start_len; /* Length for misaligned first extent */ + ssize_t middle_len; /* Length for DIO-aligned middle extent */ + ssize_t end_len; /* Length for misaligned last extent */ +}; + extern void nfs_local_probe_async(struct nfs_client *); extern void nfs_local_probe_async_work(struct work_struct *); extern struct nfsd_file *nfs_local_open_fh(struct nfs_client *, diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 97abf62f109d..b575f0e6c7c8 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -30,6 +30,8 @@ #define NFSDBG_FACILITY NFSDBG_VFS +#define NFSLOCAL_MAX_IOS 3 + struct nfs_local_kiocb { struct kiocb kiocb; struct bio_vec *bvec; @@ -37,6 +39,14 @@ struct nfs_local_kiocb { struct work_struct work; void (*aio_complete_work)(struct work_struct *); struct nfsd_file *localio; + /* Begin mostly DIO-specific members */ + size_t end_len; + short int end_iter_index; + short int n_iters; + bool iter_is_dio_aligned[NFSLOCAL_MAX_IOS]; + loff_t offset[NFSLOCAL_MAX_IOS] ____cacheline_aligned; + struct iov_iter iters[NFSLOCAL_MAX_IOS]; + /* End mostly DIO-specific members */ }; struct nfs_local_fsync_ctx { @@ -49,11 +59,6 @@ struct nfs_local_fsync_ctx { static bool localio_enabled __read_mostly = true; module_param(localio_enabled, bool, 0644); -static bool localio_O_DIRECT_semantics __read_mostly = false; -module_param(localio_O_DIRECT_semantics, bool, 0644); -MODULE_PARM_DESC(localio_O_DIRECT_semantics, - "LOCALIO will use O_DIRECT semantics to filesystem."); - static inline bool nfs_client_is_local(const struct nfs_client *clp) { return !!rcu_access_pointer(clp->cl_uuid.net); @@ -231,13 +236,13 @@ __nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, struct nfsd_file __rcu **pnf, const fmode_t mode) { + int status = 0; struct nfsd_file *localio; localio = nfs_open_local_fh(&clp->cl_uuid, clp->cl_rpcclient, cred, fh, nfl, pnf, mode); if (IS_ERR(localio)) { - int status = PTR_ERR(localio); - trace_nfs_local_open_fh(fh, mode, status); + status = PTR_ERR(localio); switch (status) { case -ENOMEM: case -ENXIO: @@ -247,6 +252,7 @@ __nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, nfs_local_probe(clp); } } + trace_nfs_local_open_fh(fh, mode, status); return localio; } @@ -281,23 +287,6 @@ nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, } EXPORT_SYMBOL_GPL(nfs_local_open_fh); -static struct bio_vec * -nfs_bvec_alloc_and_import_pagevec(struct page **pagevec, - unsigned int npages, gfp_t flags) -{ - struct bio_vec *bvec, *p; - - bvec = kmalloc_array(npages, sizeof(*bvec), flags); - if (bvec != NULL) { - for (p = bvec; npages > 0; p++, pagevec++, npages--) { - p->bv_page = *pagevec; - p->bv_len = PAGE_SIZE; - p->bv_offset = 0; - } - } - return bvec; -} - static void nfs_local_iocb_free(struct 
nfs_local_kiocb *iocb) { @@ -311,40 +300,191 @@ nfs_local_iocb_alloc(struct nfs_pgio_header *hdr, { struct nfs_local_kiocb *iocb; - iocb = kmalloc(sizeof(*iocb), flags); + iocb = kzalloc(sizeof(*iocb), flags); if (iocb == NULL) return NULL; - iocb->bvec = nfs_bvec_alloc_and_import_pagevec(hdr->page_array.pagevec, - hdr->page_array.npages, flags); + + iocb->bvec = kmalloc_array(hdr->page_array.npages, + sizeof(struct bio_vec), flags); if (iocb->bvec == NULL) { kfree(iocb); return NULL; } - if (localio_O_DIRECT_semantics && - test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) { - iocb->kiocb.ki_filp = file; - iocb->kiocb.ki_flags = IOCB_DIRECT; - } else - init_sync_kiocb(&iocb->kiocb, file); + init_sync_kiocb(&iocb->kiocb, file); - iocb->kiocb.ki_pos = hdr->args.offset; iocb->hdr = hdr; iocb->kiocb.ki_flags &= ~IOCB_APPEND; iocb->aio_complete_work = NULL; + iocb->end_iter_index = -1; + return iocb; } -static void -nfs_local_iter_init(struct iov_iter *i, struct nfs_local_kiocb *iocb, int dir) +static bool +nfs_is_local_dio_possible(struct nfs_local_kiocb *iocb, int rw, + size_t len, struct nfs_local_dio *local_dio) +{ + struct nfs_pgio_header *hdr = iocb->hdr; + loff_t offset = hdr->args.offset; + u32 nf_dio_mem_align, nf_dio_offset_align, nf_dio_read_offset_align; + loff_t start_end, orig_end, middle_end; + + nfs_to->nfsd_file_dio_alignment(iocb->localio, &nf_dio_mem_align, + &nf_dio_offset_align, &nf_dio_read_offset_align); + if (rw == ITER_DEST) + nf_dio_offset_align = nf_dio_read_offset_align; + + if (unlikely(!nf_dio_mem_align || !nf_dio_offset_align)) + return false; + if (unlikely(nf_dio_offset_align > PAGE_SIZE)) + return false; + if (unlikely(len < nf_dio_offset_align)) + return false; + + local_dio->mem_align = nf_dio_mem_align; + local_dio->offset_align = nf_dio_offset_align; + + start_end = round_up(offset, nf_dio_offset_align); + orig_end = offset + len; + middle_end = round_down(orig_end, nf_dio_offset_align); + + local_dio->middle_offset = start_end; + local_dio->end_offset = middle_end; + + local_dio->start_len = start_end - offset; + local_dio->middle_len = middle_end - start_end; + local_dio->end_len = orig_end - middle_end; + + if (rw == ITER_DEST) + trace_nfs_local_dio_read(hdr->inode, offset, len, local_dio); + else + trace_nfs_local_dio_write(hdr->inode, offset, len, local_dio); + return true; +} + +static bool nfs_iov_iter_aligned_bvec(const struct iov_iter *i, + unsigned int addr_mask, unsigned int len_mask) +{ + const struct bio_vec *bvec = i->bvec; + size_t skip = i->iov_offset; + size_t size = i->count; + + if (size & len_mask) + return false; + do { + size_t len = bvec->bv_len; + + if (len > size) + len = size; + if ((unsigned long)(bvec->bv_offset + skip) & addr_mask) + return false; + bvec++; + size -= len; + skip = 0; + } while (size); + + return true; +} + +/* + * Setup as many as 3 iov_iter based on extents described by @local_dio. + * Returns the number of iov_iter that were setup. + */ +static int +nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw, + unsigned int nvecs, size_t len, + struct nfs_local_dio *local_dio) +{ + int n_iters = 0; + struct iov_iter *iters = iocb->iters; + + /* Setup misaligned start? */ + if (local_dio->start_len) { + iov_iter_bvec(&iters[n_iters], rw, iocb->bvec, nvecs, len); + iters[n_iters].count = local_dio->start_len; + iocb->offset[n_iters] = iocb->hdr->args.offset; + iocb->iter_is_dio_aligned[n_iters] = false; + ++n_iters; + } + + /* Setup misaligned end? 
+ * If so, the end is purposely setup to be issued using buffered IO + * before the middle (which will use DIO, if DIO-aligned, with AIO). + * This creates problems if/when the end results in a partial write. + * So must save index and length of end to handle this corner case. + */ + if (local_dio->end_len) { + iov_iter_bvec(&iters[n_iters], rw, iocb->bvec, nvecs, len); + iocb->offset[n_iters] = local_dio->end_offset; + iov_iter_advance(&iters[n_iters], + local_dio->start_len + local_dio->middle_len); + iocb->iter_is_dio_aligned[n_iters] = false; + /* Save index and length of end */ + iocb->end_iter_index = n_iters; + iocb->end_len = local_dio->end_len; + ++n_iters; + } + + /* Setup DIO-aligned middle to be issued last, to allow for + * DIO with AIO completion (see nfs_local_call_{read,write}). + */ + iov_iter_bvec(&iters[n_iters], rw, iocb->bvec, nvecs, len); + if (local_dio->start_len) + iov_iter_advance(&iters[n_iters], local_dio->start_len); + iters[n_iters].count -= local_dio->end_len; + iocb->offset[n_iters] = local_dio->middle_offset; + + iocb->iter_is_dio_aligned[n_iters] = + nfs_iov_iter_aligned_bvec(&iters[n_iters], + local_dio->mem_align-1, local_dio->offset_align-1); + + if (unlikely(!iocb->iter_is_dio_aligned[n_iters])) { + trace_nfs_local_dio_misaligned(iocb->hdr->inode, + iocb->hdr->args.offset, len, local_dio); + return 0; /* no DIO-aligned IO possible */ + } + ++n_iters; + + iocb->n_iters = n_iters; + return n_iters; +} + +static noinline_for_stack void +nfs_local_iters_init(struct nfs_local_kiocb *iocb, int rw) { struct nfs_pgio_header *hdr = iocb->hdr; + struct page **pagevec = hdr->page_array.pagevec; + unsigned long v, total; + unsigned int base; + size_t len; + + v = 0; + total = hdr->args.count; + base = hdr->args.pgbase; + while (total && v < hdr->page_array.npages) { + len = min_t(size_t, total, PAGE_SIZE - base); + bvec_set_page(&iocb->bvec[v], *pagevec, len, base); + total -= len; + ++pagevec; + ++v; + base = 0; + } + len = hdr->args.count - total; + + if (test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) { + struct nfs_local_dio local_dio; + + if (nfs_is_local_dio_possible(iocb, rw, len, &local_dio) && + nfs_local_iters_setup_dio(iocb, rw, v, len, &local_dio) != 0) + return; /* is DIO-aligned */ + } - iov_iter_bvec(i, dir, iocb->bvec, hdr->page_array.npages, - hdr->args.count + hdr->args.pgbase); - if (hdr->args.pgbase != 0) - iov_iter_advance(i, hdr->args.pgbase); + /* Use buffered IO */ + iocb->offset[0] = hdr->args.offset; + iov_iter_bvec(&iocb->iters[0], rw, iocb->bvec, v, len); + iocb->n_iters = 1; } static void @@ -367,10 +507,12 @@ nfs_local_pgio_init(struct nfs_pgio_header *hdr, static void nfs_local_pgio_done(struct nfs_pgio_header *hdr, long status) { + /* Must handle partial completions */ if (status >= 0) { - hdr->res.count = status; - hdr->res.op_status = NFS4_OK; - hdr->task.tk_status = 0; + hdr->res.count += status; + /* @hdr was initialized to 0 (zeroed during allocation) */ + if (hdr->task.tk_status == 0) + hdr->res.op_status = NFS4_OK; } else { hdr->res.op_status = nfs_localio_errno_to_nfs4_stat(status); hdr->task.tk_status = status; @@ -378,12 +520,18 @@ nfs_local_pgio_done(struct nfs_pgio_header *hdr, long status) } static void +nfs_local_iocb_release(struct nfs_local_kiocb *iocb) +{ + nfs_local_file_put(iocb->localio); + nfs_local_iocb_free(iocb); +} + +static void nfs_local_pgio_release(struct nfs_local_kiocb *iocb) { struct nfs_pgio_header *hdr = iocb->hdr; - nfs_local_file_put(iocb->localio); - nfs_local_iocb_free(iocb); + 
nfs_local_iocb_release(iocb); nfs_local_hdr_release(hdr, hdr->task.tk_ops); } @@ -405,7 +553,10 @@ nfs_local_read_done(struct nfs_local_kiocb *iocb, long status) struct nfs_pgio_header *hdr = iocb->hdr; struct file *filp = iocb->kiocb.ki_filp; - nfs_local_pgio_done(hdr, status); + if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) { + /* Underlying FS will return -EINVAL if misaligned DIO is attempted. */ + pr_info_ratelimited("nfs: Unexpected direct I/O read alignment failure\n"); + } /* * Must clear replen otherwise NFSv3 data corruption will occur @@ -434,6 +585,7 @@ static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret) struct nfs_local_kiocb *iocb = container_of(kiocb, struct nfs_local_kiocb, kiocb); + nfs_local_pgio_done(iocb->hdr, ret); nfs_local_read_done(iocb, ret); nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */ } @@ -444,14 +596,25 @@ static void nfs_local_call_read(struct work_struct *work) container_of(work, struct nfs_local_kiocb, work); struct file *filp = iocb->kiocb.ki_filp; const struct cred *save_cred; - struct iov_iter iter; ssize_t status; save_cred = override_creds(filp->f_cred); - nfs_local_iter_init(&iter, iocb, READ); + for (int i = 0; i < iocb->n_iters ; i++) { + if (iocb->iter_is_dio_aligned[i]) { + iocb->kiocb.ki_flags |= IOCB_DIRECT; + iocb->kiocb.ki_complete = nfs_local_read_aio_complete; + iocb->aio_complete_work = nfs_local_read_aio_complete_work; + } - status = filp->f_op->read_iter(&iocb->kiocb, &iter); + iocb->kiocb.ki_pos = iocb->offset[i]; + status = filp->f_op->read_iter(&iocb->kiocb, &iocb->iters[i]); + if (status != -EIOCBQUEUED) { + nfs_local_pgio_done(iocb->hdr, status); + if (iocb->hdr->task.tk_status) + break; + } + } revert_creds(save_cred); @@ -462,33 +625,17 @@ static void nfs_local_call_read(struct work_struct *work) } static int -nfs_do_local_read(struct nfs_pgio_header *hdr, - struct nfsd_file *localio, +nfs_local_do_read(struct nfs_local_kiocb *iocb, const struct rpc_call_ops *call_ops) { - struct nfs_local_kiocb *iocb; - struct file *file = nfs_to->nfsd_file_file(localio); - - /* Don't support filesystems without read_iter */ - if (!file->f_op->read_iter) - return -EAGAIN; + struct nfs_pgio_header *hdr = iocb->hdr; dprintk("%s: vfs_read count=%u pos=%llu\n", __func__, hdr->args.count, hdr->args.offset); - iocb = nfs_local_iocb_alloc(hdr, file, GFP_KERNEL); - if (iocb == NULL) - return -ENOMEM; - iocb->localio = localio; - nfs_local_pgio_init(hdr, call_ops); hdr->res.eof = false; - if (iocb->kiocb.ki_flags & IOCB_DIRECT) { - iocb->kiocb.ki_complete = nfs_local_read_aio_complete; - iocb->aio_complete_work = nfs_local_read_aio_complete_work; - } - INIT_WORK(&iocb->work, nfs_local_call_read); queue_work(nfslocaliod_workqueue, &iocb->work); @@ -597,7 +744,13 @@ nfs_local_write_done(struct nfs_local_kiocb *iocb, long status) dprintk("%s: wrote %ld bytes.\n", __func__, status > 0 ? status : 0); + if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) { + /* Underlying FS will return -EINVAL if misaligned DIO is attempted. 
*/ + pr_info_ratelimited("nfs: Unexpected direct I/O write alignment failure\n"); + } + /* Handle short writes as if they are ENOSPC */ + status = hdr->res.count; if (status > 0 && status < hdr->args.count) { hdr->mds_offset += status; hdr->args.offset += status; @@ -605,11 +758,11 @@ nfs_local_write_done(struct nfs_local_kiocb *iocb, long status) hdr->args.count -= status; nfs_set_pgio_error(hdr, -ENOSPC, hdr->args.offset); status = -ENOSPC; + /* record -ENOSPC in terms of nfs_local_pgio_done */ + nfs_local_pgio_done(hdr, status); } - if (status < 0) + if (hdr->task.tk_status < 0) nfs_reset_boot_verifier(inode); - - nfs_local_pgio_done(hdr, status); } static void nfs_local_write_aio_complete_work(struct work_struct *work) @@ -626,6 +779,7 @@ static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret) struct nfs_local_kiocb *iocb = container_of(kiocb, struct nfs_local_kiocb, kiocb); + nfs_local_pgio_done(iocb->hdr, ret); nfs_local_write_done(iocb, ret); nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */ } @@ -637,16 +791,53 @@ static void nfs_local_call_write(struct work_struct *work) struct file *filp = iocb->kiocb.ki_filp; unsigned long old_flags = current->flags; const struct cred *save_cred; - struct iov_iter iter; ssize_t status; current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO; save_cred = override_creds(filp->f_cred); - nfs_local_iter_init(&iter, iocb, WRITE); - file_start_write(filp); - status = filp->f_op->write_iter(&iocb->kiocb, &iter); + for (int i = 0; i < iocb->n_iters ; i++) { + if (iocb->iter_is_dio_aligned[i]) { + iocb->kiocb.ki_flags |= IOCB_DIRECT; + iocb->kiocb.ki_complete = nfs_local_write_aio_complete; + iocb->aio_complete_work = nfs_local_write_aio_complete_work; + } +retry: + iocb->kiocb.ki_pos = iocb->offset[i]; + status = filp->f_op->write_iter(&iocb->kiocb, &iocb->iters[i]); + if (status != -EIOCBQUEUED) { + if (unlikely(status >= 0 && status < iocb->iters[i].count)) { + /* partial write */ + if (i == iocb->end_iter_index) { + /* Must not account partial end, otherwise, due + * to end being issued before middle: the partial + * write accounting in nfs_local_write_done() + * would incorrectly advance hdr->args.offset + */ + status = 0; + } else { + /* Partial write at start or buffered middle, + * exit early. + */ + nfs_local_pgio_done(iocb->hdr, status); + break; + } + } else if (unlikely(status == -ENOTBLK && + (iocb->kiocb.ki_flags & IOCB_DIRECT))) { + /* VFS will return -ENOTBLK if DIO WRITE fails to + * invalidate the page cache. Retry using buffered IO. + */ + iocb->kiocb.ki_flags &= ~IOCB_DIRECT; + iocb->kiocb.ki_complete = NULL; + iocb->aio_complete_work = NULL; + goto retry; + } + nfs_local_pgio_done(iocb->hdr, status); + if (iocb->hdr->task.tk_status) + break; + } + } file_end_write(filp); revert_creds(save_cred); @@ -660,26 +851,15 @@ static void nfs_local_call_write(struct work_struct *work) } static int -nfs_do_local_write(struct nfs_pgio_header *hdr, - struct nfsd_file *localio, +nfs_local_do_write(struct nfs_local_kiocb *iocb, const struct rpc_call_ops *call_ops) { - struct nfs_local_kiocb *iocb; - struct file *file = nfs_to->nfsd_file_file(localio); - - /* Don't support filesystems without write_iter */ - if (!file->f_op->write_iter) - return -EAGAIN; + struct nfs_pgio_header *hdr = iocb->hdr; dprintk("%s: vfs_write count=%u pos=%llu %s\n", __func__, hdr->args.count, hdr->args.offset, (hdr->args.stable == NFS_UNSTABLE) ? 
"unstable" : "stable"); - iocb = nfs_local_iocb_alloc(hdr, file, GFP_NOIO); - if (iocb == NULL) - return -ENOMEM; - iocb->localio = localio; - switch (hdr->args.stable) { default: break; @@ -694,43 +874,74 @@ nfs_do_local_write(struct nfs_pgio_header *hdr, nfs_set_local_verifier(hdr->inode, hdr->res.verf, hdr->args.stable); - if (iocb->kiocb.ki_flags & IOCB_DIRECT) { - iocb->kiocb.ki_complete = nfs_local_write_aio_complete; - iocb->aio_complete_work = nfs_local_write_aio_complete_work; - } - INIT_WORK(&iocb->work, nfs_local_call_write); queue_work(nfslocaliod_workqueue, &iocb->work); return 0; } +static struct nfs_local_kiocb * +nfs_local_iocb_init(struct nfs_pgio_header *hdr, struct nfsd_file *localio) +{ + struct file *file = nfs_to->nfsd_file_file(localio); + struct nfs_local_kiocb *iocb; + gfp_t gfp_mask; + int rw; + + if (hdr->rw_mode & FMODE_READ) { + if (!file->f_op->read_iter) + return ERR_PTR(-EOPNOTSUPP); + gfp_mask = GFP_KERNEL; + rw = ITER_DEST; + } else { + if (!file->f_op->write_iter) + return ERR_PTR(-EOPNOTSUPP); + gfp_mask = GFP_NOIO; + rw = ITER_SOURCE; + } + + iocb = nfs_local_iocb_alloc(hdr, file, gfp_mask); + if (iocb == NULL) + return ERR_PTR(-ENOMEM); + iocb->hdr = hdr; + iocb->localio = localio; + + nfs_local_iters_init(iocb, rw); + + return iocb; +} + int nfs_local_doio(struct nfs_client *clp, struct nfsd_file *localio, struct nfs_pgio_header *hdr, const struct rpc_call_ops *call_ops) { + struct nfs_local_kiocb *iocb; int status = 0; if (!hdr->args.count) return 0; + iocb = nfs_local_iocb_init(hdr, localio); + if (IS_ERR(iocb)) + return PTR_ERR(iocb); + switch (hdr->rw_mode) { case FMODE_READ: - status = nfs_do_local_read(hdr, localio, call_ops); + status = nfs_local_do_read(iocb, call_ops); break; case FMODE_WRITE: - status = nfs_do_local_write(hdr, localio, call_ops); + status = nfs_local_do_write(iocb, call_ops); break; default: dprintk("%s: invalid mode: %d\n", __func__, hdr->rw_mode); - status = -EINVAL; + status = -EOPNOTSUPP; } if (status != 0) { if (status == -EAGAIN) nfs_localio_disable_client(clp); - nfs_local_file_put(localio); + nfs_local_iocb_release(iocb); hdr->task.tk_status = status; nfs_local_hdr_release(hdr, call_ops); } diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 6e75c6c2d234..9eff09158518 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -23,8 +23,8 @@ #include <linux/nfs2.h> #include <linux/nfs_fs.h> #include <linux/nfs_common.h> -#include "nfstrace.h" #include "internal.h" +#include "nfstrace.h" #define NFSDBG_FACILITY NFSDBG_XDR diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 4ae01c10b7e2..e17d72908412 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -23,8 +23,8 @@ #include <linux/nfsacl.h> #include <linux/nfs_common.h> -#include "nfstrace.h" #include "internal.h" +#include "nfstrace.h" #define NFSDBG_FACILITY NFSDBG_XDR diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 6a0b5871ba3b..d537fb0c230e 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -1514,7 +1514,7 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf, ret = -ENOMEM; - res.scratch = alloc_page(GFP_KERNEL); + res.scratch = folio_alloc(GFP_KERNEL, 0); if (!res.scratch) goto out; @@ -1552,7 +1552,7 @@ out_free_pages: } kfree(pages); out_free_scratch: - __free_page(res.scratch); + folio_put(res.scratch); out: return ret; diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 4cc915d5741d..e10d83ba835e 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -1781,7 +1781,7 @@ static int 
nfs4_xdr_dec_listxattrs(struct rpc_rqst *rqstp, struct compound_hdr hdr; int status; - xdr_set_scratch_page(xdr, res->scratch); + xdr_set_scratch_folio(xdr, res->scratch); status = decode_compound_hdr(xdr, &hdr); if (status) diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index c9a0d1e420c6..7f43e890d356 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -456,4 +456,5 @@ const struct file_operations nfs4_file_operations = { #else .llseek = nfs_file_llseek, #endif + .fop_flags = FOP_DONTCACHE, }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ce61253efd45..f58098417142 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -391,7 +391,9 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent *p++ = htonl(attrs); /* bitmap */ *p++ = htonl(12); /* attribute buffer length */ *p++ = htonl(NF4DIR); + spin_lock(&dentry->d_lock); p = xdr_encode_hyper(p, NFS_FILEID(d_inode(dentry->d_parent))); + spin_unlock(&dentry->d_lock); readdir->pgbase = (char *)p - (char *)start; readdir->count -= readdir->pgbase; @@ -6160,7 +6162,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, } /* for decoding across pages */ - res.acl_scratch = alloc_page(GFP_KERNEL); + res.acl_scratch = folio_alloc(GFP_KERNEL, 0); if (!res.acl_scratch) goto out_free; @@ -6196,7 +6198,7 @@ out_free: while (--i >= 0) __free_page(pages[i]); if (res.acl_scratch) - __free_page(res.acl_scratch); + folio_put(res.acl_scratch); kfree(pages); return ret; } @@ -7872,10 +7874,10 @@ int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, return err; do { err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); - if (err != -NFS4ERR_DELAY) + if (err != -NFS4ERR_DELAY && err != -NFS4ERR_GRACE) break; ssleep(1); - } while (err == -NFS4ERR_DELAY); + } while (err == -NFS4ERR_DELAY || err == -NFSERR_GRACE); return nfs4_handle_delegation_recall_error(server, state, stateid, fl, err); } @@ -9442,7 +9444,7 @@ static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args goto out; if (rcvd->max_rqst_sz > sent->max_rqst_sz) return -EINVAL; - if (rcvd->max_resp_sz < sent->max_resp_sz) + if (rcvd->max_resp_sz > sent->max_resp_sz) return -EINVAL; if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached) return -EINVAL; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 7612e977e80b..01179f7de322 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2744,6 +2744,9 @@ out_error: case -ENETUNREACH: nfs_mark_client_ready(clp, -EIO); break; + case -EINVAL: + nfs_mark_client_ready(clp, status); + break; default: ssleep(1); break; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 49ff98571fa5..1d0e6c10f921 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4930,7 +4930,7 @@ static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap, } /* - * The prefered block size for layout directed io + * The preferred block size for layout directed io */ static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) @@ -6585,7 +6585,7 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr, int status; if (res->acl_scratch != NULL) - xdr_set_scratch_page(xdr, res->acl_scratch); + xdr_set_scratch_folio(xdr, res->acl_scratch); status = decode_compound_hdr(xdr, &hdr); if (status) goto out; diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 627115179795..6ce55e8e6b67 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -45,6 +45,23 @@ { BIT(NFS_INO_LAYOUTSTATS), "LAYOUTSTATS" 
}, \ { BIT(NFS_INO_ODIRECT), "ODIRECT" }) +#define nfs_show_wb_flags(v) \ + __print_flags(v, "|", \ + { BIT(PG_BUSY), "BUSY" }, \ + { BIT(PG_MAPPED), "MAPPED" }, \ + { BIT(PG_FOLIO), "FOLIO" }, \ + { BIT(PG_CLEAN), "CLEAN" }, \ + { BIT(PG_COMMIT_TO_DS), "COMMIT_TO_DS" }, \ + { BIT(PG_INODE_REF), "INODE_REF" }, \ + { BIT(PG_HEADLOCK), "HEADLOCK" }, \ + { BIT(PG_TEARDOWN), "TEARDOWN" }, \ + { BIT(PG_UNLOCKPAGE), "UNLOCKPAGE" }, \ + { BIT(PG_UPTODATE), "UPTODATE" }, \ + { BIT(PG_WB_END), "WB_END" }, \ + { BIT(PG_REMOVE), "REMOVE" }, \ + { BIT(PG_CONTENDED1), "CONTENDED1" }, \ + { BIT(PG_CONTENDED2), "CONTENDED2" }) + DECLARE_EVENT_CLASS(nfs_inode_event, TP_PROTO( const struct inode *inode @@ -967,7 +984,7 @@ DECLARE_EVENT_CLASS(nfs_folio_event, __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); - __entry->offset = offset, + __entry->offset = offset; __entry->count = count; ), @@ -1017,8 +1034,8 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done, __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); - __entry->offset = offset, - __entry->count = count, + __entry->offset = offset; + __entry->count = count; __entry->ret = ret; ), @@ -1051,6 +1068,73 @@ DEFINE_NFS_FOLIO_EVENT_DONE(nfs_writeback_folio_done); DEFINE_NFS_FOLIO_EVENT(nfs_invalidate_folio); DEFINE_NFS_FOLIO_EVENT_DONE(nfs_launder_folio_done); +DEFINE_NFS_FOLIO_EVENT(nfs_try_to_update_request); +DEFINE_NFS_FOLIO_EVENT_DONE(nfs_try_to_update_request_done); + +DEFINE_NFS_FOLIO_EVENT(nfs_update_folio); +DEFINE_NFS_FOLIO_EVENT_DONE(nfs_update_folio_done); + +DEFINE_NFS_FOLIO_EVENT(nfs_write_begin); +DEFINE_NFS_FOLIO_EVENT_DONE(nfs_write_begin_done); + +DEFINE_NFS_FOLIO_EVENT(nfs_write_end); +DEFINE_NFS_FOLIO_EVENT_DONE(nfs_write_end_done); + +DEFINE_NFS_FOLIO_EVENT(nfs_writepages); +DEFINE_NFS_FOLIO_EVENT_DONE(nfs_writepages_done); + +DECLARE_EVENT_CLASS(nfs_kiocb_event, + TP_PROTO( + const struct kiocb *iocb, + const struct iov_iter *iter + ), + + TP_ARGS(iocb, iter), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(u64, version) + __field(loff_t, offset) + __field(size_t, count) + __field(int, flags) + ), + + TP_fast_assign( + const struct inode *inode = file_inode(iocb->ki_filp); + const struct nfs_inode *nfsi = NFS_I(inode); + + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->version = inode_peek_iversion_raw(inode); + __entry->offset = iocb->ki_pos; + __entry->count = iov_iter_count(iter); + __entry->flags = iocb->ki_flags; + ), + + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu offset=%lld count=%zu ki_flags=%s", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, __entry->version, + __entry->offset, __entry->count, + __print_flags(__entry->flags, "|", TRACE_IOCB_STRINGS) + ) +); + +#define DEFINE_NFS_KIOCB_EVENT(name) \ + DEFINE_EVENT(nfs_kiocb_event, name, \ + TP_PROTO( \ + const struct kiocb *iocb, \ + const struct iov_iter *iter \ + ), \ + TP_ARGS(iocb, iter)) + +DEFINE_NFS_KIOCB_EVENT(nfs_file_read); +DEFINE_NFS_KIOCB_EVENT(nfs_file_write); + TRACE_EVENT(nfs_aop_readahead, TP_PROTO( const struct inode *inode, @@ -1398,6 +1482,55 @@ TRACE_EVENT(nfs_writeback_done, ) ); +DECLARE_EVENT_CLASS(nfs_page_class, + TP_PROTO( + const struct nfs_page *req + ), + + TP_ARGS(req), + + 
TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(const struct nfs_page *__private, req) + __field(loff_t, offset) + __field(unsigned int, count) + __field(unsigned long, flags) + ), + + TP_fast_assign( + const struct inode *inode = folio_inode(req->wb_folio); + const struct nfs_inode *nfsi = NFS_I(inode); + + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->req = req; + __entry->offset = req_offset(req); + __entry->count = req->wb_bytes; + __entry->flags = req->wb_flags; + ), + + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x req=%p offset=%lld count=%u flags=%s", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, __entry->fhandle, + __entry->req, __entry->offset, __entry->count, + nfs_show_wb_flags(__entry->flags) + ) +); + +#define DEFINE_NFS_PAGE_EVENT(name) \ + DEFINE_EVENT(nfs_page_class, name, \ + TP_PROTO( \ + const struct nfs_page *req \ + ), \ + TP_ARGS(req)) + +DEFINE_NFS_PAGE_EVENT(nfs_writepage_setup); +DEFINE_NFS_PAGE_EVENT(nfs_do_writepage); + DECLARE_EVENT_CLASS(nfs_page_error_class, TP_PROTO( const struct inode *inode, @@ -1599,6 +1732,76 @@ DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_completion); DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_schedule_iovec); DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_reschedule_io); +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + +DECLARE_EVENT_CLASS(nfs_local_dio_class, + TP_PROTO( + const struct inode *inode, + loff_t offset, + ssize_t count, + const struct nfs_local_dio *local_dio + ), + TP_ARGS(inode, offset, count, local_dio), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u64, fileid) + __field(u32, fhandle) + __field(loff_t, offset) + __field(ssize_t, count) + __field(u32, mem_align) + __field(u32, offset_align) + __field(loff_t, start) + __field(ssize_t, start_len) + __field(loff_t, middle) + __field(ssize_t, middle_len) + __field(loff_t, end) + __field(ssize_t, end_len) + ), + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = &nfsi->fh; + + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(fh); + __entry->offset = offset; + __entry->count = count; + __entry->mem_align = local_dio->mem_align; + __entry->offset_align = local_dio->offset_align; + __entry->start = offset; + __entry->start_len = local_dio->start_len; + __entry->middle = local_dio->middle_offset; + __entry->middle_len = local_dio->middle_len; + __entry->end = local_dio->end_offset; + __entry->end_len = local_dio->end_len; + ), + TP_printk("fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld count=%zd " + "mem_align=%u offset_align=%u " + "start=%llu+%zd middle=%llu+%zd end=%llu+%zd", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, __entry->offset, __entry->count, + __entry->mem_align, __entry->offset_align, + __entry->start, __entry->start_len, + __entry->middle, __entry->middle_len, + __entry->end, __entry->end_len) +) + +#define DEFINE_NFS_LOCAL_DIO_EVENT(name) \ +DEFINE_EVENT(nfs_local_dio_class, nfs_local_dio_##name, \ + TP_PROTO(const struct inode *inode, \ + loff_t offset, \ + ssize_t count, \ + const struct nfs_local_dio *local_dio),\ + TP_ARGS(inode, offset, count, local_dio)) + +DEFINE_NFS_LOCAL_DIO_EVENT(read); +DEFINE_NFS_LOCAL_DIO_EVENT(write); +DEFINE_NFS_LOCAL_DIO_EVENT(misaligned); + +#endif /* CONFIG_NFS_LOCALIO */ + 
TRACE_EVENT(nfs_fh_to_dentry, TP_PROTO( const struct super_block *sb, @@ -1713,10 +1916,10 @@ TRACE_EVENT(nfs_local_open_fh, ), TP_printk( - "error=%d fhandle=0x%08x mode=%s", - __entry->error, + "fhandle=0x%08x mode=%s result=%d", __entry->fhandle, - show_fs_fmode_flags(__entry->fmode) + show_fs_fmode_flags(__entry->fmode), + __entry->error ) ); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 647c53d1418a..0fb6905736d5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -296,7 +296,7 @@ static void nfs_folio_end_writeback(struct folio *folio) { struct nfs_server *nfss = NFS_SERVER(folio->mapping->host); - folio_end_writeback(folio); + folio_end_writeback_no_dropbehind(folio); if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) { nfss->write_congested = 0; @@ -593,6 +593,7 @@ static int nfs_do_writepage(struct folio *folio, struct writeback_control *wbc, if (IS_ERR(req)) return PTR_ERR(req); + trace_nfs_do_writepage(req); nfs_folio_set_writeback(folio); WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags)); @@ -656,12 +657,14 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) int priority = 0; int err; + trace_nfs_writepages(inode, wbc->range_start, wbc->range_end - wbc->range_start); + /* Wait with writeback until write congestion eases */ if (wbc->sync_mode == WB_SYNC_NONE && nfss->write_congested) { err = wait_event_killable(nfss->write_congestion_wait, nfss->write_congested == 0); if (err) - return err; + goto out_err; } nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); @@ -692,10 +695,10 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) } while (err < 0 && !nfs_error_is_fatal(err)); nfs_io_completion_put(ioc); - if (err < 0) - goto out_err; - return 0; + if (err > 0) + err = 0; out_err: + trace_nfs_writepages_done(inode, wbc->range_start, wbc->range_end - wbc->range_start, err); return err; } @@ -745,6 +748,8 @@ static void nfs_inode_remove_request(struct nfs_page *req) clear_bit(PG_MAPPED, &req->wb_head->wb_flags); } spin_unlock(&mapping->i_private_lock); + + folio_end_dropbehind(folio); } nfs_page_group_unlock(req); @@ -926,7 +931,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) req->wb_nio = 0; memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo, - hdr->pgio_mirror_idx); + hdr->ds_commit_idx); goto next; } remove_req: @@ -1017,11 +1022,12 @@ static struct nfs_page *nfs_try_to_update_request(struct folio *folio, unsigned int end; int error; + trace_nfs_try_to_update_request(folio_inode(folio), offset, bytes); end = offset + bytes; req = nfs_lock_and_join_requests(folio); if (IS_ERR_OR_NULL(req)) - return req; + goto out; rqend = req->wb_offset + req->wb_bytes; /* @@ -1043,6 +1049,9 @@ static struct nfs_page *nfs_try_to_update_request(struct folio *folio, else req->wb_bytes = rqend - req->wb_offset; req->wb_nio = 0; +out: + trace_nfs_try_to_update_request_done(folio_inode(folio), offset, bytes, + PTR_ERR_OR_ZERO(req)); return req; out_flushme: /* @@ -1053,6 +1062,7 @@ out_flushme: nfs_mark_request_dirty(req); nfs_unlock_and_release_request(req); error = nfs_wb_folio(folio->mapping->host, folio); + trace_nfs_try_to_update_request_done(folio_inode(folio), offset, bytes, error); return (error < 0) ? 
ERR_PTR(error) : NULL; } @@ -1090,6 +1100,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, req = nfs_setup_write_request(ctx, folio, offset, count); if (IS_ERR(req)) return PTR_ERR(req); + trace_nfs_writepage_setup(req); /* Update file length */ nfs_grow_file(folio, offset, count); nfs_mark_uptodate(req); @@ -1290,6 +1301,8 @@ int nfs_update_folio(struct file *file, struct folio *folio, nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); + trace_nfs_update_folio(inode, offset, count); + dprintk("NFS: nfs_update_folio(%pD2 %d@%lld)\n", file, count, (long long)(folio_pos(folio) + offset)); @@ -1309,6 +1322,7 @@ int nfs_update_folio(struct file *file, struct folio *folio, if (status < 0) nfs_set_pageerror(mapping); out: + trace_nfs_update_folio_done(inode, offset, count, status); dprintk("NFS: nfs_update_folio returns %d (isize %lld)\n", status, (long long)i_size_read(inode)); return status; @@ -1806,7 +1820,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) nfs_mapping_set_error(folio, status); nfs_inode_remove_request(req); } - dprintk_cont(", error = %d\n", status); + dprintk(", error = %d\n", status); goto next; } @@ -1816,11 +1830,11 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) /* We have a match */ if (folio) nfs_inode_remove_request(req); - dprintk_cont(" OK\n"); + dprintk(" OK\n"); goto next; } /* We have a mismatch. Write the page again */ - dprintk_cont(" mismatch\n"); + dprintk(" mismatch\n"); nfs_mark_request_dirty(req); atomic_long_inc(&NFS_I(data->inode)->redirtied_pages); next: diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 85ca663c052c..e010d90aeb27 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -231,6 +231,9 @@ nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need, refcount_set(&nf->nf_ref, 1); nf->nf_may = need; nf->nf_mark = NULL; + nf->nf_dio_mem_align = 0; + nf->nf_dio_offset_align = 0; + nf->nf_dio_read_offset_align = 0; return nf; } @@ -1070,6 +1073,35 @@ nfsd_file_is_cached(struct inode *inode) } static __be32 +nfsd_file_get_dio_attrs(const struct svc_fh *fhp, struct nfsd_file *nf) +{ + struct inode *inode = file_inode(nf->nf_file); + struct kstat stat; + __be32 status; + + /* Currently only need to get DIO alignment info for regular files */ + if (!S_ISREG(inode->i_mode)) + return nfs_ok; + + status = fh_getattr(fhp, &stat); + if (status != nfs_ok) + return status; + + trace_nfsd_file_get_dio_attrs(inode, &stat); + + if (stat.result_mask & STATX_DIOALIGN) { + nf->nf_dio_mem_align = stat.dio_mem_align; + nf->nf_dio_offset_align = stat.dio_offset_align; + } + if (stat.result_mask & STATX_DIO_READ_ALIGN) + nf->nf_dio_read_offset_align = stat.dio_read_offset_align; + else + nf->nf_dio_read_offset_align = nf->nf_dio_offset_align; + + return nfs_ok; +} + +static __be32 nfsd_file_do_acquire(struct svc_rqst *rqstp, struct net *net, struct svc_cred *cred, struct auth_domain *client, @@ -1187,6 +1219,8 @@ open_file: } status = nfserrno(ret); trace_nfsd_file_open(nf, status); + if (status == nfs_ok) + status = nfsd_file_get_dio_attrs(fhp, nf); } } else status = nfserr_jukebox; diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 722b26c71e45..237a05c74211 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -54,6 +54,10 @@ struct nfsd_file { struct list_head nf_gc; struct rcu_head nf_rcu; ktime_t nf_birthtime; + + u32 nf_dio_mem_align; + u32 nf_dio_offset_align; + u32 nf_dio_read_offset_align; }; int nfsd_file_cache_init(void); diff --git a/fs/nfsd/localio.c 
b/fs/nfsd/localio.c index cb237f1b902a..9e0a37cd29d8 100644 --- a/fs/nfsd/localio.c +++ b/fs/nfsd/localio.c @@ -117,6 +117,16 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom, return localio; } +static void nfsd_file_dio_alignment(struct nfsd_file *nf, + u32 *nf_dio_mem_align, + u32 *nf_dio_offset_align, + u32 *nf_dio_read_offset_align) +{ + *nf_dio_mem_align = nf->nf_dio_mem_align; + *nf_dio_offset_align = nf->nf_dio_offset_align; + *nf_dio_read_offset_align = nf->nf_dio_read_offset_align; +} + static const struct nfsd_localio_operations nfsd_localio_ops = { .nfsd_net_try_get = nfsd_net_try_get, .nfsd_net_put = nfsd_net_put, @@ -124,6 +134,7 @@ static const struct nfsd_localio_operations nfsd_localio_ops = { .nfsd_file_put_local = nfsd_file_put_local, .nfsd_file_get_local = nfsd_file_get_local, .nfsd_file_file = nfsd_file_file, + .nfsd_file_dio_alignment = nfsd_file_dio_alignment, }; void nfsd_localio_ops_init(void) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index c19a74a36c45..2b79129703d5 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1954,7 +1954,7 @@ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info) * remaining listeners and recreate the list. */ if (delete) - svc_xprt_destroy_all(serv, net); + svc_xprt_destroy_all(serv, net, false); /* walk list of addrs again, open any that still don't exist */ nlmsg_for_each_attr_type(attr, NFSD_A_SERVER_SOCK_ADDR, info->nlhdr, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 82b0111ac469..7057ddd7a0a8 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -535,16 +535,13 @@ void nfsd_destroy_serv(struct net *net) #endif } - svc_xprt_destroy_all(serv, net); - /* * write_ports can create the server without actually starting - * any threads--if we get shut down before any threads are + * any threads. If we get shut down before any threads are * started, then nfsd_destroy_serv will be run before any of this * other initialization has been done except the rpcb information. 
*/ - svc_rpcb_cleanup(serv, net); - + svc_xprt_destroy_all(serv, net, true); nfsd_shutdown_net(net); svc_destroy(&serv); } diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index a664fdf1161e..6e2c8e2aab10 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -1133,6 +1133,33 @@ TRACE_EVENT(nfsd_file_alloc, ) ); +TRACE_EVENT(nfsd_file_get_dio_attrs, + TP_PROTO( + const struct inode *inode, + const struct kstat *stat + ), + TP_ARGS(inode, stat), + TP_STRUCT__entry( + __field(const void *, inode) + __field(unsigned long, mask) + __field(u32, mem_align) + __field(u32, offset_align) + __field(u32, read_offset_align) + ), + TP_fast_assign( + __entry->inode = inode; + __entry->mask = stat->result_mask; + __entry->mem_align = stat->dio_mem_align; + __entry->offset_align = stat->dio_offset_align; + __entry->read_offset_align = stat->dio_read_offset_align; + ), + TP_printk("inode=%p flags=%s mem_align=%u offset_align=%u read_offset_align=%u", + __entry->inode, show_statx_mask(__entry->mask), + __entry->mem_align, __entry->offset_align, + __entry->read_offset_align + ) +); + TRACE_EVENT(nfsd_file_acquire, TP_PROTO( const struct svc_rqst *rqstp, diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index eff04959606f..fde3e0c11dba 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -185,6 +185,10 @@ static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat) u32 request_mask = STATX_BASIC_STATS; struct path p = {.mnt = fh->fh_export->ex_path.mnt, .dentry = fh->fh_dentry}; + struct inode *inode = d_inode(p.dentry); + + if (S_ISREG(inode->i_mode)) + request_mask |= (STATX_DIOALIGN | STATX_DIO_READ_ALIGN); if (fh->fh_maxsize == NFS4_FHSIZE) request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE); |
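
The new nfs4_ff_layout_calc_dss_id() helper introduced in flexfilelayout.h reduces to dss_id = (offset / stripe_unit) % dss_count, with dss_count == 1 or stripe_unit == 0 short-circuiting to stripe 0. Below is a minimal userspace sketch of that same arithmetic (illustrative only, not part of the patch; it uses plain 64-bit division where the kernel uses do_div(), which leaves the quotient in place and returns the remainder):

#include <stdint.h>
#include <stdio.h>

/* Same quotient/remainder computation as nfs4_ff_layout_calc_dss_id() */
static uint32_t calc_dss_id(uint64_t stripe_unit, uint32_t dss_count, uint64_t offset)
{
	if (dss_count == 1 || stripe_unit == 0)
		return 0;
	return (uint32_t)((offset / stripe_unit) % dss_count);
}

int main(void)
{
	/* hypothetical layout: 1 MiB stripe unit, 4 data-server stripes */
	uint64_t su = 1ULL << 20;

	for (uint64_t off = 0; off < (8ULL << 20); off += su)
		printf("offset=%llu -> dss_id=%u\n",
		       (unsigned long long)off, calc_dss_id(su, 4, off));
	return 0;
}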
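
The LOCALIO direct-I/O path above carves each request into up to three extents: a misaligned start, a DIO-aligned middle, and a misaligned end, which is exactly what the start/middle/end fields of struct nfs_local_dio record via round_up()/round_down() on the request offset and length. A small userspace sketch of that arithmetic (illustrative only; the offset, length and alignment values are hypothetical, and the macros re-implement the kernel helpers with plain division):

#include <stdint.h>
#include <stdio.h>

#define ROUND_UP(x, a)   ((((x) + (a) - 1) / (a)) * (a))
#define ROUND_DOWN(x, a) (((x) / (a)) * (a))

int main(void)
{
	uint64_t offset = 1000, len = 10000, align = 4096; /* hypothetical request */
	uint64_t start_end  = ROUND_UP(offset, align);     /* end of misaligned start */
	uint64_t orig_end   = offset + len;
	uint64_t middle_end = ROUND_DOWN(orig_end, align); /* end of DIO-aligned middle */

	printf("start : %llu + %llu (buffered)\n",
	       (unsigned long long)offset, (unsigned long long)(start_end - offset));
	printf("middle: %llu + %llu (O_DIRECT candidate)\n",
	       (unsigned long long)start_end, (unsigned long long)(middle_end - start_end));
	printf("end   : %llu + %llu (buffered)\n",
	       (unsigned long long)middle_end, (unsigned long long)(orig_end - middle_end));
	return 0;
}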
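
On the client side the series answers STATX_DIOALIGN / STATX_DIO_READ_ALIGN for regular NFS files with a 4-byte memory alignment and PAGE_SIZE offset alignment, while nfsd caches the exporting filesystem's values in the nfsd_file for LOCALIO to consult. A hedged sketch of how an application could query these attributes (assumes the statx(2) wrapper and, for the read-specific value, kernel/libc headers new enough to define STATX_DIO_READ_ALIGN and stx_dio_read_offset_align; not part of the patch):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>

int main(int argc, char **argv)
{
	struct statx stx = { 0 };

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file-on-nfs>\n", argv[0]);
		return 1;
	}
	/* Ask for both the generic and the read-specific DIO alignment attrs */
	if (statx(AT_FDCWD, argv[1], 0,
		  STATX_DIOALIGN | STATX_DIO_READ_ALIGN, &stx)) {
		perror("statx");
		return 1;
	}
	if (stx.stx_mask & STATX_DIOALIGN)
		printf("dio_mem_align=%u dio_offset_align=%u\n",
		       stx.stx_dio_mem_align, stx.stx_dio_offset_align);
	if (stx.stx_mask & STATX_DIO_READ_ALIGN)
		printf("dio_read_offset_align=%u\n",
		       stx.stx_dio_read_offset_align);
	return 0;
}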