summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/lockd/clntproc.c14
-rw-r--r--fs/lockd/host.c22
-rw-r--r--fs/lockd/mon.c14
-rw-r--r--fs/lockd/svcproc.c2
-rw-r--r--fs/nfs/blocklayout/blocklayout.c94
-rw-r--r--fs/nfs/blocklayout/blocklayout.h7
-rw-r--r--fs/nfs/blocklayout/dev.c7
-rw-r--r--fs/nfs/direct.c4
-rw-r--r--fs/nfs/export.c5
-rw-r--r--fs/nfs/filelayout/filelayout.c4
-rw-r--r--fs/nfs/inode.c53
-rw-r--r--fs/nfs/io.c2
-rw-r--r--fs/nfs/nfs4client.c24
-rw-r--r--fs/nfs/nfs4idmap.c6
-rw-r--r--fs/nfs/nfs4namespace.c2
-rw-r--r--fs/nfs/nfs4proc.c42
-rw-r--r--fs/nfs/nfs4state.c5
-rw-r--r--fs/nfs/nfs4sysctl.c2
-rw-r--r--fs/nfs/nfs4xdr.c64
-rw-r--r--fs/nfs/nfstrace.h22
-rw-r--r--fs/nfs/pagelist.c8
-rw-r--r--fs/nfs/pnfs.c6
-rw-r--r--fs/nfs/pnfs.h6
-rw-r--r--fs/nfs/pnfs_dev.c1
-rw-r--r--fs/nfs/write.c2
-rw-r--r--include/linux/lockd/lockd.h9
-rw-r--r--include/linux/nfs4.h12
-rw-r--r--include/linux/sunrpc/clnt.h1
-rw-r--r--include/linux/sunrpc/xprtrdma.h2
-rw-r--r--include/trace/events/rdma.h129
-rw-r--r--include/trace/events/rpcrdma.h890
-rw-r--r--include/trace/events/sunrpc.h12
-rw-r--r--include/uapi/linux/nfs.h1
-rw-r--r--net/sunrpc/clnt.c16
-rw-r--r--net/sunrpc/sched.c26
-rw-r--r--net/sunrpc/xprt.c2
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c78
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c157
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c329
-rw-r--r--net/sunrpc/xprtrdma/module.c12
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c162
-rw-r--r--net/sunrpc/xprtrdma/transport.c128
-rw-r--r--net/sunrpc/xprtrdma/verbs.c280
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h116
-rw-r--r--net/sunrpc/xprtsock.c36
45 files changed, 1995 insertions, 821 deletions
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 066ac313ae5c..a2c0dfc6fdc0 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -48,13 +48,13 @@ void nlmclnt_next_cookie(struct nlm_cookie *c)
static struct nlm_lockowner *nlm_get_lockowner(struct nlm_lockowner *lockowner)
{
- atomic_inc(&lockowner->count);
+ refcount_inc(&lockowner->count);
return lockowner;
}
static void nlm_put_lockowner(struct nlm_lockowner *lockowner)
{
- if (!atomic_dec_and_lock(&lockowner->count, &lockowner->host->h_lock))
+ if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock))
return;
list_del(&lockowner->list);
spin_unlock(&lockowner->host->h_lock);
@@ -105,7 +105,7 @@ static struct nlm_lockowner *nlm_find_lockowner(struct nlm_host *host, fl_owner_
res = __nlm_find_lockowner(host, owner);
if (res == NULL && new != NULL) {
res = new;
- atomic_set(&new->count, 1);
+ refcount_set(&new->count, 1);
new->owner = owner;
new->pid = __nlm_alloc_pid(host);
new->host = nlm_get_host(host);
@@ -204,7 +204,7 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
for(;;) {
call = kzalloc(sizeof(*call), GFP_KERNEL);
if (call != NULL) {
- atomic_set(&call->a_count, 1);
+ refcount_set(&call->a_count, 1);
locks_init_lock(&call->a_args.lock.fl);
locks_init_lock(&call->a_res.lock.fl);
call->a_host = nlm_get_host(host);
@@ -222,7 +222,7 @@ void nlmclnt_release_call(struct nlm_rqst *call)
{
const struct nlmclnt_operations *nlmclnt_ops = call->a_host->h_nlmclnt_ops;
- if (!atomic_dec_and_test(&call->a_count))
+ if (!refcount_dec_and_test(&call->a_count))
return;
if (nlmclnt_ops && nlmclnt_ops->nlmclnt_release_call)
nlmclnt_ops->nlmclnt_release_call(call->a_callback_data);
@@ -678,7 +678,7 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
goto out;
}
- atomic_inc(&req->a_count);
+ refcount_inc(&req->a_count);
status = nlmclnt_async_call(nfs_file_cred(fl->fl_file), req,
NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
if (status < 0)
@@ -769,7 +769,7 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
nlmclnt_setlockargs(req, fl);
req->a_args.block = block;
- atomic_inc(&req->a_count);
+ refcount_inc(&req->a_count);
status = nlmclnt_async_call(nfs_file_cred(fl->fl_file), req,
NLMPROC_CANCEL, &nlmclnt_cancel_ops);
if (status == 0 && req->a_res.status == nlm_lck_denied)
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 826a89184f90..d35cd6be0675 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -114,7 +114,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
unsigned long now = jiffies;
if (nsm != NULL)
- atomic_inc(&nsm->sm_count);
+ refcount_inc(&nsm->sm_count);
else {
host = NULL;
nsm = nsm_get_handle(ni->net, ni->sap, ni->salen,
@@ -151,7 +151,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
host->h_state = 0;
host->h_nsmstate = 0;
host->h_pidcount = 0;
- atomic_set(&host->h_count, 1);
+ refcount_set(&host->h_count, 1);
mutex_init(&host->h_mutex);
host->h_nextrebind = now + NLM_HOST_REBIND;
host->h_expires = now + NLM_HOST_EXPIRE;
@@ -290,7 +290,7 @@ void nlmclnt_release_host(struct nlm_host *host)
WARN_ON_ONCE(host->h_server);
- if (atomic_dec_and_test(&host->h_count)) {
+ if (refcount_dec_and_test(&host->h_count)) {
WARN_ON_ONCE(!list_empty(&host->h_lockowners));
WARN_ON_ONCE(!list_empty(&host->h_granted));
WARN_ON_ONCE(!list_empty(&host->h_reclaim));
@@ -388,6 +388,8 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
ln->nrhosts++;
nrhosts++;
+ refcount_inc(&host->h_count);
+
dprintk("lockd: %s created host %s (%s)\n",
__func__, host->h_name, host->h_addrbuf);
@@ -410,7 +412,7 @@ void nlmsvc_release_host(struct nlm_host *host)
dprintk("lockd: release server host %s\n", host->h_name);
WARN_ON_ONCE(!host->h_server);
- atomic_dec(&host->h_count);
+ refcount_dec(&host->h_count);
}
/*
@@ -504,7 +506,7 @@ struct nlm_host * nlm_get_host(struct nlm_host *host)
{
if (host) {
dprintk("lockd: get host %s\n", host->h_name);
- atomic_inc(&host->h_count);
+ refcount_inc(&host->h_count);
host->h_expires = jiffies + NLM_HOST_EXPIRE;
}
return host;
@@ -593,7 +595,7 @@ static void nlm_complain_hosts(struct net *net)
if (net && host->net != net)
continue;
dprintk(" %s (cnt %d use %d exp %ld net %x)\n",
- host->h_name, atomic_read(&host->h_count),
+ host->h_name, refcount_read(&host->h_count),
host->h_inuse, host->h_expires, host->net->ns.inum);
}
}
@@ -662,16 +664,16 @@ nlm_gc_hosts(struct net *net)
for_each_host_safe(host, next, chain, nlm_server_hosts) {
if (net && host->net != net)
continue;
- if (atomic_read(&host->h_count) || host->h_inuse
- || time_before(jiffies, host->h_expires)) {
+ if (host->h_inuse || time_before(jiffies, host->h_expires)) {
dprintk("nlm_gc_hosts skipping %s "
"(cnt %d use %d exp %ld net %x)\n",
- host->h_name, atomic_read(&host->h_count),
+ host->h_name, refcount_read(&host->h_count),
host->h_inuse, host->h_expires,
host->net->ns.inum);
continue;
}
- nlm_destroy_host_locked(host);
+ if (refcount_dec_if_one(&host->h_count))
+ nlm_destroy_host_locked(host);
}
if (net) {
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 96cfb2967ac7..654594ef4f94 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -191,7 +191,7 @@ void nsm_unmonitor(const struct nlm_host *host)
struct nsm_res res;
int status;
- if (atomic_read(&nsm->sm_count) == 1
+ if (refcount_read(&nsm->sm_count) == 1
&& nsm->sm_monitored && !nsm->sm_sticky) {
dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name);
@@ -279,7 +279,7 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
if (unlikely(new == NULL))
return NULL;
- atomic_set(&new->sm_count, 1);
+ refcount_set(&new->sm_count, 1);
new->sm_name = (char *)(new + 1);
memcpy(nsm_addr(new), sap, salen);
new->sm_addrlen = salen;
@@ -337,13 +337,13 @@ retry:
cached = nsm_lookup_addr(&ln->nsm_handles, sap);
if (cached != NULL) {
- atomic_inc(&cached->sm_count);
+ refcount_inc(&cached->sm_count);
spin_unlock(&nsm_lock);
kfree(new);
dprintk("lockd: found nsm_handle for %s (%s), "
"cnt %d\n", cached->sm_name,
cached->sm_addrbuf,
- atomic_read(&cached->sm_count));
+ refcount_read(&cached->sm_count));
return cached;
}
@@ -388,12 +388,12 @@ struct nsm_handle *nsm_reboot_lookup(const struct net *net,
return cached;
}
- atomic_inc(&cached->sm_count);
+ refcount_inc(&cached->sm_count);
spin_unlock(&nsm_lock);
dprintk("lockd: host %s (%s) rebooted, cnt %d\n",
cached->sm_name, cached->sm_addrbuf,
- atomic_read(&cached->sm_count));
+ refcount_read(&cached->sm_count));
return cached;
}
@@ -404,7 +404,7 @@ struct nsm_handle *nsm_reboot_lookup(const struct net *net,
*/
void nsm_release(struct nsm_handle *nsm)
{
- if (atomic_dec_and_lock(&nsm->sm_count, &nsm_lock)) {
+ if (refcount_dec_and_lock(&nsm->sm_count, &nsm_lock)) {
list_del(&nsm->sm_link);
spin_unlock(&nsm_lock);
dprintk("lockd: destroyed nsm_handle for %s (%s)\n",
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 0d670c5c378f..ea77c66d3cc3 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -295,7 +295,7 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
void nlmsvc_release_call(struct nlm_rqst *call)
{
- if (!atomic_dec_and_test(&call->a_count))
+ if (!refcount_dec_and_test(&call->a_count))
return;
nlmsvc_release_host(call->a_host);
kfree(call);
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 995d707537da..7cb5c38c19e4 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -137,6 +137,11 @@ bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector,
return bio;
}
+static bool offset_in_map(u64 offset, struct pnfs_block_dev_map *map)
+{
+ return offset >= map->start && offset < map->start + map->len;
+}
+
static struct bio *
do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,
struct page *page, struct pnfs_block_dev_map *map,
@@ -156,8 +161,8 @@ do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,
/* translate to physical disk offset */
disk_addr = (u64)isect << SECTOR_SHIFT;
- if (disk_addr < map->start || disk_addr >= map->start + map->len) {
- if (!dev->map(dev, disk_addr, map))
+ if (!offset_in_map(disk_addr, map)) {
+ if (!dev->map(dev, disk_addr, map) || !offset_in_map(disk_addr, map))
return ERR_PTR(-EIO);
bio = bl_submit_bio(bio);
}
@@ -184,6 +189,29 @@ retry:
return bio;
}
+static void bl_mark_devices_unavailable(struct nfs_pgio_header *header, bool rw)
+{
+ struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg);
+ size_t bytes_left = header->args.count;
+ sector_t isect, extent_length = 0;
+ struct pnfs_block_extent be;
+
+ isect = header->args.offset >> SECTOR_SHIFT;
+ bytes_left += header->args.offset - (isect << SECTOR_SHIFT);
+
+ while (bytes_left > 0) {
+ if (!ext_tree_lookup(bl, isect, &be, rw))
+ return;
+ extent_length = be.be_length - (isect - be.be_f_offset);
+ nfs4_mark_deviceid_unavailable(be.be_device);
+ isect += extent_length;
+ if (bytes_left > extent_length << SECTOR_SHIFT)
+ bytes_left -= extent_length << SECTOR_SHIFT;
+ else
+ bytes_left = 0;
+ }
+}
+
static void bl_end_io_read(struct bio *bio)
{
struct parallel_io *par = bio->bi_private;
@@ -194,6 +222,7 @@ static void bl_end_io_read(struct bio *bio)
if (!header->pnfs_error)
header->pnfs_error = -EIO;
pnfs_set_lo_fail(header->lseg);
+ bl_mark_devices_unavailable(header, false);
}
bio_put(bio);
@@ -323,6 +352,7 @@ static void bl_end_io_write(struct bio *bio)
if (!header->pnfs_error)
header->pnfs_error = -EIO;
pnfs_set_lo_fail(header->lseg);
+ bl_mark_devices_unavailable(header, true);
}
bio_put(bio);
put_parallel(par);
@@ -552,6 +582,31 @@ static int decode_sector_number(__be32 **rp, sector_t *sp)
return 0;
}
+static struct nfs4_deviceid_node *
+bl_find_get_deviceid(struct nfs_server *server,
+ const struct nfs4_deviceid *id, struct rpc_cred *cred,
+ gfp_t gfp_mask)
+{
+ struct nfs4_deviceid_node *node;
+ unsigned long start, end;
+
+retry:
+ node = nfs4_find_get_deviceid(server, id, cred, gfp_mask);
+ if (!node)
+ return ERR_PTR(-ENODEV);
+
+ if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0)
+ return node;
+
+ end = jiffies;
+ start = end - PNFS_DEVICE_RETRY_TIMEOUT;
+ if (!time_in_range(node->timestamp_unavailable, start, end)) {
+ nfs4_delete_deviceid(node->ld, node->nfs_client, id);
+ goto retry;
+ }
+ return ERR_PTR(-ENODEV);
+}
+
static int
bl_alloc_extent(struct xdr_stream *xdr, struct pnfs_layout_hdr *lo,
struct layout_verification *lv, struct list_head *extents,
@@ -573,16 +628,18 @@ bl_alloc_extent(struct xdr_stream *xdr, struct pnfs_layout_hdr *lo,
memcpy(&id, p, NFS4_DEVICEID4_SIZE);
p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
- error = -EIO;
- be->be_device = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &id,
+ be->be_device = bl_find_get_deviceid(NFS_SERVER(lo->plh_inode), &id,
lo->plh_lc_cred, gfp_mask);
- if (!be->be_device)
+ if (IS_ERR(be->be_device)) {
+ error = PTR_ERR(be->be_device);
goto out_free_be;
+ }
/*
* The next three values are read in as bytes, but stored in the
* extent structure in 512-byte granularity.
*/
+ error = -EIO;
if (decode_sector_number(&p, &be->be_f_offset) < 0)
goto out_put_deviceid;
if (decode_sector_number(&p, &be->be_length) < 0)
@@ -692,11 +749,16 @@ out_free_scratch:
__free_page(scratch);
out:
dprintk("%s returns %d\n", __func__, status);
- if (status) {
+ switch (status) {
+ case -ENODEV:
+ /* Our extent block devices are unavailable */
+ set_bit(NFS_LSEG_UNAVAILABLE, &lseg->pls_flags);
+ case 0:
+ return lseg;
+ default:
kfree(lseg);
return ERR_PTR(status);
}
- return lseg;
}
static void
@@ -798,6 +860,13 @@ bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
}
pnfs_generic_pg_init_read(pgio, req);
+
+ if (pgio->pg_lseg &&
+ test_bit(NFS_LSEG_UNAVAILABLE, &pgio->pg_lseg->pls_flags)) {
+ pnfs_error_mark_layout_for_return(pgio->pg_inode, pgio->pg_lseg);
+ pnfs_set_lo_fail(pgio->pg_lseg);
+ nfs_pageio_reset_read_mds(pgio);
+ }
}
/*
@@ -853,6 +922,14 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
pnfs_generic_pg_init_write(pgio, req, wb_size);
+
+ if (pgio->pg_lseg &&
+ test_bit(NFS_LSEG_UNAVAILABLE, &pgio->pg_lseg->pls_flags)) {
+
+ pnfs_error_mark_layout_for_return(pgio->pg_inode, pgio->pg_lseg);
+ pnfs_set_lo_fail(pgio->pg_lseg);
+ nfs_pageio_reset_write_mds(pgio);
+ }
}
/*
@@ -887,6 +964,7 @@ static struct pnfs_layoutdriver_type blocklayout_type = {
.name = "LAYOUT_BLOCK_VOLUME",
.owner = THIS_MODULE,
.flags = PNFS_LAYOUTRET_ON_SETATTR |
+ PNFS_LAYOUTRET_ON_ERROR |
PNFS_READ_WHOLE_PAGE,
.read_pagelist = bl_read_pagelist,
.write_pagelist = bl_write_pagelist,
@@ -910,6 +988,7 @@ static struct pnfs_layoutdriver_type scsilayout_type = {
.name = "LAYOUT_SCSI",
.owner = THIS_MODULE,
.flags = PNFS_LAYOUTRET_ON_SETATTR |
+ PNFS_LAYOUTRET_ON_ERROR |
PNFS_READ_WHOLE_PAGE,
.read_pagelist = bl_read_pagelist,
.write_pagelist = bl_write_pagelist,
@@ -967,6 +1046,7 @@ static void __exit nfs4blocklayout_exit(void)
}
MODULE_ALIAS("nfs-layouttype4-3");
+MODULE_ALIAS("nfs-layouttype4-5");
module_init(nfs4blocklayout_init);
module_exit(nfs4blocklayout_exit);
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index efc007f00742..716bc75e9ed2 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -92,10 +92,9 @@ struct pnfs_block_volume {
};
struct pnfs_block_dev_map {
- sector_t start;
- sector_t len;
-
- sector_t disk_offset;
+ u64 start;
+ u64 len;
+ u64 disk_offset;
struct block_device *bdev;
};
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index 95f74bd2c067..a7efd83779d2 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -533,14 +533,11 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
goto out_free_volumes;
ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
- if (ret) {
- bl_free_device(top);
- kfree(top);
- goto out_free_volumes;
- }
node = &top->node;
nfs4_init_deviceid_node(node, server, &pdev->dev_id);
+ if (ret)
+ nfs4_mark_deviceid_unavailable(node);
out_free_volumes:
kfree(volumes);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index d2972d537469..8c10b0562e75 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -775,10 +775,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
spin_lock(&dreq->lock);
- if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
- dreq->flags = 0;
+ if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
dreq->error = hdr->error;
- }
if (dreq->error == 0) {
nfs_direct_good_bytes(dreq, hdr);
if (nfs_write_need_commit(hdr)) {
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index 83fd09fc8f77..ab5de3246c5c 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -48,10 +48,6 @@ nfs_encode_fh(struct inode *inode, __u32 *p, int *max_len, struct inode *parent)
*max_len = len;
return FILEID_INVALID;
}
- if (IS_AUTOMOUNT(inode)) {
- *max_len = FILEID_INVALID;
- goto out;
- }
p[FILEID_HIGH_OFF] = NFS_FILEID(inode) >> 32;
p[FILEID_LOW_OFF] = NFS_FILEID(inode);
@@ -59,7 +55,6 @@ nfs_encode_fh(struct inode *inode, __u32 *p, int *max_len, struct inode *parent)
p[len - 1] = 0; /* Padding */
nfs_copy_fh(clnt_fh, server_fh);
*max_len = len;
-out:
dprintk("%s: result fh fileid %llu mode %u size %d\n",
__func__, NFS_FILEID(inode), inode->i_mode, *max_len);
return *max_len;
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 4e54d8b5413a..d175724ff566 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -895,9 +895,7 @@ fl_pnfs_update_layout(struct inode *ino,
lseg = pnfs_update_layout(ino, ctx, pos, count, iomode, strict_iomode,
gfp_flags);
- if (!lseg)
- lseg = ERR_PTR(-ENOMEM);
- if (IS_ERR(lseg))
+ if (IS_ERR_OR_NULL(lseg))
goto out;
lo = NFS_I(ino)->layout;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 93552c482992..ceeaf0fb6657 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -735,12 +735,20 @@ int nfs_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
- int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
+ struct nfs_server *server = NFS_SERVER(inode);
+ unsigned long cache_validity;
int err = 0;
+ bool force_sync = query_flags & AT_STATX_FORCE_SYNC;
+ bool do_update = false;
trace_nfs_getattr_enter(inode);
+
+ if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync)
+ goto out_no_update;
+
/* Flush out writes to the server in order to update c/mtime. */
- if (S_ISREG(inode->i_mode)) {
+ if ((request_mask & (STATX_CTIME|STATX_MTIME)) &&
+ S_ISREG(inode->i_mode)) {
err = filemap_write_and_wait(inode->i_mapping);
if (err)
goto out;
@@ -757,24 +765,42 @@ int nfs_getattr(const struct path *path, struct kstat *stat,
*/
if ((path->mnt->mnt_flags & MNT_NOATIME) ||
((path->mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
- need_atime = 0;
-
- if (need_atime || nfs_need_revalidate_inode(inode)) {
- struct nfs_server *server = NFS_SERVER(inode);
-
+ request_mask &= ~STATX_ATIME;
+
+ /* Is the user requesting attributes that might need revalidation? */
+ if (!(request_mask & (STATX_MODE|STATX_NLINK|STATX_ATIME|STATX_CTIME|
+ STATX_MTIME|STATX_UID|STATX_GID|
+ STATX_SIZE|STATX_BLOCKS)))
+ goto out_no_revalidate;
+
+ /* Check whether the cached attributes are stale */
+ do_update |= force_sync || nfs_attribute_cache_expired(inode);
+ cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
+ do_update |= cache_validity &
+ (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL);
+ if (request_mask & STATX_ATIME)
+ do_update |= cache_validity & NFS_INO_INVALID_ATIME;
+ if (request_mask & (STATX_CTIME|STATX_MTIME))
+ do_update |= cache_validity & NFS_INO_REVAL_PAGECACHE;
+ if (do_update) {
+ /* Update the attribute cache */
if (!(server->flags & NFS_MOUNT_NOAC))
nfs_readdirplus_parent_cache_miss(path->dentry);
else
nfs_readdirplus_parent_cache_hit(path->dentry);
err = __nfs_revalidate_inode(server, inode);
+ if (err)
+ goto out;
} else
nfs_readdirplus_parent_cache_hit(path->dentry);
- if (!err) {
- generic_fillattr(inode, stat);
- stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
- if (S_ISDIR(inode->i_mode))
- stat->blksize = NFS_SERVER(inode)->dtsize;
- }
+out_no_revalidate:
+ /* Only return attributes that were revalidated. */
+ stat->result_mask &= request_mask;
+out_no_update:
+ generic_fillattr(inode, stat);
+ stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
+ if (S_ISDIR(inode->i_mode))
+ stat->blksize = NFS_SERVER(inode)->dtsize;
out:
trace_nfs_getattr_exit(inode, err);
return err;
@@ -1144,7 +1170,6 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
if (mapping->nrpages != 0) {
if (S_ISREG(inode->i_mode)) {
- unmap_mapping_range(mapping, 0, 0, 0);
ret = nfs_sync_mapping(mapping);
if (ret < 0)
return ret;
diff --git a/fs/nfs/io.c b/fs/nfs/io.c
index 20fef85d2bb1..9034b4926909 100644
--- a/fs/nfs/io.c
+++ b/fs/nfs/io.c
@@ -99,7 +99,7 @@ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode)
{
if (!test_bit(NFS_INO_ODIRECT, &nfsi->flags)) {
set_bit(NFS_INO_ODIRECT, &nfsi->flags);
- nfs_wb_all(inode);
+ nfs_sync_mapping(inode->i_mapping);
}
}
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 65a7e5da508c..04612c24d394 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -861,6 +861,7 @@ static int nfs4_set_client(struct nfs_server *server,
set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status))
set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
+ server->port = rpc_get_port(addr);
/* Allocate or find a client reference we can use */
clp = nfs_get_client(&cl_init);
@@ -1123,19 +1124,36 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
/* Initialise the client representation from the parent server */
nfs_server_copy_userdata(server, parent_server);
- /* Get a client representation.
- * Note: NFSv4 always uses TCP, */
+ /* Get a client representation */
+#ifdef CONFIG_SUNRPC_XPRT_RDMA
+ rpc_set_port(data->addr, NFS_RDMA_PORT);
error = nfs4_set_client(server, data->hostname,
data->addr,
data->addrlen,
parent_client->cl_ipaddr,
- rpc_protocol(parent_server->client),
+ XPRT_TRANSPORT_RDMA,
+ parent_server->client->cl_timeout,
+ parent_client->cl_mvops->minor_version,
+ parent_client->cl_net);
+ if (!error)
+ goto init_server;
+#endif /* CONFIG_SUNRPC_XPRT_RDMA */
+
+ rpc_set_port(data->addr, NFS_PORT);
+ error = nfs4_set_client(server, data->hostname,
+ data->addr,
+ data->addrlen,
+ parent_client->cl_ipaddr,
+ XPRT_TRANSPORT_TCP,
parent_server->client->cl_timeout,
parent_client->cl_mvops->minor_version,
parent_client->cl_net);
if (error < 0)
goto error;
+#ifdef CONFIG_SUNRPC_XPRT_RDMA
+init_server:
+#endif
error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor);
if (error < 0)
goto error;
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index 30426c1a1bbd..22dc30a679a0 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -568,9 +568,13 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
struct idmap_msg *im;
struct idmap *idmap = (struct idmap *)aux;
struct key *key = cons->key;
- int ret = -ENOMEM;
+ int ret = -ENOKEY;
+
+ if (!aux)
+ goto out1;
/* msg and im are freed in idmap_pipe_destroy_msg */
+ ret = -ENOMEM;
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
goto out1;
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 8c3f327d858d..24f06dcc2b08 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -270,8 +270,6 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
if (mountdata->addrlen == 0)
continue;
- rpc_set_port(mountdata->addr, NFS_PORT);
-
memcpy(page2, buf->data, buf->len);
page2[buf->len] = '\0';
mountdata->hostname = page2;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 17a03f2c4330..47f3c273245e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2020,7 +2020,7 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta
return ret;
}
-static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct nfs4_state *state, const nfs4_stateid *stateid, int err)
+static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct nfs4_state *state, const nfs4_stateid *stateid, struct file_lock *fl, int err)
{
switch (err) {
default:
@@ -2067,7 +2067,11 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
return -EAGAIN;
case -ENOMEM:
case -NFS4ERR_DENIED:
- /* kill_proc(fl->fl_pid, SIGLOST, 1); */
+ if (fl) {
+ struct nfs4_lock_state *lsp = fl->fl_u.nfs4_fl.owner;
+ if (lsp)
+ set_bit(NFS_LOCK_LOST, &lsp->ls_flags);
+ }
return 0;
}
return err;
@@ -2103,7 +2107,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx,
err = nfs4_open_recover_helper(opendata, FMODE_READ);
}
nfs4_opendata_put(opendata);
- return nfs4_handle_delegation_recall_error(server, state, stateid, err);
+ return nfs4_handle_delegation_recall_error(server, state, stateid, NULL, err);
}
static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata)
@@ -3150,6 +3154,11 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
struct nfs4_state *state = calldata->state;
struct nfs_server *server = NFS_SERVER(calldata->inode);
nfs4_stateid *res_stateid = NULL;
+ struct nfs4_exception exception = {
+ .state = state,
+ .inode = calldata->inode,
+ .stateid = &calldata->arg.stateid,
+ };
dprintk("%s: begin!\n", __func__);
if (!nfs4_sequence_done(task, &calldata->res.seq_res))
@@ -3215,7 +3224,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
case -NFS4ERR_BAD_STATEID:
break;
default:
- if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN)
+ task->tk_status = nfs4_async_handle_exception(task,
+ server, task->tk_status, &exception);
+ if (exception.retry)
goto out_restart;
}
nfs_clear_open_stateid(state, &calldata->arg.stateid,
@@ -5759,6 +5770,10 @@ struct nfs4_delegreturndata {
static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
{
struct nfs4_delegreturndata *data = calldata;
+ struct nfs4_exception exception = {
+ .inode = data->inode,
+ .stateid = &data->stateid,
+ };
if (!nfs4_sequence_done(task, &data->res.seq_res))
return;
@@ -5820,10 +5835,11 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
}
/* Fallthrough */
default:
- if (nfs4_async_handle_error(task, data->res.server,
- NULL, NULL) == -EAGAIN) {
+ task->tk_status = nfs4_async_handle_exception(task,
+ data->res.server, task->tk_status,
+ &exception);
+ if (exception.retry)
goto out_restart;
- }
}
data->rpc_status = task->tk_status;
return;
@@ -6061,6 +6077,10 @@ static void nfs4_locku_release_calldata(void *data)
static void nfs4_locku_done(struct rpc_task *task, void *data)
{
struct nfs4_unlockdata *calldata = data;
+ struct nfs4_exception exception = {
+ .inode = calldata->lsp->ls_state->inode,
+ .stateid = &calldata->arg.stateid,
+ };
if (!nfs4_sequence_done(task, &calldata->res.seq_res))
return;
@@ -6084,8 +6104,10 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
rpc_restart_call_prepare(task);
break;
default:
- if (nfs4_async_handle_error(task, calldata->server,
- NULL, NULL) == -EAGAIN)
+ task->tk_status = nfs4_async_handle_exception(task,
+ calldata->server, task->tk_status,
+ &exception);
+ if (exception.retry)
rpc_restart_call_prepare(task);
}
nfs_release_seqid(calldata->arg.seqid);
@@ -6741,7 +6763,7 @@ int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state,
if (err != 0)
return err;
err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
- return nfs4_handle_delegation_recall_error(server, state, stateid, err);
+ return nfs4_handle_delegation_recall_error(server, state, stateid, fl, err);
}
struct nfs_release_lockowner_data {
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e4f4a09ed9f4..91a4d4eeb235 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1482,6 +1482,7 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
struct inode *inode = state->inode;
struct nfs_inode *nfsi = NFS_I(inode);
struct file_lock *fl;
+ struct nfs4_lock_state *lsp;
int status = 0;
struct file_lock_context *flctx = inode->i_flctx;
struct list_head *list;
@@ -1522,7 +1523,9 @@ restart:
case -NFS4ERR_DENIED:
case -NFS4ERR_RECLAIM_BAD:
case -NFS4ERR_RECLAIM_CONFLICT:
- /* kill_proc(fl->fl_pid, SIGLOST, 1); */
+ lsp = fl->fl_u.nfs4_fl.owner;
+ if (lsp)
+ set_bit(NFS_LOCK_LOST, &lsp->ls_flags);
status = 0;
}
spin_lock(&flctx->flc_lock);
diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c
index 0d91d84e5822..c394e4447100 100644
--- a/fs/nfs/nfs4sysctl.c
+++ b/fs/nfs/nfs4sysctl.c
@@ -32,7 +32,7 @@ static struct ctl_table nfs4_cb_sysctls[] = {
.data = &nfs_idmap_cache_timeout,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
+ .proc_handler = proc_dointvec,
},
{ }
};
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 77c6729e57f0..65c9c4175145 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7678,6 +7678,22 @@ nfs4_stat_to_errno(int stat)
.p_name = #proc, \
}
+#if defined(CONFIG_NFS_V4_1)
+#define PROC41(proc, argtype, restype) \
+ PROC(proc, argtype, restype)
+#else
+#define PROC41(proc, argtype, restype) \
+ STUB(proc)
+#endif
+
+#if defined(CONFIG_NFS_V4_2)
+#define PROC42(proc, argtype, restype) \
+ PROC(proc, argtype, restype)
+#else
+#define PROC42(proc, argtype, restype) \
+ STUB(proc)
+#endif
+
const struct rpc_procinfo nfs4_procedures[] = {
PROC(READ, enc_read, dec_read),
PROC(WRITE, enc_write, dec_write),
@@ -7698,7 +7714,6 @@ const struct rpc_procinfo nfs4_procedures[] = {
PROC(ACCESS, enc_access, dec_access),
PROC(GETATTR, enc_getattr, dec_getattr),
PROC(LOOKUP, enc_lookup, dec_lookup),
- PROC(LOOKUPP, enc_lookupp, dec_lookupp),
PROC(LOOKUP_ROOT, enc_lookup_root, dec_lookup_root),
PROC(REMOVE, enc_remove, dec_remove),
PROC(RENAME, enc_rename, dec_rename),
@@ -7717,33 +7732,30 @@ const struct rpc_procinfo nfs4_procedures[] = {
PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
PROC(SECINFO, enc_secinfo, dec_secinfo),
PROC(FSID_PRESENT, enc_fsid_present, dec_fsid_present),
-#if defined(CONFIG_NFS_V4_1)
- PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id),
- PROC(CREATE_SESSION, enc_create_session, dec_create_session),
- PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session),
- PROC(SEQUENCE, enc_sequence, dec_sequence),
- PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time),
- PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete),
- PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
- PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
- PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit),
- PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn),
- PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name),
- PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid),
- PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid),
+ PROC41(EXCHANGE_ID, enc_exchange_id, dec_exchange_id),
+ PROC41(CREATE_SESSION, enc_create_session, dec_create_session),
+ PROC41(DESTROY_SESSION, enc_destroy_session, dec_destroy_session),
+ PROC41(SEQUENCE, enc_sequence, dec_sequence),
+ PROC41(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time),
+ PROC41(RECLAIM_COMPLETE,enc_reclaim_complete, dec_reclaim_complete),
+ PROC41(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
+ PROC41(LAYOUTGET, enc_layoutget, dec_layoutget),
+ PROC41(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit),
+ PROC41(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn),
+ PROC41(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name),
+ PROC41(TEST_STATEID, enc_test_stateid, dec_test_stateid),
+ PROC41(FREE_STATEID, enc_free_stateid, dec_free_stateid),
STUB(GETDEVICELIST),
- PROC(BIND_CONN_TO_SESSION,
+ PROC41(BIND_CONN_TO_SESSION,
enc_bind_conn_to_session, dec_bind_conn_to_session),
- PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid),
-#endif /* CONFIG_NFS_V4_1 */
-#ifdef CONFIG_NFS_V4_2
- PROC(SEEK, enc_seek, dec_seek),
- PROC(ALLOCATE, enc_allocate, dec_allocate),
- PROC(DEALLOCATE, enc_deallocate, dec_deallocate),
- PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats),
- PROC(CLONE, enc_clone, dec_clone),
- PROC(COPY, enc_copy, dec_copy),
-#endif /* CONFIG_NFS_V4_2 */
+ PROC41(DESTROY_CLIENTID,enc_destroy_clientid, dec_destroy_clientid),
+ PROC42(SEEK, enc_seek, dec_seek),
+ PROC42(ALLOCATE, enc_allocate, dec_allocate),
+ PROC42(DEALLOCATE, enc_deallocate, dec_deallocate),
+ PROC42(LAYOUTSTATS, enc_layoutstats, dec_layoutstats),
+ PROC42(CLONE, enc_clone, dec_clone),
+ PROC42(COPY, enc_copy, dec_copy),
+ PROC(LOOKUPP, enc_lookupp, dec_lookupp),
};
static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)];
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 610d89d8942e..bd60f8d1e181 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -797,15 +797,15 @@ TRACE_EVENT(nfs_readpage_done,
)
);
-/*
- * XXX: I tried using NFS_UNSTABLE and friends in this table, but they
- * all evaluate to 0 for some reason, even if I include linux/nfs.h.
- */
+TRACE_DEFINE_ENUM(NFS_UNSTABLE);
+TRACE_DEFINE_ENUM(NFS_DATA_SYNC);
+TRACE_DEFINE_ENUM(NFS_FILE_SYNC);
+
#define nfs_show_stable(stable) \
__print_symbolic(stable, \
- { 0, " (UNSTABLE)" }, \
- { 1, " (DATA_SYNC)" }, \
- { 2, " (FILE_SYNC)" })
+ { NFS_UNSTABLE, "UNSTABLE" }, \
+ { NFS_DATA_SYNC, "DATA_SYNC" }, \
+ { NFS_FILE_SYNC, "FILE_SYNC" })
TRACE_EVENT(nfs_initiate_write,
TP_PROTO(
@@ -838,12 +838,12 @@ TRACE_EVENT(nfs_initiate_write,
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld count=%lu stable=%d%s",
+ "offset=%lld count=%lu stable=%s",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
__entry->offset, __entry->count,
- __entry->stable, nfs_show_stable(__entry->stable)
+ nfs_show_stable(__entry->stable)
)
);
@@ -882,13 +882,13 @@ TRACE_EVENT(nfs_writeback_done,
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld status=%d stable=%d%s "
+ "offset=%lld status=%d stable=%s "
"verifier 0x%016llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
__entry->offset, __entry->status,
- __entry->stable, nfs_show_stable(__entry->stable),
+ nfs_show_stable(__entry->stable),
__entry->verifier
)
);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d0543e19098a..18a7626ac638 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -537,7 +537,7 @@ EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
* @cinfo: Commit information for the call (writes only)
*/
static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
- unsigned int count, unsigned int offset,
+ unsigned int count,
int how, struct nfs_commit_info *cinfo)
{
struct nfs_page *req = hdr->req;
@@ -546,10 +546,10 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
* NB: take care not to mess about with hdr->commit et al. */
hdr->args.fh = NFS_FH(hdr->inode);
- hdr->args.offset = req_offset(req) + offset;
+ hdr->args.offset = req_offset(req);
/* pnfs_set_layoutcommit needs this */
hdr->mds_offset = hdr->args.offset;
- hdr->args.pgbase = req->wb_pgbase + offset;
+ hdr->args.pgbase = req->wb_pgbase;
hdr->args.pages = hdr->page_array.pagevec;
hdr->args.count = count;
hdr->args.context = get_nfs_open_context(req->wb_context);
@@ -789,7 +789,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
desc->pg_ioflags &= ~FLUSH_COND_STABLE;
/* Set up the argument struct */
- nfs_pgio_rpcsetup(hdr, mirror->pg_count, 0, desc->pg_ioflags, &cinfo);
+ nfs_pgio_rpcsetup(hdr, mirror->pg_count, desc->pg_ioflags, &cinfo);
desc->pg_rpc_callops = &nfs_pgio_common_ops;
return 0;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index d602fe9e1ac8..c13e826614b5 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -655,7 +655,7 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
return 0;
list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
if (pnfs_match_lseg_recall(lseg, recall_range, seq)) {
- dprintk("%s: freeing lseg %p iomode %d seq %u"
+ dprintk("%s: freeing lseg %p iomode %d seq %u "
"offset %llu length %llu\n", __func__,
lseg, lseg->pls_range.iomode, lseg->pls_seq,
lseg->pls_range.offset, lseg->pls_range.length);
@@ -2255,7 +2255,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
nfs_pageio_reset_write_mds(desc);
mirror->pg_recoalesce = 1;
}
- hdr->release(hdr);
+ hdr->completion_ops->completion(hdr);
}
static enum pnfs_try_status
@@ -2378,7 +2378,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
nfs_pageio_reset_read_mds(desc);
mirror->pg_recoalesce = 1;
}
- hdr->release(hdr);
+ hdr->completion_ops->completion(hdr);
}
/*
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 8d507c361d98..daf6cbf5c15f 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -40,6 +40,7 @@ enum {
NFS_LSEG_ROC, /* roc bit received from server */
NFS_LSEG_LAYOUTCOMMIT, /* layoutcommit bit set for layoutcommit */
NFS_LSEG_LAYOUTRETURN, /* layoutreturn bit set for layoutreturn */
+ NFS_LSEG_UNAVAILABLE, /* unavailable bit set for temporary problem */
};
/* Individual ip address */
@@ -86,6 +87,7 @@ enum pnfs_try_status {
*/
#define NFS4_DEF_DS_TIMEO 600 /* in tenths of a second */
#define NFS4_DEF_DS_RETRANS 5
+#define PNFS_DEVICE_RETRY_TIMEOUT (120*HZ)
/* error codes for internal use */
#define NFS4ERR_RESET_TO_MDS 12001
@@ -524,8 +526,10 @@ static inline int pnfs_return_layout(struct inode *ino)
struct nfs_inode *nfsi = NFS_I(ino);
struct nfs_server *nfss = NFS_SERVER(ino);
- if (pnfs_enabled_sb(nfss) && nfsi->layout)
+ if (pnfs_enabled_sb(nfss) && nfsi->layout) {
+ set_bit(NFS_LAYOUT_RETURN_REQUESTED, &nfsi->layout->plh_flags);
return _pnfs_return_layout(ino);
+ }
return 0;
}
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index 2961fcd7a2df..e8a07b3f9aaa 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -43,7 +43,6 @@
#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS)
#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1)
-#define PNFS_DEVICE_RETRY_TIMEOUT (120*HZ)
static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE];
static DEFINE_SPINLOCK(nfs4_deviceid_lock);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 12b2d477836b..7428a669d7a7 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1835,6 +1835,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
next:
nfs_unlock_and_release_request(req);
+ /* Latency breaker */
+ cond_resched();
}
nfss = NFS_SERVER(data->inode);
if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index d7d313fb9cd4..4fd95dbeb52f 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -17,6 +17,7 @@
#include <net/ipv6.h>
#include <linux/fs.h>
#include <linux/kref.h>
+#include <linux/refcount.h>
#include <linux/utsname.h>
#include <linux/lockd/bind.h>
#include <linux/lockd/xdr.h>
@@ -58,7 +59,7 @@ struct nlm_host {
u32 h_state; /* pseudo-state counter */
u32 h_nsmstate; /* true remote NSM state */
u32 h_pidcount; /* Pseudopids */
- atomic_t h_count; /* reference count */
+ refcount_t h_count; /* reference count */
struct mutex h_mutex; /* mutex for pmap binding */
unsigned long h_nextrebind; /* next portmap call */
unsigned long h_expires; /* eligible for GC */
@@ -83,7 +84,7 @@ struct nlm_host {
struct nsm_handle {
struct list_head sm_link;
- atomic_t sm_count;
+ refcount_t sm_count;
char *sm_mon_name;
char *sm_name;
struct sockaddr_storage sm_addr;
@@ -122,7 +123,7 @@ static inline struct sockaddr *nlm_srcaddr(const struct nlm_host *host)
*/
struct nlm_lockowner {
struct list_head list;
- atomic_t count;
+ refcount_t count;
struct nlm_host *host;
fl_owner_t owner;
@@ -136,7 +137,7 @@ struct nlm_wait;
*/
#define NLMCLNT_OHSIZE ((__NEW_UTS_LEN) + 10u)
struct nlm_rqst {
- atomic_t a_count;
+ refcount_t a_count;
unsigned int a_flags; /* initial RPC task flags */
struct nlm_host * a_host; /* host handle */
struct nlm_args a_args; /* arguments */
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 47adac640191..57ffaa20d564 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -457,7 +457,12 @@ enum lock_type4 {
#define NFS4_DEBUG 1
-/* Index of predefined Linux client operations */
+/*
+ * Index of predefined Linux client operations
+ *
+ * To ensure that /proc/net/rpc/nfs remains correctly ordered, please
+ * append only to this enum when adding new client operations.
+ */
enum {
NFSPROC4_CLNT_NULL = 0, /* Unused */
@@ -480,7 +485,6 @@ enum {
NFSPROC4_CLNT_ACCESS,
NFSPROC4_CLNT_GETATTR,
NFSPROC4_CLNT_LOOKUP,
- NFSPROC4_CLNT_LOOKUPP,
NFSPROC4_CLNT_LOOKUP_ROOT,
NFSPROC4_CLNT_REMOVE,
NFSPROC4_CLNT_RENAME,
@@ -500,7 +504,6 @@ enum {
NFSPROC4_CLNT_SECINFO,
NFSPROC4_CLNT_FSID_PRESENT,
- /* nfs41 */
NFSPROC4_CLNT_EXCHANGE_ID,
NFSPROC4_CLNT_CREATE_SESSION,
NFSPROC4_CLNT_DESTROY_SESSION,
@@ -518,13 +521,14 @@ enum {
NFSPROC4_CLNT_BIND_CONN_TO_SESSION,
NFSPROC4_CLNT_DESTROY_CLIENTID,
- /* nfs42 */
NFSPROC4_CLNT_SEEK,
NFSPROC4_CLNT_ALLOCATE,
NFSPROC4_CLNT_DEALLOCATE,
NFSPROC4_CLNT_LAYOUTSTATS,
NFSPROC4_CLNT_CLONE,
NFSPROC4_CLNT_COPY,
+
+ NFSPROC4_CLNT_LOOKUPP,
};
/* nfs41 types */
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 71c237e8240e..ed761f751ecb 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -179,7 +179,6 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred,
int rpc_restart_call_prepare(struct rpc_task *);
int rpc_restart_call(struct rpc_task *);
void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
-int rpc_protocol(struct rpc_clnt *);
struct net * rpc_net_ns(struct rpc_clnt *);
size_t rpc_max_payload(struct rpc_clnt *);
size_t rpc_max_bc_payload(struct rpc_clnt *);
diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h
index 221b7a2e5406..5859563e3c1f 100644
--- a/include/linux/sunrpc/xprtrdma.h
+++ b/include/linux/sunrpc/xprtrdma.h
@@ -64,7 +64,7 @@ enum rpcrdma_memreg {
RPCRDMA_MEMWINDOWS,
RPCRDMA_MEMWINDOWS_ASYNC,
RPCRDMA_MTHCAFMR,
- RPCRDMA_FRMR,
+ RPCRDMA_FRWR,
RPCRDMA_ALLPHYSICAL,
RPCRDMA_LAST
};
diff --git a/include/trace/events/rdma.h b/include/trace/events/rdma.h
new file mode 100644
index 000000000000..aa19afc73a4e
--- /dev/null
+++ b/include/trace/events/rdma.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2017 Oracle. All rights reserved.
+ */
+
+/*
+ * enum ib_event_type, from include/rdma/ib_verbs.h
+ */
+
+#define IB_EVENT_LIST \
+ ib_event(CQ_ERR) \
+ ib_event(QP_FATAL) \
+ ib_event(QP_REQ_ERR) \
+ ib_event(QP_ACCESS_ERR) \
+ ib_event(COMM_EST) \
+ ib_event(SQ_DRAINED) \
+ ib_event(PATH_MIG) \
+ ib_event(PATH_MIG_ERR) \
+ ib_event(DEVICE_FATAL) \
+ ib_event(PORT_ACTIVE) \
+ ib_event(PORT_ERR) \
+ ib_event(LID_CHANGE) \
+ ib_event(PKEY_CHANGE) \
+ ib_event(SM_CHANGE) \
+ ib_event(SRQ_ERR) \
+ ib_event(SRQ_LIMIT_REACHED) \
+ ib_event(QP_LAST_WQE_REACHED) \
+ ib_event(CLIENT_REREGISTER) \
+ ib_event(GID_CHANGE) \
+ ib_event_end(WQ_FATAL)
+
+#undef ib_event
+#undef ib_event_end
+
+#define ib_event(x) TRACE_DEFINE_ENUM(IB_EVENT_##x);
+#define ib_event_end(x) TRACE_DEFINE_ENUM(IB_EVENT_##x);
+
+IB_EVENT_LIST
+
+#undef ib_event
+#undef ib_event_end
+
+#define ib_event(x) { IB_EVENT_##x, #x },
+#define ib_event_end(x) { IB_EVENT_##x, #x }
+
+#define rdma_show_ib_event(x) \
+ __print_symbolic(x, IB_EVENT_LIST)
+
+/*
+ * enum ib_wc_status type, from include/rdma/ib_verbs.h
+ */
+#define IB_WC_STATUS_LIST \
+ ib_wc_status(SUCCESS) \
+ ib_wc_status(LOC_LEN_ERR) \
+ ib_wc_status(LOC_QP_OP_ERR) \
+ ib_wc_status(LOC_EEC_OP_ERR) \
+ ib_wc_status(LOC_PROT_ERR) \
+ ib_wc_status(WR_FLUSH_ERR) \
+ ib_wc_status(MW_BIND_ERR) \
+ ib_wc_status(BAD_RESP_ERR) \
+ ib_wc_status(LOC_ACCESS_ERR) \
+ ib_wc_status(REM_INV_REQ_ERR) \
+ ib_wc_status(REM_ACCESS_ERR) \
+ ib_wc_status(REM_OP_ERR) \
+ ib_wc_status(RETRY_EXC_ERR) \
+ ib_wc_status(RNR_RETRY_EXC_ERR) \
+ ib_wc_status(LOC_RDD_VIOL_ERR) \
+ ib_wc_status(REM_INV_RD_REQ_ERR) \
+ ib_wc_status(REM_ABORT_ERR) \
+ ib_wc_status(INV_EECN_ERR) \
+ ib_wc_status(INV_EEC_STATE_ERR) \
+ ib_wc_status(FATAL_ERR) \
+ ib_wc_status(RESP_TIMEOUT_ERR) \
+ ib_wc_status_end(GENERAL_ERR)
+
+#undef ib_wc_status
+#undef ib_wc_status_end
+
+#define ib_wc_status(x) TRACE_DEFINE_ENUM(IB_WC_##x);
+#define ib_wc_status_end(x) TRACE_DEFINE_ENUM(IB_WC_##x);
+
+IB_WC_STATUS_LIST
+
+#undef ib_wc_status
+#undef ib_wc_status_end
+
+#define ib_wc_status(x) { IB_WC_##x, #x },
+#define ib_wc_status_end(x) { IB_WC_##x, #x }
+
+#define rdma_show_wc_status(x) \
+ __print_symbolic(x, IB_WC_STATUS_LIST)
+
+/*
+ * enum rdma_cm_event_type, from include/rdma/rdma_cm.h
+ */
+#define RDMA_CM_EVENT_LIST \
+ rdma_cm_event(ADDR_RESOLVED) \
+ rdma_cm_event(ADDR_ERROR) \
+ rdma_cm_event(ROUTE_RESOLVED) \
+ rdma_cm_event(ROUTE_ERROR) \
+ rdma_cm_event(CONNECT_REQUEST) \
+ rdma_cm_event(CONNECT_RESPONSE) \
+ rdma_cm_event(CONNECT_ERROR) \
+ rdma_cm_event(UNREACHABLE) \
+ rdma_cm_event(REJECTED) \
+ rdma_cm_event(ESTABLISHED) \
+ rdma_cm_event(DISCONNECTED) \
+ rdma_cm_event(DEVICE_REMOVAL) \
+ rdma_cm_event(MULTICAST_JOIN) \
+ rdma_cm_event(MULTICAST_ERROR) \
+ rdma_cm_event(ADDR_CHANGE) \
+ rdma_cm_event_end(TIMEWAIT_EXIT)
+
+#undef rdma_cm_event
+#undef rdma_cm_event_end
+
+#define rdma_cm_event(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x);
+#define rdma_cm_event_end(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x);
+
+RDMA_CM_EVENT_LIST
+
+#undef rdma_cm_event
+#undef rdma_cm_event_end
+
+#define rdma_cm_event(x) { RDMA_CM_EVENT_##x, #x },
+#define rdma_cm_event_end(x) { RDMA_CM_EVENT_##x, #x }
+
+#define rdma_show_cm_event(x) \
+ __print_symbolic(x, RDMA_CM_EVENT_LIST)
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
new file mode 100644
index 000000000000..50ed3f8bf534
--- /dev/null
+++ b/include/trace/events/rpcrdma.h
@@ -0,0 +1,890 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2017 Oracle. All rights reserved.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rpcrdma
+
+#if !defined(_TRACE_RPCRDMA_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_RPCRDMA_H
+
+#include <linux/tracepoint.h>
+#include <trace/events/rdma.h>
+
+/**
+ ** Event classes
+ **/
+
+DECLARE_EVENT_CLASS(xprtrdma_reply_event,
+ TP_PROTO(
+ const struct rpcrdma_rep *rep
+ ),
+
+ TP_ARGS(rep),
+
+ TP_STRUCT__entry(
+ __field(const void *, rep)
+ __field(const void *, r_xprt)
+ __field(u32, xid)
+ __field(u32, version)
+ __field(u32, proc)
+ ),
+
+ TP_fast_assign(
+ __entry->rep = rep;
+ __entry->r_xprt = rep->rr_rxprt;
+ __entry->xid = be32_to_cpu(rep->rr_xid);
+ __entry->version = be32_to_cpu(rep->rr_vers);
+ __entry->proc = be32_to_cpu(rep->rr_proc);
+ ),
+
+ TP_printk("rxprt %p xid=0x%08x rep=%p: version %u proc %u",
+ __entry->r_xprt, __entry->xid, __entry->rep,
+ __entry->version, __entry->proc
+ )
+);
+
+#define DEFINE_REPLY_EVENT(name) \
+ DEFINE_EVENT(xprtrdma_reply_event, name, \
+ TP_PROTO( \
+ const struct rpcrdma_rep *rep \
+ ), \
+ TP_ARGS(rep))
+
+DECLARE_EVENT_CLASS(xprtrdma_rxprt,
+ TP_PROTO(
+ const struct rpcrdma_xprt *r_xprt
+ ),
+
+ TP_ARGS(r_xprt),
+
+ TP_STRUCT__entry(
+ __field(const void *, r_xprt)
+ __string(addr, rpcrdma_addrstr(r_xprt))
+ __string(port, rpcrdma_portstr(r_xprt))
+ ),
+
+ TP_fast_assign(
+ __entry->r_xprt = r_xprt;
+ __assign_str(addr, rpcrdma_addrstr(r_xprt));
+ __assign_str(port, rpcrdma_portstr(r_xprt));
+ ),
+
+ TP_printk("peer=[%s]:%s r_xprt=%p",
+ __get_str(addr), __get_str(port), __entry->r_xprt
+ )
+);
+
+#define DEFINE_RXPRT_EVENT(name) \
+ DEFINE_EVENT(xprtrdma_rxprt, name, \
+ TP_PROTO( \
+ const struct rpcrdma_xprt *r_xprt \
+ ), \
+ TP_ARGS(r_xprt))
+
+DECLARE_EVENT_CLASS(xprtrdma_rdch_event,
+ TP_PROTO(
+ const struct rpc_task *task,
+ unsigned int pos,
+ struct rpcrdma_mr *mr,
+ int nsegs
+ ),
+
+ TP_ARGS(task, pos, mr, nsegs),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(const void *, mr)
+ __field(unsigned int, pos)
+ __field(int, nents)
+ __field(u32, handle)
+ __field(u32, length)
+ __field(u64, offset)
+ __field(int, nsegs)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ __entry->mr = mr;
+ __entry->pos = pos;
+ __entry->nents = mr->mr_nents;
+ __entry->handle = mr->mr_handle;
+ __entry->length = mr->mr_length;
+ __entry->offset = mr->mr_offset;
+ __entry->nsegs = nsegs;
+ ),
+
+ TP_printk("task:%u@%u mr=%p pos=%u %u@0x%016llx:0x%08x (%s)",
+ __entry->task_id, __entry->client_id, __entry->mr,
+ __entry->pos, __entry->length,
+ (unsigned long long)__entry->offset, __entry->handle,
+ __entry->nents < __entry->nsegs ? "more" : "last"
+ )
+);
+
+#define DEFINE_RDCH_EVENT(name) \
+ DEFINE_EVENT(xprtrdma_rdch_event, name, \
+ TP_PROTO( \
+ const struct rpc_task *task, \
+ unsigned int pos, \
+ struct rpcrdma_mr *mr, \
+ int nsegs \
+ ), \
+ TP_ARGS(task, pos, mr, nsegs))
+
+DECLARE_EVENT_CLASS(xprtrdma_wrch_event,
+ TP_PROTO(
+ const struct rpc_task *task,
+ struct rpcrdma_mr *mr,
+ int nsegs
+ ),
+
+ TP_ARGS(task, mr, nsegs),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(const void *, mr)
+ __field(int, nents)
+ __field(u32, handle)
+ __field(u32, length)
+ __field(u64, offset)
+ __field(int, nsegs)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ __entry->mr = mr;
+ __entry->nents = mr->mr_nents;
+ __entry->handle = mr->mr_handle;
+ __entry->length = mr->mr_length;
+ __entry->offset = mr->mr_offset;
+ __entry->nsegs = nsegs;
+ ),
+
+ TP_printk("task:%u@%u mr=%p %u@0x%016llx:0x%08x (%s)",
+ __entry->task_id, __entry->client_id, __entry->mr,
+ __entry->length, (unsigned long long)__entry->offset,
+ __entry->handle,
+ __entry->nents < __entry->nsegs ? "more" : "last"
+ )
+);
+
+#define DEFINE_WRCH_EVENT(name) \
+ DEFINE_EVENT(xprtrdma_wrch_event, name, \
+ TP_PROTO( \
+ const struct rpc_task *task, \
+ struct rpcrdma_mr *mr, \
+ int nsegs \
+ ), \
+ TP_ARGS(task, mr, nsegs))
+
+TRACE_DEFINE_ENUM(FRWR_IS_INVALID);
+TRACE_DEFINE_ENUM(FRWR_IS_VALID);
+TRACE_DEFINE_ENUM(FRWR_FLUSHED_FR);
+TRACE_DEFINE_ENUM(FRWR_FLUSHED_LI);
+
+#define xprtrdma_show_frwr_state(x) \
+ __print_symbolic(x, \
+ { FRWR_IS_INVALID, "INVALID" }, \
+ { FRWR_IS_VALID, "VALID" }, \
+ { FRWR_FLUSHED_FR, "FLUSHED_FR" }, \
+ { FRWR_FLUSHED_LI, "FLUSHED_LI" })
+
+DECLARE_EVENT_CLASS(xprtrdma_frwr_done,
+ TP_PROTO(
+ const struct ib_wc *wc,
+ const struct rpcrdma_frwr *frwr
+ ),
+
+ TP_ARGS(wc, frwr),
+
+ TP_STRUCT__entry(
+ __field(const void *, mr)
+ __field(unsigned int, state)
+ __field(unsigned int, status)
+ __field(unsigned int, vendor_err)
+ ),
+
+ TP_fast_assign(
+ __entry->mr = container_of(frwr, struct rpcrdma_mr, frwr);
+ __entry->state = frwr->fr_state;
+ __entry->status = wc->status;
+ __entry->vendor_err = __entry->status ? wc->vendor_err : 0;
+ ),
+
+ TP_printk(
+ "mr=%p state=%s: %s (%u/0x%x)",
+ __entry->mr, xprtrdma_show_frwr_state(__entry->state),
+ rdma_show_wc_status(__entry->status),
+ __entry->status, __entry->vendor_err
+ )
+);
+
+#define DEFINE_FRWR_DONE_EVENT(name) \
+ DEFINE_EVENT(xprtrdma_frwr_done, name, \
+ TP_PROTO( \
+ const struct ib_wc *wc, \
+ const struct rpcrdma_frwr *frwr \
+ ), \
+ TP_ARGS(wc, frwr))
+
+DECLARE_EVENT_CLASS(xprtrdma_mr,
+ TP_PROTO(
+ const struct rpcrdma_mr *mr
+ ),
+
+ TP_ARGS(mr),
+
+ TP_STRUCT__entry(
+ __field(const void *, mr)
+ __field(u32, handle)
+ __field(u32, length)
+ __field(u64, offset)
+ ),
+
+ TP_fast_assign(
+ __entry->mr = mr;
+ __entry->handle = mr->mr_handle;
+ __entry->length = mr->mr_length;
+ __entry->offset = mr->mr_offset;
+ ),
+
+ TP_printk("mr=%p %u@0x%016llx:0x%08x",
+ __entry->mr, __entry->length,
+ (unsigned long long)__entry->offset,
+ __entry->handle
+ )
+);
+
+#define DEFINE_MR_EVENT(name) \
+ DEFINE_EVENT(xprtrdma_mr, name, \
+ TP_PROTO( \
+ const struct rpcrdma_mr *mr \
+ ), \
+ TP_ARGS(mr))
+
+DECLARE_EVENT_CLASS(xprtrdma_cb_event,
+ TP_PROTO(
+ const struct rpc_rqst *rqst
+ ),
+
+ TP_ARGS(rqst),
+
+ TP_STRUCT__entry(
+ __field(const void *, rqst)
+ __field(const void *, rep)
+ __field(const void *, req)
+ __field(u32, xid)
+ ),
+
+ TP_fast_assign(
+ __entry->rqst = rqst;
+ __entry->req = rpcr_to_rdmar(rqst);
+ __entry->rep = rpcr_to_rdmar(rqst)->rl_reply;
+ __entry->xid = be32_to_cpu(rqst->rq_xid);
+ ),
+
+ TP_printk("xid=0x%08x, rqst=%p req=%p rep=%p",
+ __entry->xid, __entry->rqst, __entry->req, __entry->rep
+ )
+);
+
+#define DEFINE_CB_EVENT(name) \
+ DEFINE_EVENT(xprtrdma_cb_event, name, \
+ TP_PROTO( \
+ const struct rpc_rqst *rqst \
+ ), \
+ TP_ARGS(rqst))
+
+/**
+ ** Connection events
+ **/
+
+TRACE_EVENT(xprtrdma_conn_upcall,
+ TP_PROTO(
+ const struct rpcrdma_xprt *r_xprt,
+ struct rdma_cm_event *event
+ ),
+
+ TP_ARGS(r_xprt, event),
+
+ TP_STRUCT__entry(
+ __field(const void *, r_xprt)
+ __field(unsigned int, event)
+ __field(int, status)
+ __string(addr, rpcrdma_addrstr(r_xprt))
+ __string(port, rpcrdma_portstr(r_xprt))
+ ),
+
+ TP_fast_assign(
+ __entry->r_xprt = r_xprt;
+ __entry->event = event->event;
+ __entry->status = event->status;
+ __assign_str(addr, rpcrdma_addrstr(r_xprt));
+ __assign_str(port, rpcrdma_portstr(r_xprt));
+ ),
+
+ TP_printk("peer=[%s]:%s r_xprt=%p: %s (%u/%d)",
+ __get_str(addr), __get_str(port),
+ __entry->r_xprt, rdma_show_cm_event(__entry->event),
+ __entry->event, __entry->status
+ )
+);
+
+TRACE_EVENT(xprtrdma_disconnect,
+ TP_PROTO(
+ const struct rpcrdma_xprt *r_xprt,
+ int status
+ ),
+
+ TP_ARGS(r_xprt, status),
+
+ TP_STRUCT__entry(
+ __field(const void *, r_xprt)
+ __field(int, status)
+ __field(int, connected)
+ __string(addr, rpcrdma_addrstr(r_xprt))
+ __string(port, rpcrdma_portstr(r_xprt))
+ ),
+
+ TP_fast_assign(
+ __entry->r_xprt = r_xprt;
+ __entry->status = status;
+ __entry->connected = r_xprt->rx_ep.rep_connected;
+ __assign_str(addr, rpcrdma_addrstr(r_xprt));
+ __assign_str(port, rpcrdma_portstr(r_xprt));
+ ),
+
+ TP_printk("peer=[%s]:%s r_xprt=%p: status=%d %sconnected",
+ __get_str(addr), __get_str(port),
+ __entry->r_xprt, __entry->status,
+ __entry->connected == 1 ? "still " : "dis"
+ )
+);
+
+DEFINE_RXPRT_EVENT(xprtrdma_conn_start);
+DEFINE_RXPRT_EVENT(xprtrdma_conn_tout);
+DEFINE_RXPRT_EVENT(xprtrdma_create);
+DEFINE_RXPRT_EVENT(xprtrdma_destroy);
+DEFINE_RXPRT_EVENT(xprtrdma_remove);
+DEFINE_RXPRT_EVENT(xprtrdma_reinsert);
+DEFINE_RXPRT_EVENT(xprtrdma_reconnect);
+DEFINE_RXPRT_EVENT(xprtrdma_inject_dsc);
+
+TRACE_EVENT(xprtrdma_qp_error,
+ TP_PROTO(
+ const struct rpcrdma_xprt *r_xprt,
+ const struct ib_event *event
+ ),
+
+ TP_ARGS(r_xprt, event),
+
+ TP_STRUCT__entry(
+ __field(const void *, r_xprt)
+ __field(unsigned int, event)
+ __string(name, event->device->name)
+ __string(addr, rpcrdma_addrstr(r_xprt))
+ __string(port, rpcrdma_portstr(r_xprt))
+ ),
+
+ TP_fast_assign(
+ __entry->r_xprt = r_xprt;
+ __entry->event = event->event;
+ __assign_str(name, event->device->name);
+ __assign_str(addr, rpcrdma_addrstr(r_xprt));
+ __assign_str(port, rpcrdma_portstr(r_xprt));
+ ),
+
+ TP_printk("peer=[%s]:%s r_xprt=%p: dev %s: %s (%u)",
+ __get_str(addr), __get_str(port), __entry->r_xprt,
+ __get_str(name), rdma_show_ib_event(__entry->event),
+ __entry->event
+ )
+);
+
+/**
+ ** Call events
+ **/
+
+TRACE_EVENT(xprtrdma_createmrs,
+ TP_PROTO(
+ const struct rpcrdma_xprt *r_xprt,
+ unsigned int count
+ ),
+
+ TP_ARGS(r_xprt, count),
+
+ TP_STRUCT__entry(
+ __field(const void *, r_xprt)
+ __field(unsigned int, count)
+ ),
+
+ TP_fast_assign(
+ __entry->r_xprt = r_xprt;
+ __entry->count = count;
+ ),
+
+ TP_printk("r_xprt=%p: created %u MRs",
+ __entry->r_xprt, __entry->count
+ )
+);
+
+DEFINE_RXPRT_EVENT(xprtrdma_nomrs);
+
+DEFINE_RDCH_EVENT(xprtrdma_read_chunk);
+DEFINE_WRCH_EVENT(xprtrdma_write_chunk);
+DEFINE_WRCH_EVENT(xprtrdma_reply_chunk);
+
+TRACE_DEFINE_ENUM(rpcrdma_noch);
+TRACE_DEFINE_ENUM(rpcrdma_readch);
+TRACE_DEFINE_ENUM(rpcrdma_areadch);
+TRACE_DEFINE_ENUM(rpcrdma_writech);
+TRACE_DEFINE_ENUM(rpcrdma_replych);
+
+#define xprtrdma_show_chunktype(x) \
+ __print_symbolic(x, \
+ { rpcrdma_noch, "inline" }, \
+ { rpcrdma_readch, "read list" }, \
+ { rpcrdma_areadch, "*read list" }, \
+ { rpcrdma_writech, "write list" }, \
+ { rpcrdma_replych, "reply chunk" })
+
+TRACE_EVENT(xprtrdma_marshal,
+ TP_PROTO(
+ const struct rpc_rqst *rqst,
+ unsigned int hdrlen,
+ unsigned int rtype,
+ unsigned int wtype
+ ),
+
+ TP_ARGS(rqst, hdrlen, rtype, wtype),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, xid)
+ __field(unsigned int, hdrlen)
+ __field(unsigned int, headlen)
+ __field(unsigned int, pagelen)
+ __field(unsigned int, taillen)
+ __field(unsigned int, rtype)
+ __field(unsigned int, wtype)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = rqst->rq_task->tk_pid;
+ __entry->client_id = rqst->rq_task->tk_client->cl_clid;
+ __entry->xid = be32_to_cpu(rqst->rq_xid);
+ __entry->hdrlen = hdrlen;
+ __entry->headlen = rqst->rq_snd_buf.head[0].iov_len;
+ __entry->pagelen = rqst->rq_snd_buf.page_len;
+ __entry->taillen = rqst->rq_snd_buf.tail[0].iov_len;
+ __entry->rtype = rtype;
+ __entry->wtype = wtype;
+ ),
+
+ TP_printk("task:%u@%u xid=0x%08x: hdr=%u xdr=%u/%u/%u %s/%s",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __entry->hdrlen,
+ __entry->headlen, __entry->pagelen, __entry->taillen,
+ xprtrdma_show_chunktype(__entry->rtype),
+ xprtrdma_show_chunktype(__entry->wtype)
+ )
+);
+
+TRACE_EVENT(xprtrdma_post_send,
+ TP_PROTO(
+ const struct rpcrdma_req *req,
+ int status
+ ),
+
+ TP_ARGS(req, status),
+
+ TP_STRUCT__entry(
+ __field(const void *, req)
+ __field(int, num_sge)
+ __field(bool, signaled)
+ __field(int, status)
+ ),
+
+ TP_fast_assign(
+ __entry->req = req;
+ __entry->num_sge = req->rl_sendctx->sc_wr.num_sge;
+ __entry->signaled = req->rl_sendctx->sc_wr.send_flags &
+ IB_SEND_SIGNALED;
+ __entry->status = status;
+ ),
+
+ TP_printk("req=%p, %d SGEs%s, status=%d",
+ __entry->req, __entry->num_sge,
+ (__entry->signaled ? ", signaled" : ""),
+ __entry->status
+ )
+);
+
+TRACE_EVENT(xprtrdma_post_recv,
+ TP_PROTO(
+ const struct rpcrdma_rep *rep,
+ int status
+ ),
+
+ TP_ARGS(rep, status),
+
+ TP_STRUCT__entry(
+ __field(const void *, rep)
+ __field(int, status)
+ ),
+
+ TP_fast_assign(
+ __entry->rep = rep;
+ __entry->status = status;
+ ),
+
+ TP_printk("rep=%p status=%d",
+ __entry->rep, __entry->status
+ )
+);
+
+/**
+ ** Completion events
+ **/
+
+TRACE_EVENT(xprtrdma_wc_send,
+ TP_PROTO(
+ const struct rpcrdma_sendctx *sc,
+ const struct ib_wc *wc
+ ),
+
+ TP_ARGS(sc, wc),
+
+ TP_STRUCT__entry(
+ __field(const void *, req)
+ __field(unsigned int, unmap_count)
+ __field(unsigned int, status)
+ __field(unsigned int, vendor_err)
+ ),
+
+ TP_fast_assign(
+ __entry->req = sc->sc_req;
+ __entry->unmap_count = sc->sc_unmap_count;
+ __entry->status = wc->status;
+ __entry->vendor_err = __entry->status ? wc->vendor_err : 0;
+ ),
+
+ TP_printk("req=%p, unmapped %u pages: %s (%u/0x%x)",
+ __entry->req, __entry->unmap_count,
+ rdma_show_wc_status(__entry->status),
+ __entry->status, __entry->vendor_err
+ )
+);
+
+TRACE_EVENT(xprtrdma_wc_receive,
+ TP_PROTO(
+ const struct rpcrdma_rep *rep,
+ const struct ib_wc *wc
+ ),
+
+ TP_ARGS(rep, wc),
+
+ TP_STRUCT__entry(
+ __field(const void *, rep)
+ __field(unsigned int, byte_len)
+ __field(unsigned int, status)
+ __field(unsigned int, vendor_err)
+ ),
+
+ TP_fast_assign(
+ __entry->rep = rep;
+ __entry->byte_len = wc->byte_len;
+ __entry->status = wc->status;
+ __entry->vendor_err = __entry->status ? wc->vendor_err : 0;
+ ),
+
+ TP_printk("rep=%p, %u bytes: %s (%u/0x%x)",
+ __entry->rep, __entry->byte_len,
+ rdma_show_wc_status(__entry->status),
+ __entry->status, __entry->vendor_err
+ )
+);
+
+DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_fastreg);
+DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li);
+DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_wake);
+
+DEFINE_MR_EVENT(xprtrdma_localinv);
+DEFINE_MR_EVENT(xprtrdma_dma_unmap);
+DEFINE_MR_EVENT(xprtrdma_remoteinv);
+DEFINE_MR_EVENT(xprtrdma_recover_mr);
+
+/**
+ ** Reply events
+ **/
+
+TRACE_EVENT(xprtrdma_reply,
+ TP_PROTO(
+ const struct rpc_task *task,
+ const struct rpcrdma_rep *rep,
+ const struct rpcrdma_req *req,
+ unsigned int credits
+ ),
+
+ TP_ARGS(task, rep, req, credits),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(const void *, rep)
+ __field(const void *, req)
+ __field(u32, xid)
+ __field(unsigned int, credits)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ __entry->rep = rep;
+ __entry->req = req;
+ __entry->xid = be32_to_cpu(rep->rr_xid);
+ __entry->credits = credits;
+ ),
+
+ TP_printk("task:%u@%u xid=0x%08x, %u credits, rep=%p -> req=%p",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __entry->credits, __entry->rep, __entry->req
+ )
+);
+
+TRACE_EVENT(xprtrdma_defer_cmp,
+ TP_PROTO(
+ const struct rpcrdma_rep *rep
+ ),
+
+ TP_ARGS(rep),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(const void *, rep)
+ __field(u32, xid)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = rep->rr_rqst->rq_task->tk_pid;
+ __entry->client_id = rep->rr_rqst->rq_task->tk_client->cl_clid;
+ __entry->rep = rep;
+ __entry->xid = be32_to_cpu(rep->rr_xid);
+ ),
+
+ TP_printk("task:%u@%u xid=0x%08x rep=%p",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __entry->rep
+ )
+);
+
+DEFINE_REPLY_EVENT(xprtrdma_reply_vers);
+DEFINE_REPLY_EVENT(xprtrdma_reply_rqst);
+DEFINE_REPLY_EVENT(xprtrdma_reply_short);
+DEFINE_REPLY_EVENT(xprtrdma_reply_hdr);
+
+TRACE_EVENT(xprtrdma_fixup,
+ TP_PROTO(
+ const struct rpc_rqst *rqst,
+ int len,
+ int hdrlen
+ ),
+
+ TP_ARGS(rqst, len, hdrlen),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(const void *, base)
+ __field(int, len)
+ __field(int, hdrlen)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = rqst->rq_task->tk_pid;
+ __entry->client_id = rqst->rq_task->tk_client->cl_clid;
+ __entry->base = rqst->rq_rcv_buf.head[0].iov_base;
+ __entry->len = len;
+ __entry->hdrlen = hdrlen;
+ ),
+
+ TP_printk("task:%u@%u base=%p len=%d hdrlen=%d",
+ __entry->task_id, __entry->client_id,
+ __entry->base, __entry->len, __entry->hdrlen
+ )
+);
+
+TRACE_EVENT(xprtrdma_fixup_pg,
+ TP_PROTO(
+ const struct rpc_rqst *rqst,
+ int pageno,
+ const void *pos,
+ int len,
+ int curlen
+ ),
+
+ TP_ARGS(rqst, pageno, pos, len, curlen),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(const void *, pos)
+ __field(int, pageno)
+ __field(int, len)
+ __field(int, curlen)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = rqst->rq_task->tk_pid;
+ __entry->client_id = rqst->rq_task->tk_client->cl_clid;
+ __entry->pos = pos;
+ __entry->pageno = pageno;
+ __entry->len = len;
+ __entry->curlen = curlen;
+ ),
+
+ TP_printk("task:%u@%u pageno=%d pos=%p len=%d curlen=%d",
+ __entry->task_id, __entry->client_id,
+ __entry->pageno, __entry->pos, __entry->len, __entry->curlen
+ )
+);
+
+TRACE_EVENT(xprtrdma_decode_seg,
+ TP_PROTO(
+ u32 handle,
+ u32 length,
+ u64 offset
+ ),
+
+ TP_ARGS(handle, length, offset),
+
+ TP_STRUCT__entry(
+ __field(u32, handle)
+ __field(u32, length)
+ __field(u64, offset)
+ ),
+
+ TP_fast_assign(
+ __entry->handle = handle;
+ __entry->length = length;
+ __entry->offset = offset;
+ ),
+
+ TP_printk("%u@0x%016llx:0x%08x",
+ __entry->length, (unsigned long long)__entry->offset,
+ __entry->handle
+ )
+);
+
+/**
+ ** Allocation/release of rpcrdma_reqs and rpcrdma_reps
+ **/
+
+TRACE_EVENT(xprtrdma_allocate,
+ TP_PROTO(
+ const struct rpc_task *task,
+ const struct rpcrdma_req *req
+ ),
+
+ TP_ARGS(task, req),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(const void *, req)
+ __field(const void *, rep)
+ __field(size_t, callsize)
+ __field(size_t, rcvsize)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ __entry->req = req;
+ __entry->rep = req ? req->rl_reply : NULL;
+ __entry->callsize = task->tk_rqstp->rq_callsize;
+ __entry->rcvsize = task->tk_rqstp->rq_rcvsize;
+ ),
+
+ TP_printk("task:%u@%u req=%p rep=%p (%zu, %zu)",
+ __entry->task_id, __entry->client_id,
+ __entry->req, __entry->rep,
+ __entry->callsize, __entry->rcvsize
+ )
+);
+
+TRACE_EVENT(xprtrdma_rpc_done,
+ TP_PROTO(
+ const struct rpc_task *task,
+ const struct rpcrdma_req *req
+ ),
+
+ TP_ARGS(task, req),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(const void *, req)
+ __field(const void *, rep)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ __entry->req = req;
+ __entry->rep = req->rl_reply;
+ ),
+
+ TP_printk("task:%u@%u req=%p rep=%p",
+ __entry->task_id, __entry->client_id,
+ __entry->req, __entry->rep
+ )
+);
+
+DEFINE_RXPRT_EVENT(xprtrdma_noreps);
+
+/**
+ ** Callback events
+ **/
+
+TRACE_EVENT(xprtrdma_cb_setup,
+ TP_PROTO(
+ const struct rpcrdma_xprt *r_xprt,
+ unsigned int reqs
+ ),
+
+ TP_ARGS(r_xprt, reqs),
+
+ TP_STRUCT__entry(
+ __field(const void *, r_xprt)
+ __field(unsigned int, reqs)
+ __string(addr, rpcrdma_addrstr(r_xprt))
+ __string(port, rpcrdma_portstr(r_xprt))
+ ),
+
+ TP_fast_assign(
+ __entry->r_xprt = r_xprt;
+ __entry->reqs = reqs;
+ __assign_str(addr, rpcrdma_addrstr(r_xprt));
+ __assign_str(port, rpcrdma_portstr(r_xprt));
+ ),
+
+ TP_printk("peer=[%s]:%s r_xprt=%p: %u reqs",
+ __get_str(addr), __get_str(port),
+ __entry->r_xprt, __entry->reqs
+ )
+);
+
+DEFINE_CB_EVENT(xprtrdma_cb_call);
+DEFINE_CB_EVENT(xprtrdma_cb_reply);
+
+#endif /* _TRACE_RPCRDMA_H */
+
+#include <trace/define_trace.h>
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 8c153f68509e..970c91a83173 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -32,7 +32,7 @@ DECLARE_EVENT_CLASS(rpc_task_status,
__entry->status = task->tk_status;
),
- TP_printk("task:%u@%u, status %d",
+ TP_printk("task:%u@%u status=%d",
__entry->task_id, __entry->client_id,
__entry->status)
);
@@ -66,7 +66,7 @@ TRACE_EVENT(rpc_connect_status,
__entry->status = status;
),
- TP_printk("task:%u@%u, status %d",
+ TP_printk("task:%u@%u status=%d",
__entry->task_id, __entry->client_id,
__entry->status)
);
@@ -175,7 +175,7 @@ DECLARE_EVENT_CLASS(rpc_task_queued,
),
TP_fast_assign(
- __entry->client_id = clnt->cl_clid;
+ __entry->client_id = clnt ? clnt->cl_clid : -1;
__entry->task_id = task->tk_pid;
__entry->timeout = task->tk_timeout;
__entry->runstate = task->tk_runstate;
@@ -184,7 +184,7 @@ DECLARE_EVENT_CLASS(rpc_task_queued,
__assign_str(q_name, rpc_qname(q));
),
- TP_printk("task:%u@%u flags=%4.4x state=%4.4lx status=%d timeout=%lu queue=%s",
+ TP_printk("task:%u@%d flags=%4.4x state=%4.4lx status=%d timeout=%lu queue=%s",
__entry->task_id, __entry->client_id,
__entry->flags,
__entry->runstate,
@@ -390,6 +390,10 @@ DECLARE_EVENT_CLASS(rpc_xprt_event,
__entry->status)
);
+DEFINE_EVENT(rpc_xprt_event, xprt_timer,
+ TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status),
+ TP_ARGS(xprt, xid, status));
+
DEFINE_EVENT(rpc_xprt_event, xprt_lookup_rqst,
TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status),
TP_ARGS(xprt, xid, status));
diff --git a/include/uapi/linux/nfs.h b/include/uapi/linux/nfs.h
index 057d22a48416..946cb62d64b0 100644
--- a/include/uapi/linux/nfs.h
+++ b/include/uapi/linux/nfs.h
@@ -12,6 +12,7 @@
#define NFS_PROGRAM 100003
#define NFS_PORT 2049
+#define NFS_RDMA_PORT 20049
#define NFS_MAXDATA 8192
#define NFS_MAXPATHLEN 1024
#define NFS_MAXNAMLEN 255
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index e2a4184f3c5d..6e432ecd7f99 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1376,22 +1376,6 @@ rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize
EXPORT_SYMBOL_GPL(rpc_setbufsize);
/**
- * rpc_protocol - Get transport protocol number for an RPC client
- * @clnt: RPC client to query
- *
- */
-int rpc_protocol(struct rpc_clnt *clnt)
-{
- int protocol;
-
- rcu_read_lock();
- protocol = rcu_dereference(clnt->cl_xprt)->prot;
- rcu_read_unlock();
- return protocol;
-}
-EXPORT_SYMBOL_GPL(rpc_protocol);
-
-/**
* rpc_net_ns - Get the network namespace for this RPC client
* @clnt: RPC client to query
*
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index b1b49edd7c4d..896691afbb1a 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -755,22 +755,20 @@ static void __rpc_execute(struct rpc_task *task)
void (*do_action)(struct rpc_task *);
/*
- * Execute any pending callback first.
+ * Perform the next FSM step or a pending callback.
+ *
+ * tk_action may be NULL if the task has been killed.
+ * In particular, note that rpc_killall_tasks may
+ * do this at any time, so beware when dereferencing.
*/
- do_action = task->tk_callback;
- task->tk_callback = NULL;
- if (do_action == NULL) {
- /*
- * Perform the next FSM step.
- * tk_action may be NULL if the task has been killed.
- * In particular, note that rpc_killall_tasks may
- * do this at any time, so beware when dereferencing.
- */
- do_action = task->tk_action;
- if (do_action == NULL)
- break;
+ do_action = task->tk_action;
+ if (task->tk_callback) {
+ do_action = task->tk_callback;
+ task->tk_callback = NULL;
}
- trace_rpc_task_run_action(task->tk_client, task, task->tk_action);
+ if (!do_action)
+ break;
+ trace_rpc_task_run_action(task->tk_client, task, do_action);
do_action(task);
/*
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 33b74fd84051..2436fd1125fc 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -940,8 +940,8 @@ static void xprt_timer(struct rpc_task *task)
if (task->tk_status != -ETIMEDOUT)
return;
- dprintk("RPC: %5u xprt_timer\n", task->tk_pid);
+ trace_xprt_timer(xprt, req->rq_xid, task->tk_status);
if (!req->rq_reply_bytes_recvd) {
if (xprt->ops->timer)
xprt->ops->timer(xprt, task);
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 8b818bb3518a..ed1a4a3065ee 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -43,7 +43,6 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
req = rpcrdma_create_req(r_xprt);
if (IS_ERR(req))
return PTR_ERR(req);
- __set_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags);
rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
DMA_TO_DEVICE, GFP_KERNEL);
@@ -74,21 +73,13 @@ out_fail:
static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
unsigned int count)
{
- struct rpcrdma_rep *rep;
int rc = 0;
while (count--) {
- rep = rpcrdma_create_rep(r_xprt);
- if (IS_ERR(rep)) {
- pr_err("RPC: %s: reply buffer alloc failed\n",
- __func__);
- rc = PTR_ERR(rep);
+ rc = rpcrdma_create_rep(r_xprt);
+ if (rc)
break;
- }
-
- rpcrdma_recv_buffer_put(rep);
}
-
return rc;
}
@@ -129,6 +120,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
rqst->rq_xprt = &r_xprt->rx_xprt;
INIT_LIST_HEAD(&rqst->rq_list);
INIT_LIST_HEAD(&rqst->rq_bc_list);
+ __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
if (rpcrdma_bc_setup_rqst(r_xprt, rqst))
goto out_free;
@@ -148,7 +140,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
buffer->rb_bc_srv_max_requests = reqs;
request_module("svcrdma");
-
+ trace_xprtrdma_cb_setup(r_xprt, reqs);
return 0;
out_free:
@@ -196,13 +188,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
return maxmsg - RPCRDMA_HDRLEN_MIN;
}
-/**
- * rpcrdma_bc_marshal_reply - Send backwards direction reply
- * @rqst: buffer containing RPC reply data
- *
- * Returns zero on success.
- */
-int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
+static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
@@ -226,7 +212,46 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN,
&rqst->rq_snd_buf, rpcrdma_noch))
return -EIO;
+
+ trace_xprtrdma_cb_reply(rqst);
+ return 0;
+}
+
+/**
+ * xprt_rdma_bc_send_reply - marshal and send a backchannel reply
+ * @rqst: RPC rqst with a backchannel RPC reply in rq_snd_buf
+ *
+ * Caller holds the transport's write lock.
+ *
+ * Returns:
+ * %0 if the RPC message has been sent
+ * %-ENOTCONN if the caller should reconnect and call again
+ * %-EIO if a permanent error occurred and the request was not
+ * sent. Do not try to send this message again.
+ */
+int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
+{
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+ int rc;
+
+ if (!xprt_connected(rqst->rq_xprt))
+ goto drop_connection;
+
+ rc = rpcrdma_bc_marshal_reply(rqst);
+ if (rc < 0)
+ goto failed_marshal;
+
+ if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
+ goto drop_connection;
return 0;
+
+failed_marshal:
+ if (rc != -ENOTCONN)
+ return rc;
+drop_connection:
+ xprt_disconnect_done(rqst->rq_xprt);
+ return -ENOTCONN;
}
/**
@@ -262,11 +287,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
dprintk("RPC: %s: freeing rqst %p (req %p)\n",
__func__, rqst, rpcr_to_rdmar(rqst));
- smp_mb__before_atomic();
- WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state));
- clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
- smp_mb__after_atomic();
-
spin_lock_bh(&xprt->bc_pa_lock);
list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
spin_unlock_bh(&xprt->bc_pa_lock);
@@ -274,7 +294,7 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
/**
* rpcrdma_bc_receive_call - Handle a backward direction call
- * @xprt: transport receiving the call
+ * @r_xprt: transport receiving the call
* @rep: receive buffer containing the call
*
* Operational assumptions:
@@ -313,7 +333,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst, rq_bc_pa_list);
list_del(&rqst->rq_bc_pa_list);
spin_unlock(&xprt->bc_pa_lock);
- dprintk("RPC: %s: using rqst %p\n", __func__, rqst);
/* Prepare rqst */
rqst->rq_reply_bytes_recvd = 0;
@@ -321,7 +340,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
rqst->rq_xid = *p;
rqst->rq_private_buf.len = size;
- set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
buf = &rqst->rq_rcv_buf;
memset(buf, 0, sizeof(*buf));
@@ -335,12 +353,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
* the Upper Layer is done decoding it.
*/
req = rpcr_to_rdmar(rqst);
- dprintk("RPC: %s: attaching rep %p to req %p\n",
- __func__, rep, req);
req->rl_reply = rep;
-
- /* Defeat the retransmit detection logic in send_request */
- req->rl_connect_cookie = 0;
+ trace_xprtrdma_cb_call(rqst);
/* Queue rqst for ULP's callback service */
bc_serv = xprt->bc_serv;
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 29fc84c7ff98..d5f95bb39300 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (c) 2015 Oracle. All rights reserved.
+ * Copyright (c) 2015, 2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*/
@@ -47,7 +47,7 @@ fmr_is_supported(struct rpcrdma_ia *ia)
}
static int
-fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw)
+fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
static struct ib_fmr_attr fmr_attr = {
.max_pages = RPCRDMA_MAX_FMR_SGES,
@@ -55,106 +55,108 @@ fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw)
.page_shift = PAGE_SHIFT
};
- mw->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
+ mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
sizeof(u64), GFP_KERNEL);
- if (!mw->fmr.fm_physaddrs)
+ if (!mr->fmr.fm_physaddrs)
goto out_free;
- mw->mw_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
- sizeof(*mw->mw_sg), GFP_KERNEL);
- if (!mw->mw_sg)
+ mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
+ sizeof(*mr->mr_sg), GFP_KERNEL);
+ if (!mr->mr_sg)
goto out_free;
- sg_init_table(mw->mw_sg, RPCRDMA_MAX_FMR_SGES);
+ sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);
- mw->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
+ mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
&fmr_attr);
- if (IS_ERR(mw->fmr.fm_mr))
+ if (IS_ERR(mr->fmr.fm_mr))
goto out_fmr_err;
return 0;
out_fmr_err:
dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__,
- PTR_ERR(mw->fmr.fm_mr));
+ PTR_ERR(mr->fmr.fm_mr));
out_free:
- kfree(mw->mw_sg);
- kfree(mw->fmr.fm_physaddrs);
+ kfree(mr->mr_sg);
+ kfree(mr->fmr.fm_physaddrs);
return -ENOMEM;
}
static int
-__fmr_unmap(struct rpcrdma_mw *mw)
+__fmr_unmap(struct rpcrdma_mr *mr)
{
LIST_HEAD(l);
int rc;
- list_add(&mw->fmr.fm_mr->list, &l);
+ list_add(&mr->fmr.fm_mr->list, &l);
rc = ib_unmap_fmr(&l);
- list_del(&mw->fmr.fm_mr->list);
+ list_del(&mr->fmr.fm_mr->list);
return rc;
}
static void
-fmr_op_release_mr(struct rpcrdma_mw *r)
+fmr_op_release_mr(struct rpcrdma_mr *mr)
{
LIST_HEAD(unmap_list);
int rc;
/* Ensure MW is not on any rl_registered list */
- if (!list_empty(&r->mw_list))
- list_del(&r->mw_list);
+ if (!list_empty(&mr->mr_list))
+ list_del(&mr->mr_list);
- kfree(r->fmr.fm_physaddrs);
- kfree(r->mw_sg);
+ kfree(mr->fmr.fm_physaddrs);
+ kfree(mr->mr_sg);
/* In case this one was left mapped, try to unmap it
* to prevent dealloc_fmr from failing with EBUSY
*/
- rc = __fmr_unmap(r);
+ rc = __fmr_unmap(mr);
if (rc)
pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
- r, rc);
+ mr, rc);
- rc = ib_dealloc_fmr(r->fmr.fm_mr);
+ rc = ib_dealloc_fmr(mr->fmr.fm_mr);
if (rc)
pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n",
- r, rc);
+ mr, rc);
- kfree(r);
+ kfree(mr);
}
/* Reset of a single FMR.
*/
static void
-fmr_op_recover_mr(struct rpcrdma_mw *mw)
+fmr_op_recover_mr(struct rpcrdma_mr *mr)
{
- struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+ struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
int rc;
/* ORDER: invalidate first */
- rc = __fmr_unmap(mw);
-
- /* ORDER: then DMA unmap */
- ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
+ rc = __fmr_unmap(mr);
if (rc)
goto out_release;
- rpcrdma_put_mw(r_xprt, mw);
+ /* ORDER: then DMA unmap */
+ rpcrdma_mr_unmap_and_put(mr);
+
r_xprt->rx_stats.mrs_recovered++;
return;
out_release:
- pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mw);
+ pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr);
r_xprt->rx_stats.mrs_orphaned++;
- spin_lock(&r_xprt->rx_buf.rb_mwlock);
- list_del(&mw->mw_all);
- spin_unlock(&r_xprt->rx_buf.rb_mwlock);
+ trace_xprtrdma_dma_unmap(mr);
+ ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+ mr->mr_sg, mr->mr_nents, mr->mr_dir);
+
+ spin_lock(&r_xprt->rx_buf.rb_mrlock);
+ list_del(&mr->mr_all);
+ spin_unlock(&r_xprt->rx_buf.rb_mrlock);
- fmr_op_release_mr(mw);
+ fmr_op_release_mr(mr);
}
static int
@@ -180,15 +182,15 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
*/
static struct rpcrdma_mr_seg *
fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
- int nsegs, bool writing, struct rpcrdma_mw **out)
+ int nsegs, bool writing, struct rpcrdma_mr **out)
{
struct rpcrdma_mr_seg *seg1 = seg;
int len, pageoff, i, rc;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
u64 *dma_pages;
- mw = rpcrdma_get_mw(r_xprt);
- if (!mw)
+ mr = rpcrdma_mr_get(r_xprt);
+ if (!mr)
return ERR_PTR(-ENOBUFS);
pageoff = offset_in_page(seg1->mr_offset);
@@ -199,12 +201,12 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
nsegs = RPCRDMA_MAX_FMR_SGES;
for (i = 0; i < nsegs;) {
if (seg->mr_page)
- sg_set_page(&mw->mw_sg[i],
+ sg_set_page(&mr->mr_sg[i],
seg->mr_page,
seg->mr_len,
offset_in_page(seg->mr_offset));
else
- sg_set_buf(&mw->mw_sg[i], seg->mr_offset,
+ sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
seg->mr_len);
len += seg->mr_len;
++seg;
@@ -214,40 +216,38 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
- mw->mw_dir = rpcrdma_data_dir(writing);
+ mr->mr_dir = rpcrdma_data_dir(writing);
- mw->mw_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
- mw->mw_sg, i, mw->mw_dir);
- if (!mw->mw_nents)
+ mr->mr_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
+ mr->mr_sg, i, mr->mr_dir);
+ if (!mr->mr_nents)
goto out_dmamap_err;
- for (i = 0, dma_pages = mw->fmr.fm_physaddrs; i < mw->mw_nents; i++)
- dma_pages[i] = sg_dma_address(&mw->mw_sg[i]);
- rc = ib_map_phys_fmr(mw->fmr.fm_mr, dma_pages, mw->mw_nents,
+ for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
+ dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
+ rc = ib_map_phys_fmr(mr->fmr.fm_mr, dma_pages, mr->mr_nents,
dma_pages[0]);
if (rc)
goto out_maperr;
- mw->mw_handle = mw->fmr.fm_mr->rkey;
- mw->mw_length = len;
- mw->mw_offset = dma_pages[0] + pageoff;
+ mr->mr_handle = mr->fmr.fm_mr->rkey;
+ mr->mr_length = len;
+ mr->mr_offset = dma_pages[0] + pageoff;
- *out = mw;
+ *out = mr;
return seg;
out_dmamap_err:
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
- mw->mw_sg, i);
- rpcrdma_put_mw(r_xprt, mw);
+ mr->mr_sg, i);
+ rpcrdma_mr_put(mr);
return ERR_PTR(-EIO);
out_maperr:
pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
len, (unsigned long long)dma_pages[0],
- pageoff, mw->mw_nents, rc);
- ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
- rpcrdma_put_mw(r_xprt, mw);
+ pageoff, mr->mr_nents, rc);
+ rpcrdma_mr_unmap_and_put(mr);
return ERR_PTR(-EIO);
}
@@ -256,13 +256,13 @@ out_maperr:
* Sleeps until it is safe for the host CPU to access the
* previously mapped memory regions.
*
- * Caller ensures that @mws is not empty before the call. This
+ * Caller ensures that @mrs is not empty before the call. This
* function empties the list.
*/
static void
-fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
+fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
{
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
LIST_HEAD(unmap_list);
int rc;
@@ -271,10 +271,11 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
* ib_unmap_fmr() is slow, so use a single call instead
* of one call per mapped FMR.
*/
- list_for_each_entry(mw, mws, mw_list) {
+ list_for_each_entry(mr, mrs, mr_list) {
dprintk("RPC: %s: unmapping fmr %p\n",
- __func__, &mw->fmr);
- list_add_tail(&mw->fmr.fm_mr->list, &unmap_list);
+ __func__, &mr->fmr);
+ trace_xprtrdma_localinv(mr);
+ list_add_tail(&mr->fmr.fm_mr->list, &unmap_list);
}
r_xprt->rx_stats.local_inv_needed++;
rc = ib_unmap_fmr(&unmap_list);
@@ -284,14 +285,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
/* ORDER: Now DMA unmap all of the req's MRs, and return
* them to the free MW list.
*/
- while (!list_empty(mws)) {
- mw = rpcrdma_pop_mw(mws);
- dprintk("RPC: %s: DMA unmapping fmr %p\n",
- __func__, &mw->fmr);
- list_del(&mw->fmr.fm_mr->list);
- ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
- rpcrdma_put_mw(r_xprt, mw);
+ while (!list_empty(mrs)) {
+ mr = rpcrdma_mr_pop(mrs);
+ list_del(&mr->fmr.fm_mr->list);
+ rpcrdma_mr_unmap_and_put(mr);
}
return;
@@ -299,10 +296,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
out_reset:
pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
- while (!list_empty(mws)) {
- mw = rpcrdma_pop_mw(mws);
- list_del(&mw->fmr.fm_mr->list);
- fmr_op_recover_mr(mw);
+ while (!list_empty(mrs)) {
+ mr = rpcrdma_mr_pop(mrs);
+ list_del(&mr->fmr.fm_mr->list);
+ fmr_op_recover_mr(mr);
}
}
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 773e66e10a15..90f688f19783 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -1,11 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (c) 2015 Oracle. All rights reserved.
+ * Copyright (c) 2015, 2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*/
/* Lightweight memory registration using Fast Registration Work
- * Requests (FRWR). Also referred to sometimes as FRMR mode.
+ * Requests (FRWR).
*
* FRWR features ordered asynchronous registration and deregistration
* of arbitrarily sized memory regions. This is the fastest and safest
@@ -15,9 +15,9 @@
/* Normal operation
*
* A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
- * Work Request (frmr_op_map). When the RDMA operation is finished, this
+ * Work Request (frwr_op_map). When the RDMA operation is finished, this
* Memory Region is invalidated using a LOCAL_INV Work Request
- * (frmr_op_unmap).
+ * (frwr_op_unmap_sync).
*
* Typically these Work Requests are not signaled, and neither are RDMA
* SEND Work Requests (with the exception of signaling occasionally to
@@ -26,7 +26,7 @@
*
* As an optimization, frwr_op_unmap marks MRs INVALID before the
* LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
- * rb_mws immediately so that no work (like managing a linked list
+ * rb_mrs immediately so that no work (like managing a linked list
* under a spinlock) is needed in the completion upcall.
*
* But this means that frwr_op_map() can occasionally encounter an MR
@@ -60,7 +60,7 @@
* When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
* with ib_dereg_mr and then are re-initialized. Because MR recovery
* allocates fresh resources, it is deferred to a workqueue, and the
- * recovered MRs are placed back on the rb_mws list when recovery is
+ * recovered MRs are placed back on the rb_mrs list when recovery is
* complete. frwr_op_map allocates another MR for the current RPC while
* the broken MR is reset.
*
@@ -96,26 +96,26 @@ out_not_supported:
}
static int
-frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
+frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
- unsigned int depth = ia->ri_max_frmr_depth;
- struct rpcrdma_frmr *f = &r->frmr;
+ unsigned int depth = ia->ri_max_frwr_depth;
+ struct rpcrdma_frwr *frwr = &mr->frwr;
int rc;
- f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
- if (IS_ERR(f->fr_mr))
+ frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
+ if (IS_ERR(frwr->fr_mr))
goto out_mr_err;
- r->mw_sg = kcalloc(depth, sizeof(*r->mw_sg), GFP_KERNEL);
- if (!r->mw_sg)
+ mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL);
+ if (!mr->mr_sg)
goto out_list_err;
- sg_init_table(r->mw_sg, depth);
- init_completion(&f->fr_linv_done);
+ sg_init_table(mr->mr_sg, depth);
+ init_completion(&frwr->fr_linv_done);
return 0;
out_mr_err:
- rc = PTR_ERR(f->fr_mr);
+ rc = PTR_ERR(frwr->fr_mr);
dprintk("RPC: %s: ib_alloc_mr status %i\n",
__func__, rc);
return rc;
@@ -124,83 +124,85 @@ out_list_err:
rc = -ENOMEM;
dprintk("RPC: %s: sg allocation failure\n",
__func__);
- ib_dereg_mr(f->fr_mr);
+ ib_dereg_mr(frwr->fr_mr);
return rc;
}
static void
-frwr_op_release_mr(struct rpcrdma_mw *r)
+frwr_op_release_mr(struct rpcrdma_mr *mr)
{
int rc;
- /* Ensure MW is not on any rl_registered list */
- if (!list_empty(&r->mw_list))
- list_del(&r->mw_list);
+ /* Ensure MR is not on any rl_registered list */
+ if (!list_empty(&mr->mr_list))
+ list_del(&mr->mr_list);
- rc = ib_dereg_mr(r->frmr.fr_mr);
+ rc = ib_dereg_mr(mr->frwr.fr_mr);
if (rc)
pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
- r, rc);
- kfree(r->mw_sg);
- kfree(r);
+ mr, rc);
+ kfree(mr->mr_sg);
+ kfree(mr);
}
static int
-__frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
+__frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
- struct rpcrdma_frmr *f = &r->frmr;
+ struct rpcrdma_frwr *frwr = &mr->frwr;
int rc;
- rc = ib_dereg_mr(f->fr_mr);
+ rc = ib_dereg_mr(frwr->fr_mr);
if (rc) {
pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
- rc, r);
+ rc, mr);
return rc;
}
- f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
- ia->ri_max_frmr_depth);
- if (IS_ERR(f->fr_mr)) {
+ frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
+ ia->ri_max_frwr_depth);
+ if (IS_ERR(frwr->fr_mr)) {
pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
- PTR_ERR(f->fr_mr), r);
- return PTR_ERR(f->fr_mr);
+ PTR_ERR(frwr->fr_mr), mr);
+ return PTR_ERR(frwr->fr_mr);
}
- dprintk("RPC: %s: recovered FRMR %p\n", __func__, f);
- f->fr_state = FRMR_IS_INVALID;
+ dprintk("RPC: %s: recovered FRWR %p\n", __func__, frwr);
+ frwr->fr_state = FRWR_IS_INVALID;
return 0;
}
-/* Reset of a single FRMR. Generate a fresh rkey by replacing the MR.
+/* Reset of a single FRWR. Generate a fresh rkey by replacing the MR.
*/
static void
-frwr_op_recover_mr(struct rpcrdma_mw *mw)
+frwr_op_recover_mr(struct rpcrdma_mr *mr)
{
- enum rpcrdma_frmr_state state = mw->frmr.fr_state;
- struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+ enum rpcrdma_frwr_state state = mr->frwr.fr_state;
+ struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
int rc;
- rc = __frwr_reset_mr(ia, mw);
- if (state != FRMR_FLUSHED_LI)
+ rc = __frwr_mr_reset(ia, mr);
+ if (state != FRWR_FLUSHED_LI) {
+ trace_xprtrdma_dma_unmap(mr);
ib_dma_unmap_sg(ia->ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
+ mr->mr_sg, mr->mr_nents, mr->mr_dir);
+ }
if (rc)
goto out_release;
- rpcrdma_put_mw(r_xprt, mw);
+ rpcrdma_mr_put(mr);
r_xprt->rx_stats.mrs_recovered++;
return;
out_release:
- pr_err("rpcrdma: FRMR reset failed %d, %p release\n", rc, mw);
+ pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mr);
r_xprt->rx_stats.mrs_orphaned++;
- spin_lock(&r_xprt->rx_buf.rb_mwlock);
- list_del(&mw->mw_all);
- spin_unlock(&r_xprt->rx_buf.rb_mwlock);
+ spin_lock(&r_xprt->rx_buf.rb_mrlock);
+ list_del(&mr->mr_all);
+ spin_unlock(&r_xprt->rx_buf.rb_mrlock);
- frwr_op_release_mr(mw);
+ frwr_op_release_mr(mr);
}
static int
@@ -214,31 +216,31 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;
- ia->ri_max_frmr_depth =
+ ia->ri_max_frwr_depth =
min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
attrs->max_fast_reg_page_list_len);
dprintk("RPC: %s: device's max FR page list len = %u\n",
- __func__, ia->ri_max_frmr_depth);
-
- /* Add room for frmr register and invalidate WRs.
- * 1. FRMR reg WR for head
- * 2. FRMR invalidate WR for head
- * 3. N FRMR reg WRs for pagelist
- * 4. N FRMR invalidate WRs for pagelist
- * 5. FRMR reg WR for tail
- * 6. FRMR invalidate WR for tail
+ __func__, ia->ri_max_frwr_depth);
+
+ /* Add room for frwr register and invalidate WRs.
+ * 1. FRWR reg WR for head
+ * 2. FRWR invalidate WR for head
+ * 3. N FRWR reg WRs for pagelist
+ * 4. N FRWR invalidate WRs for pagelist
+ * 5. FRWR reg WR for tail
+ * 6. FRWR invalidate WR for tail
* 7. The RDMA_SEND WR
*/
depth = 7;
- /* Calculate N if the device max FRMR depth is smaller than
+ /* Calculate N if the device max FRWR depth is smaller than
* RPCRDMA_MAX_DATA_SEGS.
*/
- if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
- delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth;
+ if (ia->ri_max_frwr_depth < RPCRDMA_MAX_DATA_SEGS) {
+ delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frwr_depth;
do {
- depth += 2; /* FRMR reg + invalidate */
- delta -= ia->ri_max_frmr_depth;
+ depth += 2; /* FRWR reg + invalidate */
+ delta -= ia->ri_max_frwr_depth;
} while (delta > 0);
}
@@ -252,7 +254,7 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
}
ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
- ia->ri_max_frmr_depth);
+ ia->ri_max_frwr_depth);
return 0;
}
@@ -265,7 +267,7 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
- RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frmr_depth);
+ RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth);
}
static void
@@ -286,16 +288,16 @@ __frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
static void
frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
{
- struct rpcrdma_frmr *frmr;
- struct ib_cqe *cqe;
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_frwr *frwr =
+ container_of(cqe, struct rpcrdma_frwr, fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
if (wc->status != IB_WC_SUCCESS) {
- cqe = wc->wr_cqe;
- frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
- frmr->fr_state = FRMR_FLUSHED_FR;
+ frwr->fr_state = FRWR_FLUSHED_FR;
__frwr_sendcompletion_flush(wc, "fastreg");
}
+ trace_xprtrdma_wc_fastreg(wc, frwr);
}
/**
@@ -307,16 +309,16 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
static void
frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
{
- struct rpcrdma_frmr *frmr;
- struct ib_cqe *cqe;
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
+ fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
if (wc->status != IB_WC_SUCCESS) {
- cqe = wc->wr_cqe;
- frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
- frmr->fr_state = FRMR_FLUSHED_LI;
+ frwr->fr_state = FRWR_FLUSHED_LI;
__frwr_sendcompletion_flush(wc, "localinv");
}
+ trace_xprtrdma_wc_li(wc, frwr);
}
/**
@@ -329,17 +331,17 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
static void
frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
{
- struct rpcrdma_frmr *frmr;
- struct ib_cqe *cqe;
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
+ fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
- cqe = wc->wr_cqe;
- frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
if (wc->status != IB_WC_SUCCESS) {
- frmr->fr_state = FRMR_FLUSHED_LI;
+ frwr->fr_state = FRWR_FLUSHED_LI;
__frwr_sendcompletion_flush(wc, "localinv");
}
- complete(&frmr->fr_linv_done);
+ complete(&frwr->fr_linv_done);
+ trace_xprtrdma_wc_li_wake(wc, frwr);
}
/* Post a REG_MR Work Request to register a memory region
@@ -347,41 +349,39 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
*/
static struct rpcrdma_mr_seg *
frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
- int nsegs, bool writing, struct rpcrdma_mw **out)
+ int nsegs, bool writing, struct rpcrdma_mr **out)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
- struct rpcrdma_mw *mw;
- struct rpcrdma_frmr *frmr;
- struct ib_mr *mr;
+ struct rpcrdma_frwr *frwr;
+ struct rpcrdma_mr *mr;
+ struct ib_mr *ibmr;
struct ib_reg_wr *reg_wr;
struct ib_send_wr *bad_wr;
int rc, i, n;
u8 key;
- mw = NULL;
+ mr = NULL;
do {
- if (mw)
- rpcrdma_defer_mr_recovery(mw);
- mw = rpcrdma_get_mw(r_xprt);
- if (!mw)
+ if (mr)
+ rpcrdma_mr_defer_recovery(mr);
+ mr = rpcrdma_mr_get(r_xprt);
+ if (!mr)
return ERR_PTR(-ENOBUFS);
- } while (mw->frmr.fr_state != FRMR_IS_INVALID);
- frmr = &mw->frmr;
- frmr->fr_state = FRMR_IS_VALID;
- mr = frmr->fr_mr;
- reg_wr = &frmr->fr_regwr;
-
- if (nsegs > ia->ri_max_frmr_depth)
- nsegs = ia->ri_max_frmr_depth;
+ } while (mr->frwr.fr_state != FRWR_IS_INVALID);
+ frwr = &mr->frwr;
+ frwr->fr_state = FRWR_IS_VALID;
+
+ if (nsegs > ia->ri_max_frwr_depth)
+ nsegs = ia->ri_max_frwr_depth;
for (i = 0; i < nsegs;) {
if (seg->mr_page)
- sg_set_page(&mw->mw_sg[i],
+ sg_set_page(&mr->mr_sg[i],
seg->mr_page,
seg->mr_len,
offset_in_page(seg->mr_offset));
else
- sg_set_buf(&mw->mw_sg[i], seg->mr_offset,
+ sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
seg->mr_len);
++seg;
@@ -392,30 +392,29 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
- mw->mw_dir = rpcrdma_data_dir(writing);
+ mr->mr_dir = rpcrdma_data_dir(writing);
- mw->mw_nents = ib_dma_map_sg(ia->ri_device, mw->mw_sg, i, mw->mw_dir);
- if (!mw->mw_nents)
+ mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
+ if (!mr->mr_nents)
goto out_dmamap_err;
- n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE);
- if (unlikely(n != mw->mw_nents))
+ ibmr = frwr->fr_mr;
+ n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
+ if (unlikely(n != mr->mr_nents))
goto out_mapmr_err;
- dprintk("RPC: %s: Using frmr %p to map %u segments (%llu bytes)\n",
- __func__, frmr, mw->mw_nents, mr->length);
-
- key = (u8)(mr->rkey & 0x000000FF);
- ib_update_fast_reg_key(mr, ++key);
+ key = (u8)(ibmr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(ibmr, ++key);
+ reg_wr = &frwr->fr_regwr;
reg_wr->wr.next = NULL;
reg_wr->wr.opcode = IB_WR_REG_MR;
- frmr->fr_cqe.done = frwr_wc_fastreg;
- reg_wr->wr.wr_cqe = &frmr->fr_cqe;
+ frwr->fr_cqe.done = frwr_wc_fastreg;
+ reg_wr->wr.wr_cqe = &frwr->fr_cqe;
reg_wr->wr.num_sge = 0;
reg_wr->wr.send_flags = 0;
- reg_wr->mr = mr;
- reg_wr->key = mr->rkey;
+ reg_wr->mr = ibmr;
+ reg_wr->key = ibmr->rkey;
reg_wr->access = writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ;
@@ -424,47 +423,64 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
if (rc)
goto out_senderr;
- mw->mw_handle = mr->rkey;
- mw->mw_length = mr->length;
- mw->mw_offset = mr->iova;
+ mr->mr_handle = ibmr->rkey;
+ mr->mr_length = ibmr->length;
+ mr->mr_offset = ibmr->iova;
- *out = mw;
+ *out = mr;
return seg;
out_dmamap_err:
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
- mw->mw_sg, i);
- frmr->fr_state = FRMR_IS_INVALID;
- rpcrdma_put_mw(r_xprt, mw);
+ mr->mr_sg, i);
+ frwr->fr_state = FRWR_IS_INVALID;
+ rpcrdma_mr_put(mr);
return ERR_PTR(-EIO);
out_mapmr_err:
pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
- frmr->fr_mr, n, mw->mw_nents);
- rpcrdma_defer_mr_recovery(mw);
+ frwr->fr_mr, n, mr->mr_nents);
+ rpcrdma_mr_defer_recovery(mr);
return ERR_PTR(-EIO);
out_senderr:
- pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc);
- rpcrdma_defer_mr_recovery(mw);
+ pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc);
+ rpcrdma_mr_defer_recovery(mr);
return ERR_PTR(-ENOTCONN);
}
+/* Handle a remotely invalidated mr on the @mrs list
+ */
+static void
+frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
+{
+ struct rpcrdma_mr *mr;
+
+ list_for_each_entry(mr, mrs, mr_list)
+ if (mr->mr_handle == rep->rr_inv_rkey) {
+ list_del(&mr->mr_list);
+ trace_xprtrdma_remoteinv(mr);
+ mr->frwr.fr_state = FRWR_IS_INVALID;
+ rpcrdma_mr_unmap_and_put(mr);
+ break; /* only one invalidated MR per RPC */
+ }
+}
+
/* Invalidate all memory regions that were registered for "req".
*
* Sleeps until it is safe for the host CPU to access the
* previously mapped memory regions.
*
- * Caller ensures that @mws is not empty before the call. This
+ * Caller ensures that @mrs is not empty before the call. This
* function empties the list.
*/
static void
-frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
+frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
{
struct ib_send_wr *first, **prev, *last, *bad_wr;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- struct rpcrdma_frmr *f;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_frwr *frwr;
+ struct rpcrdma_mr *mr;
int count, rc;
/* ORDER: Invalidate all of the MRs first
@@ -472,31 +488,27 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
* Chain the LOCAL_INV Work Requests and post them with
* a single ib_post_send() call.
*/
- f = NULL;
+ frwr = NULL;
count = 0;
prev = &first;
- list_for_each_entry(mw, mws, mw_list) {
- mw->frmr.fr_state = FRMR_IS_INVALID;
+ list_for_each_entry(mr, mrs, mr_list) {
+ mr->frwr.fr_state = FRWR_IS_INVALID;
- if (mw->mw_flags & RPCRDMA_MW_F_RI)
- continue;
+ frwr = &mr->frwr;
+ trace_xprtrdma_localinv(mr);
- f = &mw->frmr;
- dprintk("RPC: %s: invalidating frmr %p\n",
- __func__, f);
-
- f->fr_cqe.done = frwr_wc_localinv;
- last = &f->fr_invwr;
+ frwr->fr_cqe.done = frwr_wc_localinv;
+ last = &frwr->fr_invwr;
memset(last, 0, sizeof(*last));
- last->wr_cqe = &f->fr_cqe;
+ last->wr_cqe = &frwr->fr_cqe;
last->opcode = IB_WR_LOCAL_INV;
- last->ex.invalidate_rkey = mw->mw_handle;
+ last->ex.invalidate_rkey = mr->mr_handle;
count++;
*prev = last;
prev = &last->next;
}
- if (!f)
+ if (!frwr)
goto unmap;
/* Strong send queue ordering guarantees that when the
@@ -504,8 +516,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
* are complete.
*/
last->send_flags = IB_SEND_SIGNALED;
- f->fr_cqe.done = frwr_wc_localinv_wake;
- reinit_completion(&f->fr_linv_done);
+ frwr->fr_cqe.done = frwr_wc_localinv_wake;
+ reinit_completion(&frwr->fr_linv_done);
/* Transport disconnect drains the receive CQ before it
* replaces the QP. The RPC reply handler won't call us
@@ -515,36 +527,32 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
bad_wr = NULL;
rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
if (bad_wr != first)
- wait_for_completion(&f->fr_linv_done);
+ wait_for_completion(&frwr->fr_linv_done);
if (rc)
goto reset_mrs;
/* ORDER: Now DMA unmap all of the MRs, and return
- * them to the free MW list.
+ * them to the free MR list.
*/
unmap:
- while (!list_empty(mws)) {
- mw = rpcrdma_pop_mw(mws);
- dprintk("RPC: %s: DMA unmapping frmr %p\n",
- __func__, &mw->frmr);
- ib_dma_unmap_sg(ia->ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
- rpcrdma_put_mw(r_xprt, mw);
+ while (!list_empty(mrs)) {
+ mr = rpcrdma_mr_pop(mrs);
+ rpcrdma_mr_unmap_and_put(mr);
}
return;
reset_mrs:
- pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc);
+ pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);
/* Find and reset the MRs in the LOCAL_INV WRs that did not
* get posted.
*/
while (bad_wr) {
- f = container_of(bad_wr, struct rpcrdma_frmr,
- fr_invwr);
- mw = container_of(f, struct rpcrdma_mw, frmr);
+ frwr = container_of(bad_wr, struct rpcrdma_frwr,
+ fr_invwr);
+ mr = container_of(frwr, struct rpcrdma_mr, frwr);
- __frwr_reset_mr(ia, mw);
+ __frwr_mr_reset(ia, mr);
bad_wr = bad_wr->next;
}
@@ -553,6 +561,7 @@ reset_mrs:
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_map = frwr_op_map,
+ .ro_reminv = frwr_op_reminv,
.ro_unmap_sync = frwr_op_unmap_sync,
.ro_recover_mr = frwr_op_recover_mr,
.ro_open = frwr_op_open,
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c
index 560712bd9fa2..a762d192372b 100644
--- a/net/sunrpc/xprtrdma/module.c
+++ b/net/sunrpc/xprtrdma/module.c
@@ -1,18 +1,20 @@
/*
- * Copyright (c) 2015 Oracle. All rights reserved.
+ * Copyright (c) 2015, 2017 Oracle. All rights reserved.
*/
/* rpcrdma.ko module initialization
*/
+#include <linux/types.h>
+#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sunrpc/svc_rdma.h>
-#include "xprt_rdma.h"
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
+#include <asm/swab.h>
+
+#define CREATE_TRACE_POINTS
+#include "xprt_rdma.h"
MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc.");
MODULE_DESCRIPTION("RPC/RDMA Transport");
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index a3f2ab283aeb..162e5dd82466 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -292,15 +292,15 @@ encode_item_not_present(struct xdr_stream *xdr)
}
static void
-xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw)
+xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr)
{
- *iptr++ = cpu_to_be32(mw->mw_handle);
- *iptr++ = cpu_to_be32(mw->mw_length);
- xdr_encode_hyper(iptr, mw->mw_offset);
+ *iptr++ = cpu_to_be32(mr->mr_handle);
+ *iptr++ = cpu_to_be32(mr->mr_length);
+ xdr_encode_hyper(iptr, mr->mr_offset);
}
static int
-encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw)
+encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr)
{
__be32 *p;
@@ -308,12 +308,12 @@ encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw)
if (unlikely(!p))
return -EMSGSIZE;
- xdr_encode_rdma_segment(p, mw);
+ xdr_encode_rdma_segment(p, mr);
return 0;
}
static int
-encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw,
+encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
u32 position)
{
__be32 *p;
@@ -324,7 +324,7 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw,
*p++ = xdr_one; /* Item present */
*p++ = cpu_to_be32(position);
- xdr_encode_rdma_segment(p, mw);
+ xdr_encode_rdma_segment(p, mr);
return 0;
}
@@ -348,7 +348,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
{
struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
unsigned int pos;
int nsegs;
@@ -363,21 +363,17 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
do {
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
- false, &mw);
+ false, &mr);
if (IS_ERR(seg))
return PTR_ERR(seg);
- rpcrdma_push_mw(mw, &req->rl_registered);
+ rpcrdma_mr_push(mr, &req->rl_registered);
- if (encode_read_segment(xdr, mw, pos) < 0)
+ if (encode_read_segment(xdr, mr, pos) < 0)
return -EMSGSIZE;
- dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n",
- rqst->rq_task->tk_pid, __func__, pos,
- mw->mw_length, (unsigned long long)mw->mw_offset,
- mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
-
+ trace_xprtrdma_read_chunk(rqst->rq_task, pos, mr, nsegs);
r_xprt->rx_stats.read_chunk_count++;
- nsegs -= mw->mw_nents;
+ nsegs -= mr->mr_nents;
} while (nsegs);
return 0;
@@ -404,7 +400,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
{
struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
int nsegs, nchunks;
__be32 *segcount;
@@ -425,23 +421,19 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
nchunks = 0;
do {
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
- true, &mw);
+ true, &mr);
if (IS_ERR(seg))
return PTR_ERR(seg);
- rpcrdma_push_mw(mw, &req->rl_registered);
+ rpcrdma_mr_push(mr, &req->rl_registered);
- if (encode_rdma_segment(xdr, mw) < 0)
+ if (encode_rdma_segment(xdr, mr) < 0)
return -EMSGSIZE;
- dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n",
- rqst->rq_task->tk_pid, __func__,
- mw->mw_length, (unsigned long long)mw->mw_offset,
- mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
-
+ trace_xprtrdma_write_chunk(rqst->rq_task, mr, nsegs);
r_xprt->rx_stats.write_chunk_count++;
- r_xprt->rx_stats.total_rdma_request += seg->mr_len;
+ r_xprt->rx_stats.total_rdma_request += mr->mr_length;
nchunks++;
- nsegs -= mw->mw_nents;
+ nsegs -= mr->mr_nents;
} while (nsegs);
/* Update count of segments in this Write chunk */
@@ -468,7 +460,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
{
struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
int nsegs, nchunks;
__be32 *segcount;
@@ -487,23 +479,19 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
nchunks = 0;
do {
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
- true, &mw);
+ true, &mr);
if (IS_ERR(seg))
return PTR_ERR(seg);
- rpcrdma_push_mw(mw, &req->rl_registered);
+ rpcrdma_mr_push(mr, &req->rl_registered);
- if (encode_rdma_segment(xdr, mw) < 0)
+ if (encode_rdma_segment(xdr, mr) < 0)
return -EMSGSIZE;
- dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n",
- rqst->rq_task->tk_pid, __func__,
- mw->mw_length, (unsigned long long)mw->mw_offset,
- mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
-
+ trace_xprtrdma_reply_chunk(rqst->rq_task, mr, nsegs);
r_xprt->rx_stats.reply_chunk_count++;
- r_xprt->rx_stats.total_rdma_request += seg->mr_len;
+ r_xprt->rx_stats.total_rdma_request += mr->mr_length;
nchunks++;
- nsegs -= mw->mw_nents;
+ nsegs -= mr->mr_nents;
} while (nsegs);
/* Update count of segments in the Reply chunk */
@@ -524,9 +512,6 @@ rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc)
struct ib_sge *sge;
unsigned int count;
- dprintk("RPC: %s: unmapping %u sges for sc=%p\n",
- __func__, sc->sc_unmap_count, sc);
-
/* The first two SGEs contain the transport header and
* the inline buffer. These are always left mapped so
* they can be cheaply re-used.
@@ -754,11 +739,6 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
__be32 *p;
int ret;
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
- if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state))
- return rpcrdma_bc_marshal_reply(rqst);
-#endif
-
rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
xdr_init_encode(xdr, &req->rl_hdrbuf,
req->rl_rdmabuf->rg_base);
@@ -821,6 +801,17 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
rtype = rpcrdma_areadch;
}
+ /* If this is a retransmit, discard previously registered
+ * chunks. Very likely the connection has been replaced,
+ * so these registrations are invalid and unusable.
+ */
+ while (unlikely(!list_empty(&req->rl_registered))) {
+ struct rpcrdma_mr *mr;
+
+ mr = rpcrdma_mr_pop(&req->rl_registered);
+ rpcrdma_mr_defer_recovery(mr);
+ }
+
/* This implementation supports the following combinations
* of chunk lists in one RPC-over-RDMA Call message:
*
@@ -868,10 +859,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
if (ret)
goto out_err;
- dprintk("RPC: %5u %s: %s/%s: hdrlen %u rpclen\n",
- rqst->rq_task->tk_pid, __func__,
- transfertypes[rtype], transfertypes[wtype],
- xdr_stream_pos(xdr));
+ trace_xprtrdma_marshal(rqst, xdr_stream_pos(xdr), rtype, wtype);
ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr),
&rqst->rq_snd_buf, rtype);
@@ -926,8 +914,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
curlen = rqst->rq_rcv_buf.head[0].iov_len;
if (curlen > copy_len)
curlen = copy_len;
- dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n",
- __func__, srcp, copy_len, curlen);
+ trace_xprtrdma_fixup(rqst, copy_len, curlen);
srcp += curlen;
copy_len -= curlen;
@@ -947,9 +934,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
if (curlen > pagelist_len)
curlen = pagelist_len;
- dprintk("RPC: %s: page %d"
- " srcp 0x%p len %d curlen %d\n",
- __func__, i, srcp, copy_len, curlen);
+ trace_xprtrdma_fixup_pg(rqst, i, srcp,
+ copy_len, curlen);
destp = kmap_atomic(ppages[i]);
memcpy(destp + page_base, srcp, curlen);
flush_dcache_page(ppages[i]);
@@ -984,24 +970,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
return fixup_copy_count;
}
-/* Caller must guarantee @rep remains stable during this call.
- */
-static void
-rpcrdma_mark_remote_invalidation(struct list_head *mws,
- struct rpcrdma_rep *rep)
-{
- struct rpcrdma_mw *mw;
-
- if (!(rep->rr_wc_flags & IB_WC_WITH_INVALIDATE))
- return;
-
- list_for_each_entry(mw, mws, mw_list)
- if (mw->mw_handle == rep->rr_inv_rkey) {
- mw->mw_flags = RPCRDMA_MW_F_RI;
- break; /* only one invalidated MR per RPC */
- }
-}
-
/* By convention, backchannel calls arrive via rdma_msg type
* messages, and never populate the chunk lists. This makes
* the RPC/RDMA header small and fixed in size, so it is
@@ -1058,26 +1026,19 @@ out_short:
static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length)
{
+ u32 handle;
+ u64 offset;
__be32 *p;
p = xdr_inline_decode(xdr, 4 * sizeof(*p));
if (unlikely(!p))
return -EIO;
- ifdebug(FACILITY) {
- u64 offset;
- u32 handle;
-
- handle = be32_to_cpup(p++);
- *length = be32_to_cpup(p++);
- xdr_decode_hyper(p, &offset);
- dprintk("RPC: %s: segment %u@0x%016llx:0x%08x\n",
- __func__, *length, (unsigned long long)offset,
- handle);
- } else {
- *length = be32_to_cpup(p + 1);
- }
+ handle = be32_to_cpup(p++);
+ *length = be32_to_cpup(p++);
+ xdr_decode_hyper(p, &offset);
+ trace_xprtrdma_decode_seg(handle, *length, offset);
return 0;
}
@@ -1098,8 +1059,6 @@ static int decode_write_chunk(struct xdr_stream *xdr, u32 *length)
*length += seglength;
}
- dprintk("RPC: %s: segcount=%u, %u bytes\n",
- __func__, be32_to_cpup(p), *length);
return 0;
}
@@ -1296,8 +1255,7 @@ out:
* being marshaled.
*/
out_badheader:
- dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n",
- rqst->rq_task->tk_pid, __func__, be32_to_cpu(rep->rr_proc));
+ trace_xprtrdma_reply_hdr(rep);
r_xprt->rx_stats.bad_reply_count++;
status = -EIO;
goto out;
@@ -1339,9 +1297,12 @@ void rpcrdma_deferred_completion(struct work_struct *work)
struct rpcrdma_rep *rep =
container_of(work, struct rpcrdma_rep, rr_work);
struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst);
+ struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
- rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
- rpcrdma_release_rqst(rep->rr_rxprt, req);
+ trace_xprtrdma_defer_cmp(rep);
+ if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
+ r_xprt->rx_ia.ri_ops->ro_reminv(rep, &req->rl_registered);
+ rpcrdma_release_rqst(r_xprt, req);
rpcrdma_complete_rqst(rep);
}
@@ -1360,8 +1321,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
u32 credits;
__be32 *p;
- dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
-
if (rep->rr_hdrbuf.head[0].iov_len == 0)
goto out_badstatus;
@@ -1405,8 +1364,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
rep->rr_rqst = rqst;
clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
- dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n",
- __func__, rep, req, be32_to_cpu(rep->rr_xid));
+ trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work);
return;
@@ -1420,8 +1378,7 @@ out_badstatus:
return;
out_badversion:
- dprintk("RPC: %s: invalid version %d\n",
- __func__, be32_to_cpu(rep->rr_vers));
+ trace_xprtrdma_reply_vers(rep);
goto repost;
/* The RPC transaction has already been terminated, or the header
@@ -1429,12 +1386,11 @@ out_badversion:
*/
out_norqst:
spin_unlock(&xprt->recv_lock);
- dprintk("RPC: %s: no match for incoming xid 0x%08x\n",
- __func__, be32_to_cpu(rep->rr_xid));
+ trace_xprtrdma_reply_rqst(rep);
goto repost;
out_shortreply:
- dprintk("RPC: %s: short/invalid reply\n", __func__);
+ trace_xprtrdma_reply_short(rep);
/* If no pending RPC transaction was matched, post a replacement
* receive buffer before returning.
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 6ee1ad8978f3..4b1ecfe979cf 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -67,8 +67,7 @@
static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
-static unsigned int xprt_rdma_inline_write_padding;
-unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
+unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR;
int xprt_rdma_pad_optimize;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -81,6 +80,7 @@ static unsigned int zero;
static unsigned int max_padding = PAGE_SIZE;
static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
static unsigned int max_memreg = RPCRDMA_LAST - 1;
+static unsigned int dummy;
static struct ctl_table_header *sunrpc_table_header;
@@ -114,7 +114,7 @@ static struct ctl_table xr_tunables_table[] = {
},
{
.procname = "rdma_inline_write_padding",
- .data = &xprt_rdma_inline_write_padding,
+ .data = &dummy,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -259,13 +259,10 @@ xprt_rdma_connect_worker(struct work_struct *work)
xprt_clear_connected(xprt);
- dprintk("RPC: %s: %sconnect\n", __func__,
- r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
if (rc)
xprt_wake_pending_tasks(xprt, rc);
- dprintk("RPC: %s: exit\n", __func__);
xprt_clear_connecting(xprt);
}
@@ -275,7 +272,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt,
rx_xprt);
- pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt);
+ trace_xprtrdma_inject_dsc(r_xprt);
rdma_disconnect(r_xprt->rx_ia.ri_id);
}
@@ -295,7 +292,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- dprintk("RPC: %s: called\n", __func__);
+ trace_xprtrdma_destroy(r_xprt);
cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
@@ -306,11 +303,8 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
rpcrdma_ia_close(&r_xprt->rx_ia);
xprt_rdma_free_addresses(xprt);
-
xprt_free(xprt);
- dprintk("RPC: %s: returning\n", __func__);
-
module_put(THIS_MODULE);
}
@@ -361,9 +355,7 @@ xprt_setup_rdma(struct xprt_create *args)
/*
* Set up RDMA-specific connect data.
*/
-
- sap = (struct sockaddr *)&cdata.addr;
- memcpy(sap, args->dstaddr, args->addrlen);
+ sap = args->dstaddr;
/* Ensure xprt->addr holds valid server TCP (not RDMA)
* address, for any side protocols which peek at it */
@@ -373,6 +365,7 @@ xprt_setup_rdma(struct xprt_create *args)
if (rpc_get_port(sap))
xprt_set_bound(xprt);
+ xprt_rdma_format_addresses(xprt, sap);
cdata.max_requests = xprt->max_reqs;
@@ -387,8 +380,6 @@ xprt_setup_rdma(struct xprt_create *args)
if (cdata.inline_rsize > cdata.rsize)
cdata.inline_rsize = cdata.rsize;
- cdata.padding = xprt_rdma_inline_write_padding;
-
/*
* Create new transport instance, which includes initialized
* o ia
@@ -398,7 +389,7 @@ xprt_setup_rdma(struct xprt_create *args)
new_xprt = rpcx_to_rdmax(xprt);
- rc = rpcrdma_ia_open(new_xprt, sap);
+ rc = rpcrdma_ia_open(new_xprt);
if (rc)
goto out1;
@@ -407,31 +398,19 @@ xprt_setup_rdma(struct xprt_create *args)
*/
new_xprt->rx_data = cdata;
new_ep = &new_xprt->rx_ep;
- new_ep->rep_remote_addr = cdata.addr;
rc = rpcrdma_ep_create(&new_xprt->rx_ep,
&new_xprt->rx_ia, &new_xprt->rx_data);
if (rc)
goto out2;
- /*
- * Allocate pre-registered send and receive buffers for headers and
- * any inline data. Also specify any padding which will be provided
- * from a preregistered zero buffer.
- */
rc = rpcrdma_buffer_create(new_xprt);
if (rc)
goto out3;
- /*
- * Register a callback for connection events. This is necessary because
- * connection loss notification is async. We also catch connection loss
- * when reaping receives.
- */
INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
xprt_rdma_connect_worker);
- xprt_rdma_format_addresses(xprt, sap);
xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
if (xprt->max_payload == 0)
goto out4;
@@ -445,16 +424,19 @@ xprt_setup_rdma(struct xprt_create *args)
dprintk("RPC: %s: %s:%s\n", __func__,
xprt->address_strings[RPC_DISPLAY_ADDR],
xprt->address_strings[RPC_DISPLAY_PORT]);
+ trace_xprtrdma_create(new_xprt);
return xprt;
out4:
- xprt_rdma_free_addresses(xprt);
- rc = -EINVAL;
+ rpcrdma_buffer_destroy(&new_xprt->rx_buf);
+ rc = -ENODEV;
out3:
rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
out2:
rpcrdma_ia_close(&new_xprt->rx_ia);
out1:
+ trace_xprtrdma_destroy(new_xprt);
+ xprt_rdma_free_addresses(xprt);
xprt_free(xprt);
return ERR_PTR(rc);
}
@@ -488,16 +470,34 @@ xprt_rdma_close(struct rpc_xprt *xprt)
rpcrdma_ep_disconnect(ep, ia);
}
+/**
+ * xprt_rdma_set_port - update server port with rpcbind result
+ * @xprt: controlling RPC transport
+ * @port: new port value
+ *
+ * Transport connect status is unchanged.
+ */
static void
xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
{
- struct sockaddr_in *sap;
+ struct sockaddr *sap = (struct sockaddr *)&xprt->addr;
+ char buf[8];
- sap = (struct sockaddr_in *)&xprt->addr;
- sap->sin_port = htons(port);
- sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr;
- sap->sin_port = htons(port);
- dprintk("RPC: %s: %u\n", __func__, port);
+ dprintk("RPC: %s: setting port for xprt %p (%s:%s) to %u\n",
+ __func__, xprt,
+ xprt->address_strings[RPC_DISPLAY_ADDR],
+ xprt->address_strings[RPC_DISPLAY_PORT],
+ port);
+
+ rpc_set_port(sap, port);
+
+ kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
+ snprintf(buf, sizeof(buf), "%u", port);
+ xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
+
+ kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
+ snprintf(buf, sizeof(buf), "%4hx", port);
+ xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
}
/**
@@ -516,8 +516,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
static void
xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
{
- dprintk("RPC: %5u %s: xprt = %p\n", task->tk_pid, __func__, xprt);
-
xprt_force_disconnect(xprt);
}
@@ -640,7 +638,7 @@ xprt_rdma_allocate(struct rpc_task *task)
req = rpcrdma_buffer_get(&r_xprt->rx_buf);
if (req == NULL)
- return -ENOMEM;
+ goto out_get;
flags = RPCRDMA_DEF_GFP;
if (RPC_IS_SWAPPER(task))
@@ -653,19 +651,18 @@ xprt_rdma_allocate(struct rpc_task *task)
if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
goto out_fail;
- dprintk("RPC: %5u %s: send size = %zd, recv size = %zd, req = %p\n",
- task->tk_pid, __func__, rqst->rq_callsize,
- rqst->rq_rcvsize, req);
-
req->rl_cpu = smp_processor_id();
req->rl_connect_cookie = 0; /* our reserved value */
rpcrdma_set_xprtdata(rqst, req);
rqst->rq_buffer = req->rl_sendbuf->rg_base;
rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
+ trace_xprtrdma_allocate(task, req);
return 0;
out_fail:
rpcrdma_buffer_put(req);
+out_get:
+ trace_xprtrdma_allocate(task, NULL);
return -ENOMEM;
}
@@ -682,13 +679,9 @@ xprt_rdma_free(struct rpc_task *task)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
- if (test_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags))
- return;
-
- dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
-
if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
rpcrdma_release_rqst(r_xprt, req);
+ trace_xprtrdma_rpc_done(task, req);
rpcrdma_buffer_put(req);
}
@@ -698,22 +691,12 @@ xprt_rdma_free(struct rpc_task *task)
*
* Caller holds the transport's write lock.
*
- * Return values:
- * 0: The request has been sent
- * ENOTCONN: Caller needs to invoke connect logic then call again
- * ENOBUFS: Call again later to send the request
- * EIO: A permanent error occurred. The request was not sent,
- * and don't try it again
- *
- * send_request invokes the meat of RPC RDMA. It must do the following:
- *
- * 1. Marshal the RPC request into an RPC RDMA request, which means
- * putting a header in front of data, and creating IOVs for RDMA
- * from those in the request.
- * 2. In marshaling, detect opportunities for RDMA, and use them.
- * 3. Post a recv message to set up asynch completion, then send
- * the request (rpcrdma_ep_post).
- * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP).
+ * Returns:
+ * %0 if the RPC message has been sent
+ * %-ENOTCONN if the caller should reconnect and call again
+ * %-ENOBUFS if the caller should call again later
+ * %-EIO if a permanent error occurred and the request was not
+ * sent. Do not try to send this message again.
*/
static int
xprt_rdma_send_request(struct rpc_task *task)
@@ -724,14 +707,14 @@ xprt_rdma_send_request(struct rpc_task *task)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
int rc = 0;
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+ if (unlikely(!rqst->rq_buffer))
+ return xprt_rdma_bc_send_reply(rqst);
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
+
if (!xprt_connected(xprt))
goto drop_connection;
- /* On retransmit, remove any previously registered chunks */
- if (unlikely(!list_empty(&req->rl_registered)))
- r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
- &req->rl_registered);
-
rc = rpcrdma_marshal_req(r_xprt, rqst);
if (rc < 0)
goto failed_marshal;
@@ -744,7 +727,7 @@ xprt_rdma_send_request(struct rpc_task *task)
goto drop_connection;
req->rl_connect_cookie = xprt->connect_cookie;
- set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
+ __set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection;
@@ -904,8 +887,7 @@ int xprt_rdma_init(void)
"\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
xprt_rdma_slot_table_entries,
xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
- dprintk("\tPadding %d\n\tMemreg %d\n",
- xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy);
+ dprintk("\tPadding 0\n\tMemreg %d\n", xprt_rdma_memreg_strategy);
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
if (!sunrpc_table_header)
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 8607c029c0dd..f4eb63e8e689 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -71,8 +71,8 @@
/*
* internal functions
*/
-static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt);
-static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf);
+static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
+static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
@@ -108,7 +108,10 @@ static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
struct rpcrdma_ep *ep = context;
+ struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt,
+ rx_ep);
+ trace_xprtrdma_qp_error(r_xprt, event);
pr_err("rpcrdma: %s on device %s ep %p\n",
ib_event_msg(event->event), event->device->name, context);
@@ -133,6 +136,7 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
container_of(cqe, struct rpcrdma_sendctx, sc_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
+ trace_xprtrdma_wc_send(sc, wc);
if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
@@ -155,13 +159,11 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
rr_cqe);
/* WARNING: Only wr_id and status are reliable at this point */
+ trace_xprtrdma_wc_receive(rep, wc);
if (wc->status != IB_WC_SUCCESS)
goto out_fail;
/* status == SUCCESS means all fields in wc are trustworthy */
- dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
- __func__, rep, wc->byte_len);
-
rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len);
rep->rr_wc_flags = wc->wc_flags;
rep->rr_inv_rkey = wc->ex.invalidate_rkey;
@@ -192,7 +194,6 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
unsigned int rsize, wsize;
/* Default settings for RPC-over-RDMA Version One */
- r_xprt->rx_ia.ri_reminv_expected = false;
r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
@@ -200,7 +201,6 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
if (pmsg &&
pmsg->cp_magic == rpcrdma_cmp_magic &&
pmsg->cp_version == RPCRDMA_CMP_VERSION) {
- r_xprt->rx_ia.ri_reminv_expected = true;
r_xprt->rx_ia.ri_implicit_roundup = true;
rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
@@ -221,11 +221,9 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
struct rpcrdma_xprt *xprt = id->context;
struct rpcrdma_ia *ia = &xprt->rx_ia;
struct rpcrdma_ep *ep = &xprt->rx_ep;
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
- struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
-#endif
int connstate = 0;
+ trace_xprtrdma_conn_upcall(xprt, event);
switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED:
case RDMA_CM_EVENT_ROUTE_RESOLVED:
@@ -234,21 +232,17 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
break;
case RDMA_CM_EVENT_ADDR_ERROR:
ia->ri_async_rc = -EHOSTUNREACH;
- dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
- __func__, ep);
complete(&ia->ri_done);
break;
case RDMA_CM_EVENT_ROUTE_ERROR:
ia->ri_async_rc = -ENETUNREACH;
- dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
- __func__, ep);
complete(&ia->ri_done);
break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
- pr_info("rpcrdma: removing device %s for %pIS:%u\n",
+ pr_info("rpcrdma: removing device %s for %s:%s\n",
ia->ri_device->name,
- sap, rpc_get_port(sap));
+ rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt));
#endif
set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
ep->rep_connected = -ENODEV;
@@ -271,8 +265,8 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
connstate = -ENETDOWN;
goto connected;
case RDMA_CM_EVENT_REJECTED:
- dprintk("rpcrdma: connection to %pIS:%u rejected: %s\n",
- sap, rpc_get_port(sap),
+ dprintk("rpcrdma: connection to %s:%s rejected: %s\n",
+ rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
rdma_reject_msg(id, event->status));
connstate = -ECONNREFUSED;
if (event->status == IB_CM_REJ_STALE_CONN)
@@ -287,8 +281,9 @@ connected:
wake_up_all(&ep->rep_connect_wait);
/*FALLTHROUGH*/
default:
- dprintk("RPC: %s: %pIS:%u on %s/%s (ep 0x%p): %s\n",
- __func__, sap, rpc_get_port(sap),
+ dprintk("RPC: %s: %s:%s on %s/%s (ep 0x%p): %s\n",
+ __func__,
+ rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
ia->ri_device->name, ia->ri_ops->ro_displayname,
ep, rdma_event_msg(event->event));
break;
@@ -298,13 +293,14 @@ connected:
}
static struct rdma_cm_id *
-rpcrdma_create_id(struct rpcrdma_xprt *xprt,
- struct rpcrdma_ia *ia, struct sockaddr *addr)
+rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
{
unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1;
struct rdma_cm_id *id;
int rc;
+ trace_xprtrdma_conn_start(xprt);
+
init_completion(&ia->ri_done);
init_completion(&ia->ri_remove_done);
@@ -318,7 +314,9 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
}
ia->ri_async_rc = -ETIMEDOUT;
- rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
+ rc = rdma_resolve_addr(id, NULL,
+ (struct sockaddr *)&xprt->rx_xprt.addr,
+ RDMA_RESOLVE_TIMEOUT);
if (rc) {
dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
__func__, rc);
@@ -326,8 +324,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
}
rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
if (rc < 0) {
- dprintk("RPC: %s: wait() exited: %i\n",
- __func__, rc);
+ trace_xprtrdma_conn_tout(xprt);
goto out;
}
@@ -344,8 +341,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
}
rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
if (rc < 0) {
- dprintk("RPC: %s: wait() exited: %i\n",
- __func__, rc);
+ trace_xprtrdma_conn_tout(xprt);
goto out;
}
rc = ia->ri_async_rc;
@@ -365,19 +361,18 @@ out:
/**
* rpcrdma_ia_open - Open and initialize an Interface Adapter.
- * @xprt: controlling transport
- * @addr: IP address of remote peer
+ * @xprt: transport with IA to (re)initialize
*
* Returns 0 on success, negative errno if an appropriate
* Interface Adapter could not be found and opened.
*/
int
-rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr)
+rpcrdma_ia_open(struct rpcrdma_xprt *xprt)
{
struct rpcrdma_ia *ia = &xprt->rx_ia;
int rc;
- ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
+ ia->ri_id = rpcrdma_create_id(xprt, ia);
if (IS_ERR(ia->ri_id)) {
rc = PTR_ERR(ia->ri_id);
goto out_err;
@@ -392,7 +387,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr)
}
switch (xprt_rdma_memreg_strategy) {
- case RPCRDMA_FRMR:
+ case RPCRDMA_FRWR:
if (frwr_is_supported(ia)) {
ia->ri_ops = &rpcrdma_frwr_memreg_ops;
break;
@@ -462,10 +457,12 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
}
- rpcrdma_destroy_mrs(buf);
+ rpcrdma_mrs_destroy(buf);
/* Allow waiters to continue */
complete(&ia->ri_remove_done);
+
+ trace_xprtrdma_remove(r_xprt);
}
/**
@@ -476,7 +473,6 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
- dprintk("RPC: %s: entering\n", __func__);
if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
if (ia->ri_id->qp)
rdma_destroy_qp(ia->ri_id);
@@ -630,9 +626,6 @@ out1:
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
- dprintk("RPC: %s: entering, connected is %d\n",
- __func__, ep->rep_connected);
-
cancel_delayed_work_sync(&ep->rep_connect_worker);
if (ia->ri_id->qp) {
@@ -653,13 +646,12 @@ static int
rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
- struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
int rc, err;
- pr_info("%s: r_xprt = %p\n", __func__, r_xprt);
+ trace_xprtrdma_reinsert(r_xprt);
rc = -EHOSTUNREACH;
- if (rpcrdma_ia_open(r_xprt, sap))
+ if (rpcrdma_ia_open(r_xprt))
goto out1;
rc = -ENOMEM;
@@ -676,7 +668,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
goto out3;
}
- rpcrdma_create_mrs(r_xprt);
+ rpcrdma_mrs_create(r_xprt);
return 0;
out3:
@@ -691,16 +683,15 @@ static int
rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
struct rpcrdma_ia *ia)
{
- struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
struct rdma_cm_id *id, *old;
int err, rc;
- dprintk("RPC: %s: reconnecting...\n", __func__);
+ trace_xprtrdma_reconnect(r_xprt);
rpcrdma_ep_disconnect(ep, ia);
rc = -EHOSTUNREACH;
- id = rpcrdma_create_id(r_xprt, ia, sap);
+ id = rpcrdma_create_id(r_xprt, ia);
if (IS_ERR(id))
goto out;
@@ -817,16 +808,14 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
int rc;
rc = rdma_disconnect(ia->ri_id);
- if (!rc) {
+ if (!rc)
/* returns without wait if not connected */
wait_event_interruptible(ep->rep_connect_wait,
ep->rep_connected != 1);
- dprintk("RPC: %s: after wait, %sconnected\n", __func__,
- (ep->rep_connected == 1) ? "still " : "dis");
- } else {
- dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
+ else
ep->rep_connected = rc;
- }
+ trace_xprtrdma_disconnect(container_of(ep, struct rpcrdma_xprt,
+ rx_ep), rc);
ib_drain_qp(ia->ri_id->qp);
}
@@ -998,15 +987,15 @@ rpcrdma_mr_recovery_worker(struct work_struct *work)
{
struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
rb_recovery_worker.work);
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
spin_lock(&buf->rb_recovery_lock);
while (!list_empty(&buf->rb_stale_mrs)) {
- mw = rpcrdma_pop_mw(&buf->rb_stale_mrs);
+ mr = rpcrdma_mr_pop(&buf->rb_stale_mrs);
spin_unlock(&buf->rb_recovery_lock);
- dprintk("RPC: %s: recovering MR %p\n", __func__, mw);
- mw->mw_xprt->rx_ia.ri_ops->ro_recover_mr(mw);
+ trace_xprtrdma_recover_mr(mr);
+ mr->mr_xprt->rx_ia.ri_ops->ro_recover_mr(mr);
spin_lock(&buf->rb_recovery_lock);
}
@@ -1014,20 +1003,20 @@ rpcrdma_mr_recovery_worker(struct work_struct *work)
}
void
-rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw)
+rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr)
{
- struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+ struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
spin_lock(&buf->rb_recovery_lock);
- rpcrdma_push_mw(mw, &buf->rb_stale_mrs);
+ rpcrdma_mr_push(mr, &buf->rb_stale_mrs);
spin_unlock(&buf->rb_recovery_lock);
schedule_delayed_work(&buf->rb_recovery_worker, 0);
}
static void
-rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt)
+rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
@@ -1036,32 +1025,32 @@ rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt)
LIST_HEAD(all);
for (count = 0; count < 32; count++) {
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
int rc;
- mw = kzalloc(sizeof(*mw), GFP_KERNEL);
- if (!mw)
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
break;
- rc = ia->ri_ops->ro_init_mr(ia, mw);
+ rc = ia->ri_ops->ro_init_mr(ia, mr);
if (rc) {
- kfree(mw);
+ kfree(mr);
break;
}
- mw->mw_xprt = r_xprt;
+ mr->mr_xprt = r_xprt;
- list_add(&mw->mw_list, &free);
- list_add(&mw->mw_all, &all);
+ list_add(&mr->mr_list, &free);
+ list_add(&mr->mr_all, &all);
}
- spin_lock(&buf->rb_mwlock);
- list_splice(&free, &buf->rb_mws);
+ spin_lock(&buf->rb_mrlock);
+ list_splice(&free, &buf->rb_mrs);
list_splice(&all, &buf->rb_all);
r_xprt->rx_stats.mrs_allocated += count;
- spin_unlock(&buf->rb_mwlock);
+ spin_unlock(&buf->rb_mrlock);
- dprintk("RPC: %s: created %u MRs\n", __func__, count);
+ trace_xprtrdma_createmrs(r_xprt, count);
}
static void
@@ -1072,7 +1061,7 @@ rpcrdma_mr_refresh_worker(struct work_struct *work)
struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
rx_buf);
- rpcrdma_create_mrs(r_xprt);
+ rpcrdma_mrs_create(r_xprt);
}
struct rpcrdma_req *
@@ -1093,10 +1082,17 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
return req;
}
-struct rpcrdma_rep *
+/**
+ * rpcrdma_create_rep - Allocate an rpcrdma_rep object
+ * @r_xprt: controlling transport
+ *
+ * Returns 0 on success or a negative errno on failure.
+ */
+int
rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_rep *rep;
int rc;
@@ -1121,12 +1117,18 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
rep->rr_recv_wr.num_sge = 1;
- return rep;
+
+ spin_lock(&buf->rb_lock);
+ list_add(&rep->rr_list, &buf->rb_recv_bufs);
+ spin_unlock(&buf->rb_lock);
+ return 0;
out_free:
kfree(rep);
out:
- return ERR_PTR(rc);
+ dprintk("RPC: %s: reply buffer %d alloc failed\n",
+ __func__, rc);
+ return rc;
}
int
@@ -1137,10 +1139,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
buf->rb_max_requests = r_xprt->rx_data.max_requests;
buf->rb_bc_srv_max_requests = 0;
- spin_lock_init(&buf->rb_mwlock);
+ spin_lock_init(&buf->rb_mrlock);
spin_lock_init(&buf->rb_lock);
spin_lock_init(&buf->rb_recovery_lock);
- INIT_LIST_HEAD(&buf->rb_mws);
+ INIT_LIST_HEAD(&buf->rb_mrs);
INIT_LIST_HEAD(&buf->rb_all);
INIT_LIST_HEAD(&buf->rb_stale_mrs);
INIT_DELAYED_WORK(&buf->rb_refresh_worker,
@@ -1148,7 +1150,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
INIT_DELAYED_WORK(&buf->rb_recovery_worker,
rpcrdma_mr_recovery_worker);
- rpcrdma_create_mrs(r_xprt);
+ rpcrdma_mrs_create(r_xprt);
INIT_LIST_HEAD(&buf->rb_send_bufs);
INIT_LIST_HEAD(&buf->rb_allreqs);
@@ -1167,17 +1169,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
}
INIT_LIST_HEAD(&buf->rb_recv_bufs);
- for (i = 0; i < buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; i++) {
- struct rpcrdma_rep *rep;
-
- rep = rpcrdma_create_rep(r_xprt);
- if (IS_ERR(rep)) {
- dprintk("RPC: %s: reply buffer %d alloc failed\n",
- __func__, i);
- rc = PTR_ERR(rep);
+ for (i = 0; i <= buf->rb_max_requests; i++) {
+ rc = rpcrdma_create_rep(r_xprt);
+ if (rc)
goto out;
- }
- list_add(&rep->rr_list, &buf->rb_recv_bufs);
}
rc = rpcrdma_sendctxs_create(r_xprt);
@@ -1229,26 +1224,26 @@ rpcrdma_destroy_req(struct rpcrdma_req *req)
}
static void
-rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf)
+rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
{
struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
rx_buf);
struct rpcrdma_ia *ia = rdmab_to_ia(buf);
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
unsigned int count;
count = 0;
- spin_lock(&buf->rb_mwlock);
+ spin_lock(&buf->rb_mrlock);
while (!list_empty(&buf->rb_all)) {
- mw = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
- list_del(&mw->mw_all);
+ mr = list_entry(buf->rb_all.next, struct rpcrdma_mr, mr_all);
+ list_del(&mr->mr_all);
- spin_unlock(&buf->rb_mwlock);
- ia->ri_ops->ro_release_mr(mw);
+ spin_unlock(&buf->rb_mrlock);
+ ia->ri_ops->ro_release_mr(mr);
count++;
- spin_lock(&buf->rb_mwlock);
+ spin_lock(&buf->rb_mrlock);
}
- spin_unlock(&buf->rb_mwlock);
+ spin_unlock(&buf->rb_mrlock);
r_xprt->rx_stats.mrs_allocated = 0;
dprintk("RPC: %s: released %u MRs\n", __func__, count);
@@ -1285,27 +1280,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
spin_unlock(&buf->rb_reqslock);
buf->rb_recv_count = 0;
- rpcrdma_destroy_mrs(buf);
+ rpcrdma_mrs_destroy(buf);
}
-struct rpcrdma_mw *
-rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
+/**
+ * rpcrdma_mr_get - Allocate an rpcrdma_mr object
+ * @r_xprt: controlling transport
+ *
+ * Returns an initialized rpcrdma_mr or NULL if no free
+ * rpcrdma_mr objects are available.
+ */
+struct rpcrdma_mr *
+rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
- struct rpcrdma_mw *mw = NULL;
+ struct rpcrdma_mr *mr = NULL;
- spin_lock(&buf->rb_mwlock);
- if (!list_empty(&buf->rb_mws))
- mw = rpcrdma_pop_mw(&buf->rb_mws);
- spin_unlock(&buf->rb_mwlock);
+ spin_lock(&buf->rb_mrlock);
+ if (!list_empty(&buf->rb_mrs))
+ mr = rpcrdma_mr_pop(&buf->rb_mrs);
+ spin_unlock(&buf->rb_mrlock);
- if (!mw)
- goto out_nomws;
- mw->mw_flags = 0;
- return mw;
+ if (!mr)
+ goto out_nomrs;
+ return mr;
-out_nomws:
- dprintk("RPC: %s: no MWs available\n", __func__);
+out_nomrs:
+ trace_xprtrdma_nomrs(r_xprt);
if (r_xprt->rx_ep.rep_connected != -ENODEV)
schedule_delayed_work(&buf->rb_refresh_worker, 0);
@@ -1315,14 +1316,39 @@ out_nomws:
return NULL;
}
+static void
+__rpcrdma_mr_put(struct rpcrdma_buffer *buf, struct rpcrdma_mr *mr)
+{
+ spin_lock(&buf->rb_mrlock);
+ rpcrdma_mr_push(mr, &buf->rb_mrs);
+ spin_unlock(&buf->rb_mrlock);
+}
+
+/**
+ * rpcrdma_mr_put - Release an rpcrdma_mr object
+ * @mr: object to release
+ *
+ */
void
-rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
+rpcrdma_mr_put(struct rpcrdma_mr *mr)
{
- struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+ __rpcrdma_mr_put(&mr->mr_xprt->rx_buf, mr);
+}
+
+/**
+ * rpcrdma_mr_unmap_and_put - DMA unmap an MR and release it
+ * @mr: object to release
+ *
+ */
+void
+rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
+{
+ struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
- spin_lock(&buf->rb_mwlock);
- rpcrdma_push_mw(mw, &buf->rb_mws);
- spin_unlock(&buf->rb_mwlock);
+ trace_xprtrdma_dma_unmap(mr);
+ ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+ mr->mr_sg, mr->mr_nents, mr->mr_dir);
+ __rpcrdma_mr_put(&r_xprt->rx_buf, mr);
}
static struct rpcrdma_rep *
@@ -1359,11 +1385,11 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
req = rpcrdma_buffer_get_req_locked(buffers);
req->rl_reply = rpcrdma_buffer_get_rep(buffers);
spin_unlock(&buffers->rb_lock);
+
return req;
out_reqbuf:
spin_unlock(&buffers->rb_lock);
- pr_warn("RPC: %s: out of request buffers\n", __func__);
return NULL;
}
@@ -1519,9 +1545,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
req->rl_reply = NULL;
}
- dprintk("RPC: %s: posting %d s/g entries\n",
- __func__, send_wr->num_sge);
-
if (!ep->rep_send_count ||
test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
send_wr->send_flags |= IB_SEND_SIGNALED;
@@ -1530,14 +1553,12 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
send_wr->send_flags &= ~IB_SEND_SIGNALED;
--ep->rep_send_count;
}
+
rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
+ trace_xprtrdma_post_send(req, rc);
if (rc)
- goto out_postsend_err;
+ return -ENOTCONN;
return 0;
-
-out_postsend_err:
- pr_err("rpcrdma: RDMA Send ib_post_send returned %i\n", rc);
- return -ENOTCONN;
}
int
@@ -1550,23 +1571,20 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf))
goto out_map;
rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail);
+ trace_xprtrdma_post_recv(rep, rc);
if (rc)
- goto out_postrecv;
+ return -ENOTCONN;
return 0;
out_map:
pr_err("rpcrdma: failed to DMA map the Receive buffer\n");
return -EIO;
-
-out_postrecv:
- pr_err("rpcrdma: ib_post_recv returned %i\n", rc);
- return -ENOTCONN;
}
/**
* rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests
* @r_xprt: transport associated with these backchannel resources
- * @min_reqs: minimum number of incoming requests expected
+ * @count: minimum number of incoming requests expected
*
* Returns zero if all requested buffers were posted, or a negative errno.
*/
@@ -1594,7 +1612,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
out_reqbuf:
spin_unlock(&buffers->rb_lock);
- pr_warn("%s: no extra receive buffers\n", __func__);
+ trace_xprtrdma_noreps(r_xprt);
return -ENOMEM;
out_rc:
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 1342f743f1c4..69883a960a3f 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -73,11 +73,10 @@ struct rpcrdma_ia {
struct completion ri_remove_done;
int ri_async_rc;
unsigned int ri_max_segs;
- unsigned int ri_max_frmr_depth;
+ unsigned int ri_max_frwr_depth;
unsigned int ri_max_inline_write;
unsigned int ri_max_inline_read;
unsigned int ri_max_send_sges;
- bool ri_reminv_expected;
bool ri_implicit_roundup;
enum ib_mr_type ri_mrtype;
unsigned long ri_flags;
@@ -101,7 +100,6 @@ struct rpcrdma_ep {
wait_queue_head_t rep_connect_wait;
struct rpcrdma_connect_private rep_cm_private;
struct rdma_conn_param rep_remote_cma;
- struct sockaddr_storage rep_remote_addr;
struct delayed_work rep_connect_worker;
};
@@ -232,29 +230,29 @@ enum {
};
/*
- * struct rpcrdma_mw - external memory region metadata
+ * struct rpcrdma_mr - external memory region metadata
*
* An external memory region is any buffer or page that is registered
* on the fly (ie, not pre-registered).
*
- * Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During
+ * Each rpcrdma_buffer has a list of free MWs anchored in rb_mrs. During
* call_allocate, rpcrdma_buffer_get() assigns one to each segment in
* an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep
* track of registration metadata while each RPC is pending.
* rpcrdma_deregister_external() uses this metadata to unmap and
* release these resources when an RPC is complete.
*/
-enum rpcrdma_frmr_state {
- FRMR_IS_INVALID, /* ready to be used */
- FRMR_IS_VALID, /* in use */
- FRMR_FLUSHED_FR, /* flushed FASTREG WR */
- FRMR_FLUSHED_LI, /* flushed LOCALINV WR */
+enum rpcrdma_frwr_state {
+ FRWR_IS_INVALID, /* ready to be used */
+ FRWR_IS_VALID, /* in use */
+ FRWR_FLUSHED_FR, /* flushed FASTREG WR */
+ FRWR_FLUSHED_LI, /* flushed LOCALINV WR */
};
-struct rpcrdma_frmr {
+struct rpcrdma_frwr {
struct ib_mr *fr_mr;
struct ib_cqe fr_cqe;
- enum rpcrdma_frmr_state fr_state;
+ enum rpcrdma_frwr_state fr_state;
struct completion fr_linv_done;
union {
struct ib_reg_wr fr_regwr;
@@ -267,26 +265,20 @@ struct rpcrdma_fmr {
u64 *fm_physaddrs;
};
-struct rpcrdma_mw {
- struct list_head mw_list;
- struct scatterlist *mw_sg;
- int mw_nents;
- enum dma_data_direction mw_dir;
- unsigned long mw_flags;
+struct rpcrdma_mr {
+ struct list_head mr_list;
+ struct scatterlist *mr_sg;
+ int mr_nents;
+ enum dma_data_direction mr_dir;
union {
struct rpcrdma_fmr fmr;
- struct rpcrdma_frmr frmr;
+ struct rpcrdma_frwr frwr;
};
- struct rpcrdma_xprt *mw_xprt;
- u32 mw_handle;
- u32 mw_length;
- u64 mw_offset;
- struct list_head mw_all;
-};
-
-/* mw_flags */
-enum {
- RPCRDMA_MW_F_RI = 1,
+ struct rpcrdma_xprt *mr_xprt;
+ u32 mr_handle;
+ u32 mr_length;
+ u64 mr_offset;
+ struct list_head mr_all;
};
/*
@@ -362,8 +354,7 @@ struct rpcrdma_req {
/* rl_flags */
enum {
- RPCRDMA_REQ_F_BACKCHANNEL = 0,
- RPCRDMA_REQ_F_PENDING,
+ RPCRDMA_REQ_F_PENDING = 0,
RPCRDMA_REQ_F_TX_RESOURCES,
};
@@ -374,25 +365,25 @@ rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
}
static inline struct rpcrdma_req *
-rpcr_to_rdmar(struct rpc_rqst *rqst)
+rpcr_to_rdmar(const struct rpc_rqst *rqst)
{
return rqst->rq_xprtdata;
}
static inline void
-rpcrdma_push_mw(struct rpcrdma_mw *mw, struct list_head *list)
+rpcrdma_mr_push(struct rpcrdma_mr *mr, struct list_head *list)
{
- list_add_tail(&mw->mw_list, list);
+ list_add_tail(&mr->mr_list, list);
}
-static inline struct rpcrdma_mw *
-rpcrdma_pop_mw(struct list_head *list)
+static inline struct rpcrdma_mr *
+rpcrdma_mr_pop(struct list_head *list)
{
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
- mw = list_first_entry(list, struct rpcrdma_mw, mw_list);
- list_del(&mw->mw_list);
- return mw;
+ mr = list_first_entry(list, struct rpcrdma_mr, mr_list);
+ list_del(&mr->mr_list);
+ return mr;
}
/*
@@ -402,8 +393,8 @@ rpcrdma_pop_mw(struct list_head *list)
* One of these is associated with a transport instance
*/
struct rpcrdma_buffer {
- spinlock_t rb_mwlock; /* protect rb_mws list */
- struct list_head rb_mws;
+ spinlock_t rb_mrlock; /* protect rb_mrs list */
+ struct list_head rb_mrs;
struct list_head rb_all;
unsigned long rb_sc_head;
@@ -438,13 +429,11 @@ struct rpcrdma_buffer {
* This data should be set with mount options
*/
struct rpcrdma_create_data_internal {
- struct sockaddr_storage addr; /* RDMA server address */
unsigned int max_requests; /* max requests (slots) in flight */
unsigned int rsize; /* mount rsize - max read hdr+data */
unsigned int wsize; /* mount wsize - max write hdr+data */
unsigned int inline_rsize; /* max non-rdma read data payload */
unsigned int inline_wsize; /* max non-rdma write data payload */
- unsigned int padding; /* non-rdma write header padding */
};
/*
@@ -484,17 +473,19 @@ struct rpcrdma_memreg_ops {
struct rpcrdma_mr_seg *
(*ro_map)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *, int, bool,
- struct rpcrdma_mw **);
+ struct rpcrdma_mr **);
+ void (*ro_reminv)(struct rpcrdma_rep *rep,
+ struct list_head *mrs);
void (*ro_unmap_sync)(struct rpcrdma_xprt *,
struct list_head *);
- void (*ro_recover_mr)(struct rpcrdma_mw *);
+ void (*ro_recover_mr)(struct rpcrdma_mr *mr);
int (*ro_open)(struct rpcrdma_ia *,
struct rpcrdma_ep *,
struct rpcrdma_create_data_internal *);
size_t (*ro_maxpages)(struct rpcrdma_xprt *);
int (*ro_init_mr)(struct rpcrdma_ia *,
- struct rpcrdma_mw *);
- void (*ro_release_mr)(struct rpcrdma_mw *);
+ struct rpcrdma_mr *);
+ void (*ro_release_mr)(struct rpcrdma_mr *mr);
const char *ro_displayname;
const int ro_send_w_inv_ok;
};
@@ -525,6 +516,18 @@ struct rpcrdma_xprt {
#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
+static inline const char *
+rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt)
+{
+ return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR];
+}
+
+static inline const char *
+rpcrdma_portstr(const struct rpcrdma_xprt *r_xprt)
+{
+ return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_PORT];
+}
+
/* Setting this to 0 ensures interoperability with early servers.
* Setting this to 1 enhances certain unaligned read/write performance.
* Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
@@ -538,7 +541,7 @@ extern unsigned int xprt_rdma_memreg_strategy;
/*
* Interface Adapter calls - xprtrdma/verbs.c
*/
-int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr);
+int rpcrdma_ia_open(struct rpcrdma_xprt *xprt);
void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
void rpcrdma_ia_close(struct rpcrdma_ia *);
bool frwr_is_supported(struct rpcrdma_ia *);
@@ -564,22 +567,23 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *);
* Buffer calls - xprtrdma/verbs.c
*/
struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
-struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
void rpcrdma_destroy_req(struct rpcrdma_req *);
+int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
-struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
-void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
+struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
+void rpcrdma_mr_put(struct rpcrdma_mr *mr);
+void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr);
+void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr);
+
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_req *);
void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
-void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *);
-
struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
gfp_t);
bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *);
@@ -663,7 +667,7 @@ int xprt_rdma_bc_up(struct svc_serv *, struct net *);
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
-int rpcrdma_bc_marshal_reply(struct rpc_rqst *);
+int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst);
void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
@@ -671,3 +675,5 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
extern struct xprt_class xprt_rdma_bc;
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
+
+#include <trace/events/rpcrdma.h>
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 6d0cc3b8f932..18803021f242 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -52,6 +52,8 @@
#include "sunrpc.h"
+#define RPC_TCP_READ_CHUNK_SZ (3*512*1024)
+
static void xs_close(struct rpc_xprt *xprt);
static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
struct socket *sock);
@@ -1003,6 +1005,7 @@ static void xs_local_data_receive(struct sock_xprt *transport)
struct sock *sk;
int err;
+restart:
mutex_lock(&transport->recv_mutex);
sk = transport->inet;
if (sk == NULL)
@@ -1016,6 +1019,11 @@ static void xs_local_data_receive(struct sock_xprt *transport)
}
if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
break;
+ if (need_resched()) {
+ mutex_unlock(&transport->recv_mutex);
+ cond_resched();
+ goto restart;
+ }
}
out:
mutex_unlock(&transport->recv_mutex);
@@ -1094,6 +1102,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
struct sock *sk;
int err;
+restart:
mutex_lock(&transport->recv_mutex);
sk = transport->inet;
if (sk == NULL)
@@ -1107,6 +1116,11 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
}
if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
break;
+ if (need_resched()) {
+ mutex_unlock(&transport->recv_mutex);
+ cond_resched();
+ goto restart;
+ }
}
out:
mutex_unlock(&transport->recv_mutex);
@@ -1479,6 +1493,7 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
.offset = offset,
.count = len,
};
+ size_t ret;
dprintk("RPC: xs_tcp_data_recv started\n");
do {
@@ -1507,9 +1522,14 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
/* Skip over any trailing bytes on short reads */
xs_tcp_read_discard(transport, &desc);
} while (desc.count);
+ ret = len - desc.count;
+ if (ret < rd_desc->count)
+ rd_desc->count -= ret;
+ else
+ rd_desc->count = 0;
trace_xs_tcp_data_recv(transport);
dprintk("RPC: xs_tcp_data_recv done\n");
- return len - desc.count;
+ return ret;
}
static void xs_tcp_data_receive(struct sock_xprt *transport)
@@ -1517,30 +1537,34 @@ static void xs_tcp_data_receive(struct sock_xprt *transport)
struct rpc_xprt *xprt = &transport->xprt;
struct sock *sk;
read_descriptor_t rd_desc = {
- .count = 2*1024*1024,
.arg.data = xprt,
};
unsigned long total = 0;
- int loop;
int read = 0;
+restart:
mutex_lock(&transport->recv_mutex);
sk = transport->inet;
if (sk == NULL)
goto out;
/* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
- for (loop = 0; loop < 64; loop++) {
+ for (;;) {
+ rd_desc.count = RPC_TCP_READ_CHUNK_SZ;
lock_sock(sk);
read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
- if (read <= 0) {
+ if (rd_desc.count != 0 || read < 0) {
clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
release_sock(sk);
break;
}
release_sock(sk);
total += read;
- rd_desc.count = 65536;
+ if (need_resched()) {
+ mutex_unlock(&transport->recv_mutex);
+ cond_resched();
+ goto restart;
+ }
}
if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
queue_work(xprtiod_workqueue, &transport->recv_worker);