From 1549210fcc17e9ae20c09ac8cd4c48a8dfd431bd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 09:16:47 -0400 Subject: NFSv4: Fix an Oops in the open recovery code The open recovery code does not need to request a new value for the mdsthreshold, and so does not allocate a struct nfs4_threshold. The problem is that encode_getfattr_open() will still request an mdsthreshold, and so we end up Oopsing in decode_attr_mdsthreshold. This patch fixes encode_getfattr_open so that it doesn't request an mdsthreshold when the caller isn't asking for one. It also fixes decode_attr_mdsthreshold so that it errors if the server returns an mdsthreshold that we didn't ask for (instead of Oopsing). Signed-off-by: Trond Myklebust Cc: Andy Adamson --- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 22 +++++++++++++++++++++- fs/nfs/nfs4xdr.c | 12 ++++++++---- 3 files changed, 30 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index c6827f93ab57..cc5900ac61b5 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -295,7 +295,7 @@ is_ds_client(struct nfs_client *clp) extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; -extern const u32 nfs4_fattr_bitmap[2]; +extern const u32 nfs4_fattr_bitmap[3]; extern const u32 nfs4_statfs_bitmap[2]; extern const u32 nfs4_pathconf_bitmap[2]; extern const u32 nfs4_fsinfo_bitmap[3]; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d48dbefa0e71..ad1515521a6a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -116,7 +116,7 @@ static int nfs4_map_errors(int err) /* * This is our standard bitmap for GETATTR requests. */ -const u32 nfs4_fattr_bitmap[2] = { +const u32 nfs4_fattr_bitmap[3] = { FATTR4_WORD0_TYPE | FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE @@ -133,6 +133,24 @@ const u32 nfs4_fattr_bitmap[2] = { | FATTR4_WORD1_TIME_MODIFY }; +static const u32 nfs4_pnfs_open_bitmap[3] = { + FATTR4_WORD0_TYPE + | FATTR4_WORD0_CHANGE + | FATTR4_WORD0_SIZE + | FATTR4_WORD0_FSID + | FATTR4_WORD0_FILEID, + FATTR4_WORD1_MODE + | FATTR4_WORD1_NUMLINKS + | FATTR4_WORD1_OWNER + | FATTR4_WORD1_OWNER_GROUP + | FATTR4_WORD1_RAWDEV + | FATTR4_WORD1_SPACE_USED + | FATTR4_WORD1_TIME_ACCESS + | FATTR4_WORD1_TIME_METADATA + | FATTR4_WORD1_TIME_MODIFY, + FATTR4_WORD2_MDSTHRESHOLD +}; + const u32 nfs4_statfs_bitmap[2] = { FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE @@ -844,6 +862,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.name = &dentry->d_name; p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; + p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0]; p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; if (attrs != NULL && attrs->ia_valid != 0) { __be32 verf[2]; @@ -1820,6 +1839,7 @@ static int _nfs4_do_open(struct inode *dir, opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); if (!opendata->f_attr.mdsthreshold) goto err_opendata_put; + opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0]; } if (dentry->d_inode != NULL) opendata->state = nfs4_get_open_state(dentry->d_inode, sp); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index ee4a74db95d0..9ca1428da9d9 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1198,12 +1198,13 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c } static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask, + const u32 *open_bitmap, struct compound_hdr *hdr) { encode_getattr_three(xdr, - bitmask[0] & nfs4_fattr_bitmap[0], - bitmask[1] & nfs4_fattr_bitmap[1], - bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD, + bitmask[0] & open_bitmap[0], + bitmask[1] & open_bitmap[1], + bitmask[2] & open_bitmap[2], hdr); } @@ -2221,7 +2222,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr, encode_putfh(xdr, args->fh, &hdr); encode_open(xdr, args, &hdr); encode_getfh(xdr, &hdr); - encode_getfattr_open(xdr, args->bitmask, &hdr); + encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); encode_nops(&hdr); } @@ -4360,6 +4361,9 @@ static int decode_attr_mdsthreshold(struct xdr_stream *xdr, if (unlikely(bitmap[2] & (FATTR4_WORD2_MDSTHRESHOLD - 1U))) return -EIO; if (likely(bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD)) { + /* Did the server return an unrequested attribute? */ + if (unlikely(res == NULL)) + return -EREMOTEIO; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) goto out_overflow; -- cgit From 029c53473727f21c1dd73237e8d630a6f007a2fe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 09:35:44 -0400 Subject: NFSv4: Fix up decode_attr_mdsthreshold Fix an incorrect use of 'likely()'. The FATTR4_WORD2_MDSTHRESHOLD bit is only expected in NFSv4.1 OPEN calls, and so is actually rather _unlikely_. decode_attr_mdsthreshold needs to clear FATTR4_WORD2_MDSTHRESHOLD from the attribute bitmap after it has decoded the data. Signed-off-by: Trond Myklebust Cc: Andy Adamson --- fs/nfs/nfs4xdr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 9ca1428da9d9..18fae29b0301 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4360,7 +4360,7 @@ static int decode_attr_mdsthreshold(struct xdr_stream *xdr, if (unlikely(bitmap[2] & (FATTR4_WORD2_MDSTHRESHOLD - 1U))) return -EIO; - if (likely(bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD)) { + if (bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD) { /* Did the server return an unrequested attribute? */ if (unlikely(res == NULL)) return -EREMOTEIO; @@ -4376,6 +4376,7 @@ static int decode_attr_mdsthreshold(struct xdr_stream *xdr, __func__); status = decode_first_threshold_item4(xdr, res); + bitmap[2] &= ~FATTR4_WORD2_MDSTHRESHOLD; } return status; out_overflow: -- cgit From 08106ac7c892cad769c5026cb9dd877a39aed603 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 10:08:24 -0400 Subject: NFSv4.1: Convert a trivial printk into a dprintk There is no need to bug the user about the server returning an error on destroy_session. The error will be handled by the state manager, without any need for further input from anyone else. So convert that printk into a debugging dprintk. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ad1515521a6a..7c218fd1ec21 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5766,8 +5766,7 @@ int nfs4_proc_destroy_session(struct nfs4_session *session, status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (status) - printk(KERN_WARNING - "NFS: Got error %d from the server on DESTROY_SESSION. " + dprintk("NFS: Got error %d from the server on DESTROY_SESSION. " "Session has been destroyed regardless...\n", status); dprintk("<-- nfs4_proc_destroy_session\n"); -- cgit From bda197f5d71d56b95a84c0ccbcb8ce2691106cca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 10:22:14 -0400 Subject: NFSv4.1: Ensure we clear session state flags after a session creation Both nfs4_reset_session and nfs41_init_clientid need to clear all the session related state flags on success. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c679b9ecef63..f38300e9f171 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -244,6 +244,16 @@ static int nfs4_begin_drain_session(struct nfs_client *clp) return nfs4_wait_on_slot_tbl(&ses->fc_slot_table); } +static void nfs41_finish_session_reset(struct nfs_client *clp) +{ + clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); + clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + /* create_session negotiated new slot table */ + clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); + clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); + nfs41_setup_state_renewal(clp); +} + int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) { int status; @@ -259,8 +269,7 @@ do_confirm: status = nfs4_proc_create_session(clp, cred); if (status != 0) goto out; - clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); - nfs41_setup_state_renewal(clp); + nfs41_finish_session_reset(clp); nfs_mark_client_ready(clp, NFS_CS_READY); out: return status; @@ -1772,16 +1781,9 @@ static int nfs4_reset_session(struct nfs_client *clp) status = nfs4_handle_reclaim_lease_error(clp, status); goto out; } - clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); - /* create_session negotiated new slot table */ - clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); - clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); + nfs41_finish_session_reset(clp); dprintk("%s: session reset was successful for server %s!\n", __func__, clp->cl_hostname); - - /* Let the state manager reestablish state */ - if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) - nfs41_setup_state_renewal(clp); out: if (cred) put_rpccred(cred); -- cgit From cdf66442fab82916fe38f928b4f91815195a294c Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 5 Jun 2012 14:59:54 -0400 Subject: NFS4: Set parsed mount data version to 4 This patch only affects mounting through "-t nfs4" since it doesn't set up an nfs version to use in the mount data. The nfs client was trying to mount using NFS v0, causing either a BUG() or a protocol not supported message. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ff656c022684..bdd673141e9e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2637,6 +2637,8 @@ static int nfs4_validate_mount_data(void *options, if (data == NULL) goto out_no_data; + args->version = 4; + switch (data->version) { case 1: if (data->host_addrlen > sizeof(args->nfs_server.address)) -- cgit From 9bce008bae8b57bc7b007bcc2071d1247a527120 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 18:32:03 -0400 Subject: NFS: Fix a commit bug The new commit code fails to copy the verifier into the wb_verf field of _all_ the nfs_page structures; it only copies it into the first entry. The consequence is that most requests end up failing to match in nfs_commit_release. Fix is to copy the verifier into the req->wb_verf field in nfs_write_completion. Signed-off-by: Trond Myklebust Cc: Fred Isaman --- fs/nfs/direct.c | 4 ++-- fs/nfs/write.c | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 23d170bc44f4..b5385a7efd56 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -710,12 +710,12 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) bit = NFS_IOHDR_NEED_RESCHED; else if (dreq->flags == 0) { - memcpy(&dreq->verf, &req->wb_verf, + memcpy(&dreq->verf, hdr->verf, sizeof(dreq->verf)); bit = NFS_IOHDR_NEED_COMMIT; dreq->flags = NFS_ODIRECT_DO_COMMIT; } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { - if (memcmp(&dreq->verf, &req->wb_verf, sizeof(dreq->verf))) { + if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) { dreq->flags = NFS_ODIRECT_RESCHED_WRITES; bit = NFS_IOHDR_NEED_RESCHED; } else diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e6fe3d69d14c..4d6861c0dc14 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -80,6 +80,7 @@ struct nfs_write_header *nfs_writehdr_alloc(void) INIT_LIST_HEAD(&hdr->rpc_list); spin_lock_init(&hdr->lock); atomic_set(&hdr->refcnt, 0); + hdr->verf = &p->verf; } return p; } @@ -619,6 +620,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) goto next; } if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { + memcpy(&req->wb_verf, hdr->verf, sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo); goto next; } @@ -1255,15 +1257,14 @@ static void nfs_writeback_release_common(void *calldata) struct nfs_write_data *data = calldata; struct nfs_pgio_header *hdr = data->header; int status = data->task.tk_status; - struct nfs_page *req = hdr->req; if ((status >= 0) && nfs_write_need_commit(data)) { spin_lock(&hdr->lock); if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) ; /* Do nothing */ else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) - memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); - else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) + memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf)); + else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf))) set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); spin_unlock(&hdr->lock); } -- cgit From f25efd851cc8c0f63d74334bc784a437f4df8ee4 Mon Sep 17 00:00:00 2001 From: Steve Dickson Date: Wed, 6 Jun 2012 14:12:07 -0400 Subject: NFS: Map minor mismatch error to protocol not support error. Sservers that only have NFSv4.1 support the NFS4ERR_MINOR_VERS_MISMATCH error is return on v4.0 mounts. Mapping that error to EPROTONOSUPPORT will cause the mount to back off to v3 instead of failing. Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7c218fd1ec21..bfc919fd3f06 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -105,6 +105,8 @@ static int nfs4_map_errors(int err) return -EINVAL; case -NFS4ERR_SHARE_DENIED: return -EACCES; + case -NFS4ERR_MINOR_VERS_MISMATCH: + return -EPROTONOSUPPORT; default: dprintk("%s could not handle NFSv4 error %d\n", __func__, -err); -- cgit From f07936f2c446bd5310e669c8e6eab3f812ea665a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 7 Jun 2012 10:21:03 -0400 Subject: NFS: Remove incorrect BUG_ON in nfs_found_client It is perfectly valid for nfs_get_client() to return a nfs_client that is in the process of setting up the NFSv4.1 session. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 7d108753af81..17ba6b995659 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -544,8 +544,6 @@ nfs_found_client(const struct nfs_client_initdata *cl_init, smp_rmb(); - BUG_ON(clp->cl_cons_state != NFS_CS_READY); - dprintk("<-- %s found nfs_client %p for %s\n", __func__, clp, cl_init->hostname ?: ""); return clp; -- cgit From fb47ddc9d5ded3d202335ea29743b9242d54eb5a Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 7 Jun 2012 11:42:12 -0400 Subject: NFS4: Fix open bug when pnfs module blacklisted Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 29fd23c0efdc..64f90d845f6a 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -365,7 +365,7 @@ static inline bool pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src, struct nfs_server *nfss) { - return (dst && src && src->bm != 0 && + return (dst && src && src->bm != 0 && nfss->pnfs_curr_ld && nfss->pnfs_curr_ld->id == src->l_type); } -- cgit From 02c67525cf325131aa06c6b0c6fd457bd57161e3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 7 Jun 2012 13:45:53 -0400 Subject: NFSv4.1: Convert another trivial printk into a dprintk Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index bfc919fd3f06..5881f2cc5d81 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5297,7 +5297,7 @@ static int _nfs4_proc_destroy_clientid(struct nfs_client *clp, status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (status) - pr_warn("NFS: Got error %d from the server %s on " + dprintk("NFS: Got error %d from the server %s on " "DESTROY_CLIENTID.", status, clp->cl_hostname); return status; } -- cgit From 2d0dbc6ae8a5194aaecb9cfffb9053f38fce8b86 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 8 Jun 2012 10:58:09 -0400 Subject: NFSv4: Fix unnecessary delegation returns in nfs4_do_open While nfs4_do_open() expects the fmode argument to be restricted to combinations of FMODE_READ and FMODE_WRITE, both nfs4_atomic_open() and nfs4_proc_create will pass the nfs_open_context->mode, which contains the full fmode_t. This patch ensures that nfs4_do_open strips the other fmode_t bits, fixing a problem in which the nfs4_do_open call would result in an unnecessary delegation return. Reported-by: Fred Isaman Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5881f2cc5d81..f23977e4ac0f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1902,6 +1902,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct nfs4_state *res; int status; + fmode &= FMODE_READ|FMODE_WRITE; do { status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res, ctx_th); -- cgit From 906369e43c29001c39c7dfed8a01b9dff24ace75 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 8 Jun 2012 16:48:33 -0400 Subject: NFS: fix directio refcount bug on commit This reverts a hunk from commit 04277086577 "NFS: Clean up - Simplify reference counting in fs/nfs/direct.c" The cleanups in that patch affect the write path, but by the time processing hits commit the removed reference has been added back by nfs_scan_commit_list(). Without this reversion, any page that is sent to commit holds on to an unbalanced reference that is never freed. The immediate effect is an imbalance over the wire between OPENs and CLOSEs. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index b5385a7efd56..05099890a929 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -517,9 +517,9 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) nfs_list_remove_request(req); if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { /* Note the rewrite will go through mds */ - kref_get(&req->wb_kref); nfs_mark_request_commit(req, NULL, &cinfo); - } + } else + nfs_release_request(req); nfs_unlock_and_release_request(req); } -- cgit From c5afc8da5b881633717bfc0510792428aa01fa3f Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Fri, 8 Jun 2012 11:32:56 -0400 Subject: NFS: Use the NFS_DEFAULT_VERSION for v2 and v3 mounts Older versions of nfs utils don't always pass a "vers=" mount option for NFS. This chould lead to attempts at using NFS v0 due to a zeroed out nfs_parsed_mount_data struct. I solve this by setting the default NFS version to NFS_DEFAULT_VERSION in the v2 and v3 cases (v4 has already been taken care of by a similar patch). Reported-by: Joerg Roedel Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index bdd673141e9e..906f09c7d842 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1867,6 +1867,7 @@ static int nfs23_validate_mount_data(void *options, if (data == NULL) goto out_no_data; + args->version = NFS_DEFAULT_VERSION; switch (data->version) { case 1: data->namlen = 0; -- cgit From 64f9a83665e4f168ff50ce1db9eb175f954048fa Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Thu, 31 May 2012 17:45:00 +0100 Subject: NFSv2: EOF incorrectly set on short read In cases where the server returns fewer bytes then those requested, we can incorrectly set the eof flag for the file. Fixing this allows the request to be retried with updated offset and count arguments. Signed-off-by: Sachin Prabhu Signed-off-by: Trond Myklebust --- fs/nfs/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index a706b6bcc286..617c7419a08e 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -651,7 +651,7 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) /* Emulate the eof flag, which isn't normally needed in NFSv2 * as it is guaranteed to always return the file attributes */ - if (data->args.offset + data->args.count >= data->res.fattr->size) + if (data->args.offset + data->res.count >= data->res.fattr->size) data->res.eof = 1; } return 0; -- cgit From 2669940db8d02147181e338bb6db98394569f31a Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 30 May 2012 16:12:24 -0400 Subject: NFSv4 do not send an empty SETATTR compound Commit 536e43d12b9517bbbf6114cd1a12be27857a4d7a ATTR_OPEN check can result in an ia_valid with only ATTR_FILE set, and no NFS_VALID_ATTRS attributes to request from the server. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f23977e4ac0f..15fc7e4664ed 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2549,6 +2549,14 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, nfs_fattr_init(fattr); + /* Deal with open(O_TRUNC) */ + if (sattr->ia_valid & ATTR_OPEN) + sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN); + + /* Optimization: if the end result is no change, don't RPC */ + if ((sattr->ia_valid & ~(ATTR_FILE)) == 0) + return 0; + /* Search for an existing open(O_WRITE) file */ if (sattr->ia_valid & ATTR_FILE) { struct nfs_open_context *ctx; @@ -2560,10 +2568,6 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } } - /* Deal with open(O_TRUNC) */ - if (sattr->ia_valid & ATTR_OPEN) - sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN); - status = nfs4_do_setattr(inode, cred, fattr, sattr, state); if (status == 0) nfs_setattr_update_inode(inode, sattr); -- cgit From 0439f31c35d1da0b28988b308ea455e38e6a350d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 12 Jun 2012 10:37:08 +0300 Subject: NFSv4.1: integer overflow in decode_cb_sequence_args() This seems like it could overflow on 32 bits. Use kmalloc_array() which has overflow protection built in. Signed-off-by: Dan Carpenter Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 95bfc243992c..27c2969a9d02 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -455,9 +455,9 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp, args->csa_nrclists = ntohl(*p++); args->csa_rclists = NULL; if (args->csa_nrclists) { - args->csa_rclists = kmalloc(args->csa_nrclists * - sizeof(*args->csa_rclists), - GFP_KERNEL); + args->csa_rclists = kmalloc_array(args->csa_nrclists, + sizeof(*args->csa_rclists), + GFP_KERNEL); if (unlikely(args->csa_rclists == NULL)) goto out; -- cgit From e216c8c771c9a77f14d7e8b4131846b038f6c145 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 12 Jun 2012 10:37:39 +0300 Subject: NFS: add an endian notation for sparse This is supposed to be a __be32 value. Sparse complains a lot: fs/nfs/callback_xdr.c:699:30: warning: incorrect type in initializer (different base types) fs/nfs/callback_xdr.c:699:30: expected unsigned int [unsigned] status fs/nfs/callback_xdr.c:699:30: got restricted __be32 const [usertype] csr_status fs/nfs/callback_xdr.c:715:9: warning: cast to restricted __be32 fs/nfs/callback_xdr.c:716:16: warning: incorrect type in return expression (different base types) fs/nfs/callback_xdr.c:716:16: expected restricted __be32 fs/nfs/callback_xdr.c:716:16: got unsigned int [unsigned] status Signed-off-by: Dan Carpenter Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 27c2969a9d02..e64b01d2a338 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -696,7 +696,7 @@ static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp, const struct cb_sequenceres *res) { __be32 *p; - unsigned status = res->csr_status; + __be32 status = res->csr_status; if (unlikely(status != 0)) goto out; -- cgit From 12918b10d59e975fd5241eef03ef9e6d5ea3dcfe Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 1 Jun 2012 13:55:47 +0400 Subject: NFS: hard-code init_net for NFS callback transports In case of destroying mount namespace on child reaper exit, nsproxy is zeroed to the point already. So, dereferencing of it is invalid. This patch hard-code "init_net" for all network namespace references for NFS callback services. This will be fixed with proper NFS callback containerization. Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. Bruce Fields --- fs/nfs/callback.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 970659daa323..23ff18fe080a 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -17,7 +17,6 @@ #include #include #include -#include #include @@ -107,7 +106,7 @@ nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) { int ret; - ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET, + ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret <= 0) goto out_err; @@ -115,7 +114,7 @@ nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) dprintk("NFS: Callback listener port = %u (af %u)\n", nfs_callback_tcpport, PF_INET); - ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET6, + ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret > 0) { nfs_callback_tcpport6 = ret; @@ -184,7 +183,7 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) * fore channel connection. * Returns the input port (0) and sets the svc_serv bc_xprt on success */ - ret = svc_create_xprt(serv, "tcp-bc", xprt->xprt_net, PF_INET, 0, + ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0, SVC_SOCK_ANONYMOUS); if (ret < 0) { rqstp = ERR_PTR(ret); @@ -254,7 +253,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) char svc_name[12]; int ret = 0; int minorversion_setup; - struct net *net = current->nsproxy->net_ns; + struct net *net = &init_net; mutex_lock(&nfs_callback_mutex); if (cb_info->users++ || cb_info->task != NULL) { @@ -330,7 +329,7 @@ void nfs_callback_down(int minorversion) cb_info->users--; if (cb_info->users == 0 && cb_info->task != NULL) { kthread_stop(cb_info->task); - svc_shutdown_net(cb_info->serv, current->nsproxy->net_ns); + svc_shutdown_net(cb_info->serv, &init_net); svc_exit_thread(cb_info->rqst); cb_info->serv = NULL; cb_info->rqst = NULL; -- cgit From 2a4c8994eeef50796015f8a2005e4a75c1929166 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Jun 2012 13:08:38 -0400 Subject: NFSv4.1: Fix umount when filelayout DS is also the MDS Currently there is a 'chicken and egg' issue when the DS is also the mounted MDS. The nfs_match_client() reference from nfs4_set_ds_client bumps the cl_count, the nfs_client is not freed at umount, and nfs4_deviceid_purge_client is not called to dereference the MDS usage of a deviceid which holds a reference to the DS nfs_client. The result is the umount program returns, but the nfs_client is not freed, and the cl_session hearbeat continues. The MDS (and all other nfs mounts) lose their last nfs_client reference in nfs_free_server when the last nfs_server (fsid) is umounted. The file layout DS lose their last nfs_client reference in destroy_ds when the last deviceid referencing the data server is put and destroy_ds is called. This is triggered by a call to nfs4_deviceid_purge_client which removes references to a pNFS deviceid used by an MDS mount. The fix is to track how many pnfs enabled filesystems are mounted from this server, and then to purge the device id cache once that count reaches zero. Reported-by: Jorge Mora Reported-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 1 - fs/nfs/pnfs.c | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 17ba6b995659..f005b5bebdc7 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -207,7 +207,6 @@ error_0: static void nfs4_shutdown_session(struct nfs_client *clp) { if (nfs4_has_session(clp)) { - nfs4_deviceid_purge_client(clp); nfs4_destroy_session(clp->cl_session); nfs4_destroy_clientid(clp); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b8323aa7b543..bdf7e52943c8 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -80,6 +80,9 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss) if (nfss->pnfs_curr_ld) { if (nfss->pnfs_curr_ld->clear_layoutdriver) nfss->pnfs_curr_ld->clear_layoutdriver(nfss); + /* Decrement the MDS count. Purge the deviceid cache if zero */ + if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count)) + nfs4_deviceid_purge_client(nfss->nfs_client); module_put(nfss->pnfs_curr_ld->owner); } nfss->pnfs_curr_ld = NULL; @@ -127,6 +130,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, module_put(ld_type->owner); goto out_no_driver; } + /* Bump the MDS count */ + atomic_inc(&server->nfs_client->cl_mds_count); dprintk("%s: pNFS module for %u set\n", __func__, id); return; -- cgit From 0a9c63fae7df086ff5e107273c3cce8642430974 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 15 Jun 2012 13:02:58 -0400 Subject: NFSv4.1: Fix a race in set_pnfs_layoutdriver The call to try_module_get() dereferences ld_type outside the spin locks, which means that it may be pointing to garbage if a module unload was in progress. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index bdf7e52943c8..bbc49caa7a82 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -70,6 +70,10 @@ find_pnfs_driver(u32 id) spin_lock(&pnfs_spinlock); local = find_pnfs_driver_locked(id); + if (local != NULL && !try_module_get(local->owner)) { + dprintk("%s: Could not grab reference on module\n", __func__); + local = NULL; + } spin_unlock(&pnfs_spinlock); return local; } @@ -118,10 +122,6 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, goto out_no_driver; } } - if (!try_module_get(ld_type->owner)) { - dprintk("%s: Could not grab reference on module\n", __func__); - goto out_no_driver; - } server->pnfs_curr_ld = ld_type; if (ld_type->set_layoutdriver && ld_type->set_layoutdriver(server, mntfh)) { -- cgit From 5a695da26367f53a368c90435e0b883c12f02791 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 19 Jun 2012 13:39:14 -0400 Subject: NFS: Fix a refcounting issue in O_DIRECT In nfs_direct_write_reschedule(), the requests from nfs_scan_commit_list have a refcount of 2, whereas the operations in nfs_direct_write_completion_ops expect them to have a refcount of 1. This patch adds a call to release the extra references. Signed-off-by: Trond Myklebust Cc: Fred Isaman --- fs/nfs/direct.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 3168f6e3d4d4..9a4cbfc85d81 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -490,6 +490,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) dreq->error = -EIO; spin_unlock(cinfo.lock); } + nfs_release_request(req); } nfs_pageio_complete(&desc); -- cgit From 1a0de48ae56b5cdb9a46b3d3a0b578dd7f787f22 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 19 Jun 2012 18:38:56 -0400 Subject: NFS: Initialise commit_info.rpc_out when !defined(CONFIG_NFS_V4) Signed-off-by: Trond Myklebust Cc: Fred Isaman --- fs/nfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e605d695dbcb..f7296983eba6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1530,7 +1530,6 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi) nfsi->delegation_state = 0; init_rwsem(&nfsi->rwsem); nfsi->layout = NULL; - atomic_set(&nfsi->commit_info.rpcs_out, 0); #endif } @@ -1545,6 +1544,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->commit_info.list); nfsi->npages = 0; nfsi->commit_info.ncommit = 0; + atomic_set(&nfsi->commit_info.rpcs_out, 0); atomic_set(&nfsi->silly_count, 1); INIT_HLIST_HEAD(&nfsi->silly_list); init_waitqueue_head(&nfsi->waitqueue); -- cgit From b1027439dff844675f6c0df97a1b1d190791a699 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Jun 2012 14:35:28 -0400 Subject: NFS: Force the legacy idmapper to be single threaded It was initially coded under the assumption that there would only be one request at a time, so use a lock to enforce this requirement.. Signed-off-by: Bryan Schumaker CC: stable@vger.kernel.org [3.4+] Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index b5b86a05059c..864c51e4b400 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -57,6 +57,11 @@ unsigned int nfs_idmap_cache_timeout = 600; static const struct cred *id_resolver_cache; static struct key_type key_type_id_resolver_legacy; +struct idmap { + struct rpc_pipe *idmap_pipe; + struct key_construction *idmap_key_cons; + struct mutex idmap_mutex; +}; /** * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields @@ -310,9 +315,11 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, name, namelen, type, data, data_size, NULL); if (ret < 0) { + mutex_lock(&idmap->idmap_mutex); ret = nfs_idmap_request_key(&key_type_id_resolver_legacy, name, namelen, type, data, data_size, idmap); + mutex_unlock(&idmap->idmap_mutex); } return ret; } @@ -354,11 +361,6 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *typ /* idmap classic begins here */ module_param(nfs_idmap_cache_timeout, int, 0644); -struct idmap { - struct rpc_pipe *idmap_pipe; - struct key_construction *idmap_key_cons; -}; - enum { Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err }; @@ -469,6 +471,7 @@ nfs_idmap_new(struct nfs_client *clp) return error; } idmap->idmap_pipe = pipe; + mutex_init(&idmap->idmap_mutex); clp->cl_idmap = idmap; return 0; -- cgit From 4035c2487f179327fae87af3477659402b797584 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Jul 2012 10:24:10 -0400 Subject: NFS: Fix list manipulation snafus in fs/nfs/direct.c Fix 2 bugs in nfs_direct_write_reschedule: - The request needs to be removed from the 'reqs' list before it can be added to 'failed'. - Fix an infinite loop if the 'failed' list is non-empty. Reported-by: Julia Lawall Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 9a4cbfc85d81..48253372ab1d 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -484,6 +484,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) list_for_each_entry_safe(req, tmp, &reqs, wb_list) { if (!nfs_pageio_add_request(&desc, req)) { + nfs_list_remove_request(req); nfs_list_add_request(req, &failed); spin_lock(cinfo.lock); dreq->flags = 0; @@ -494,8 +495,11 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) } nfs_pageio_complete(&desc); - while (!list_empty(&failed)) + while (!list_empty(&failed)) { + req = nfs_list_entry(failed.next); + nfs_list_remove_request(req); nfs_unlock_and_release_request(req); + } if (put_dreq(dreq)) nfs_direct_write_complete(dreq, dreq->inode); -- cgit From f1daf666dd7d097653bd38320248c68084b1febf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 10 Jul 2012 12:04:15 -0400 Subject: NFSv4: Fix an NFSv4 mount regression The helper nfs_fs_mount() will always call nfs4_try_mount with the mount_info->fill_super argument pointing to nfs_fill_super, which is NFSv2/v3 only. Fix is to have nfs4_try_mount replace it with nfs4_fill_super. The regression was introduced by commit c40f8d1d (NFS: Create a common fs_mount() function) Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 906f09c7d842..06228192f64e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2860,6 +2860,8 @@ static struct dentry *nfs4_try_mount(int flags, const char *dev_name, dfprintk(MOUNT, "--> nfs4_try_mount()\n"); + mount_info->fill_super = nfs4_fill_super; + export_path = data->nfs_server.export_path; data->nfs_server.export_path = "/"; root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, -- cgit From 9909d45a8557455ca5f8ee7af0f253debc851f1a Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Fri, 8 Jun 2012 05:29:40 +0300 Subject: pnfs-obj: don't leak objio_state if ore_write/read fails [Bug since 3.2 Kernel] CC: Stable Tree Signed-off-by: Boaz Harrosh --- fs/nfs/objlayout/objio_osd.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index b47277baebab..86d7595aca8f 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -454,7 +454,10 @@ int objio_read_pagelist(struct nfs_read_data *rdata) objios->ios->done = _read_done; dprintk("%s: offset=0x%llx length=0x%x\n", __func__, rdata->args.offset, rdata->args.count); - return ore_read(objios->ios); + ret = ore_read(objios->ios); + if (unlikely(ret)) + objio_free_result(&objios->oir); + return ret; } /* @@ -539,8 +542,10 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how) dprintk("%s: offset=0x%llx length=0x%x\n", __func__, wdata->args.offset, wdata->args.count); ret = ore_write(objios->ios); - if (unlikely(ret)) + if (unlikely(ret)) { + objio_free_result(&objios->oir); return ret; + } if (objios->sync) _write_done(objios->ios, objios); -- cgit From c999ff68029ebd0f56ccae75444f640f6d5a27d2 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Fri, 8 Jun 2012 02:02:30 +0300 Subject: pnfs-obj: Fix __r4w_get_page when offset is beyond i_size It is very common for the end of the file to be unaligned on stripe size. But since we know it's beyond file's end then the XOR should be preformed with all zeros. Old code used to just read zeros out of the OSD devices, which is a great waist. But what scares me more about this situation is that, we now have pages attached to the file's mapping that are beyond i_size. I don't like the kind of bugs this calls for. Fix both birds, by returning a global zero_page, if offset is beyond i_size. TODO: Change the API to ->__r4w_get_page() so a NULL can be returned without being considered as error, since XOR API treats NULL entries as zero_pages. [Bug since 3.2. Should apply the same way to all Kernels since] CC: Stable Tree Signed-off-by: Boaz Harrosh --- fs/nfs/objlayout/objio_osd.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 86d7595aca8f..f50d3e8d6f22 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -489,8 +489,16 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) struct nfs_write_data *wdata = objios->oir.rpcdata; struct address_space *mapping = wdata->header->inode->i_mapping; pgoff_t index = offset / PAGE_SIZE; - struct page *page = find_get_page(mapping, index); + struct page *page; + loff_t i_size = i_size_read(wdata->header->inode); + if (offset >= i_size) { + *uptodate = true; + dprintk("%s: g_zero_page index=0x%lx\n", __func__, index); + return ZERO_PAGE(0); + } + + page = find_get_page(mapping, index); if (!page) { page = find_or_create_page(mapping, index, GFP_NOFS); if (unlikely(!page)) { @@ -510,8 +518,10 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) static void __r4w_put_page(void *priv, struct page *page) { - dprintk("%s: index=0x%lx\n", __func__, page->index); - page_cache_release(page); + dprintk("%s: index=0x%lx\n", __func__, + (page == ZERO_PAGE(0)) ? -1UL : page->index); + if (ZERO_PAGE(0) != page) + page_cache_release(page); return; } -- cgit