summaryrefslogtreecommitdiff
path: root/fs/nfs/write.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-04-07 13:51:39 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-04-07 13:51:39 -0700
commit04de788e61a576820baf03ff8accc246ca146cb3 (patch)
treece7398fd61364c78b2a353729f8a92d77d02c076 /fs/nfs/write.c
parentf40f31cadc0ea5dcdd224c8b324add26469c2379 (diff)
parent93ce4af774bc3d8a72ce2271d03241c96383629d (diff)
Merge tag 'nfs-for-5.7-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust: "Highlights include: Stable fixes: - Fix a page leak in nfs_destroy_unlinked_subrequests() - Fix use-after-free issues in nfs_pageio_add_request() - Fix new mount code constant_table array definitions - finish_automount() requires us to hold 2 refs to the mount record Features: - Improve the accuracy of telldir/seekdir by using 64-bit cookies when possible. - Allow one RDMA active connection and several zombie connections to prevent blocking if the remote server is unresponsive. - Limit the size of the NFS access cache by default - Reduce the number of references to credentials that are taken by NFS - pNFS files and flexfiles drivers now support per-layout segment COMMIT lists. - Enable partial-file layout segments in the pNFS/flexfiles driver. - Add support for CB_RECALL_ANY to the pNFS flexfiles layout type - pNFS/flexfiles Report NFS4ERR_DELAY and NFS4ERR_GRACE errors from the DS using the layouterror mechanism. Bugfixes and cleanups: - SUNRPC: Fix krb5p regressions - Don't specify NFS version in "UDP not supported" error - nfsroot: set tcp as the default transport protocol - pnfs: Return valid stateids in nfs_layout_find_inode_by_stateid() - alloc_nfs_open_context() must use the file cred when available - Fix locking when dereferencing the delegation cred - Fix memory leaks in O_DIRECT when nfs_get_lock_context() fails - Various clean ups of the NFS O_DIRECT commit code - Clean up RDMA connect/disconnect - Replace zero-length arrays with C99-style flexible arrays" * tag 'nfs-for-5.7-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (86 commits) NFS: Clean up process of marking inode stale. SUNRPC: Don't start a timer on an already queued rpc task NFS/pnfs: Reference the layout cred in pnfs_prepare_layoutreturn() NFS/pnfs: Fix dereference of layout cred in pnfs_layoutcommit_inode() NFS: Beware when dereferencing the delegation cred NFS: Add a module parameter to set nfs_mountpoint_expiry_timeout NFS: finish_automount() requires us to hold 2 refs to the mount record NFS: Fix a few constant_table array definitions NFS: Try to join page groups before an O_DIRECT retransmission NFS: Refactor nfs_lock_and_join_requests() NFS: Reverse the submission order of requests in __nfs_pageio_add_request() NFS: Clean up nfs_lock_and_join_requests() NFS: Remove the redundant function nfs_pgio_has_mirroring() NFS: Fix memory leaks in nfs_pageio_stop_mirroring() NFS: Fix a request reference leak in nfs_direct_write_clear_reqs() NFS: Fix use-after-free issues in nfs_pageio_add_request() NFS: Fix races nfs_page_group_destroy() vs nfs_destroy_unlinked_subrequests() NFS: Fix a page leak in nfs_destroy_unlinked_subrequests() NFS: Remove unused FLUSH_SYNC support in nfs_initiate_pgio() pNFS/flexfiles: Specify the layout segment range in LAYOUTGET ...
Diffstat (limited to 'fs/nfs/write.c')
-rw-r--r--fs/nfs/write.c276
1 files changed, 134 insertions, 142 deletions
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c478b772cc49..df4b87c30ac9 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -149,6 +149,31 @@ static void nfs_io_completion_put(struct nfs_io_completion *ioc)
kref_put(&ioc->refcount, nfs_io_completion_release);
}
+static void
+nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode)
+{
+ if (!test_and_set_bit(PG_INODE_REF, &req->wb_flags)) {
+ kref_get(&req->wb_kref);
+ atomic_long_inc(&NFS_I(inode)->nrequests);
+ }
+}
+
+static int
+nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
+{
+ int ret;
+
+ if (!test_bit(PG_REMOVE, &req->wb_flags))
+ return 0;
+ ret = nfs_page_group_lock(req);
+ if (ret)
+ return ret;
+ if (test_and_clear_bit(PG_REMOVE, &req->wb_flags))
+ nfs_page_set_inode_ref(req, inode);
+ nfs_page_group_unlock(req);
+ return 0;
+}
+
static struct nfs_page *
nfs_page_private_request(struct page *page)
{
@@ -218,6 +243,36 @@ static struct nfs_page *nfs_page_find_head_request(struct page *page)
return req;
}
+static struct nfs_page *nfs_find_and_lock_page_request(struct page *page)
+{
+ struct inode *inode = page_file_mapping(page)->host;
+ struct nfs_page *req, *head;
+ int ret;
+
+ for (;;) {
+ req = nfs_page_find_head_request(page);
+ if (!req)
+ return req;
+ head = nfs_page_group_lock_head(req);
+ if (head != req)
+ nfs_release_request(req);
+ if (IS_ERR(head))
+ return head;
+ ret = nfs_cancel_remove_inode(head, inode);
+ if (ret < 0) {
+ nfs_unlock_and_release_request(head);
+ return ERR_PTR(ret);
+ }
+ /* Ensure that nobody removed the request before we locked it */
+ if (head == nfs_page_private_request(page))
+ break;
+ if (PageSwapCache(page))
+ break;
+ nfs_unlock_and_release_request(head);
+ }
+ return head;
+}
+
/* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
{
@@ -380,34 +435,6 @@ static void nfs_end_page_writeback(struct nfs_page *req)
}
/*
- * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req
- *
- * this is a helper function for nfs_lock_and_join_requests
- *
- * @inode - inode associated with request page group, must be holding inode lock
- * @head - head request of page group, must be holding head lock
- * @req - request that couldn't lock and needs to wait on the req bit lock
- *
- * NOTE: this must be called holding page_group bit lock
- * which will be released before returning.
- *
- * returns 0 on success, < 0 on error.
- */
-static void
-nfs_unroll_locks(struct inode *inode, struct nfs_page *head,
- struct nfs_page *req)
-{
- struct nfs_page *tmp;
-
- /* relinquish all the locks successfully grabbed this run */
- for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) {
- if (!kref_read(&tmp->wb_kref))
- continue;
- nfs_unlock_and_release_request(tmp);
- }
-}
-
-/*
* nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests
*
* @destroy_list - request list (using wb_this_page) terminated by @old_head
@@ -428,22 +455,29 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
destroy_list = (subreq->wb_this_page == old_head) ?
NULL : subreq->wb_this_page;
+ /* Note: lock subreq in order to change subreq->wb_head */
+ nfs_page_set_headlock(subreq);
WARN_ON_ONCE(old_head != subreq->wb_head);
/* make sure old group is not used */
subreq->wb_this_page = subreq;
+ subreq->wb_head = subreq;
clear_bit(PG_REMOVE, &subreq->wb_flags);
/* Note: races with nfs_page_group_destroy() */
if (!kref_read(&subreq->wb_kref)) {
/* Check if we raced with nfs_page_group_destroy() */
- if (test_and_clear_bit(PG_TEARDOWN, &subreq->wb_flags))
+ if (test_and_clear_bit(PG_TEARDOWN, &subreq->wb_flags)) {
+ nfs_page_clear_headlock(subreq);
nfs_free_request(subreq);
+ } else
+ nfs_page_clear_headlock(subreq);
continue;
}
+ nfs_page_clear_headlock(subreq);
- subreq->wb_head = subreq;
+ nfs_release_request(old_head);
if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) {
nfs_release_request(subreq);
@@ -457,105 +491,43 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
}
/*
- * nfs_lock_and_join_requests - join all subreqs to the head req and return
- * a locked reference, cancelling any pending
- * operations for this page.
- *
- * @page - the page used to lookup the "page group" of nfs_page structures
+ * nfs_join_page_group - destroy subrequests of the head req
+ * @head: the page used to lookup the "page group" of nfs_page structures
+ * @inode: Inode to which the request belongs.
*
* This function joins all sub requests to the head request by first
* locking all requests in the group, cancelling any pending operations
* and finally updating the head request to cover the whole range covered by
* the (former) group. All subrequests are removed from any write or commit
* lists, unlinked from the group and destroyed.
- *
- * Returns a locked, referenced pointer to the head request - which after
- * this call is guaranteed to be the only request associated with the page.
- * Returns NULL if no requests are found for @page, or a ERR_PTR if an
- * error was encountered.
*/
-static struct nfs_page *
-nfs_lock_and_join_requests(struct page *page)
+void
+nfs_join_page_group(struct nfs_page *head, struct inode *inode)
{
- struct inode *inode = page_file_mapping(page)->host;
- struct nfs_page *head, *subreq;
+ struct nfs_page *subreq;
struct nfs_page *destroy_list = NULL;
- unsigned int total_bytes;
- int ret;
+ unsigned int pgbase, off, bytes;
-try_again:
- /*
- * A reference is taken only on the head request which acts as a
- * reference to the whole page group - the group will not be destroyed
- * until the head reference is released.
- */
- head = nfs_page_find_head_request(page);
- if (!head)
- return NULL;
-
- /* lock the page head first in order to avoid an ABBA inefficiency */
- if (!nfs_lock_request(head)) {
- ret = nfs_wait_on_request(head);
- nfs_release_request(head);
- if (ret < 0)
- return ERR_PTR(ret);
- goto try_again;
- }
-
- /* Ensure that nobody removed the request before we locked it */
- if (head != nfs_page_private_request(page) && !PageSwapCache(page)) {
- nfs_unlock_and_release_request(head);
- goto try_again;
- }
-
- ret = nfs_page_group_lock(head);
- if (ret < 0)
- goto release_request;
-
- /* lock each request in the page group */
- total_bytes = head->wb_bytes;
+ pgbase = head->wb_pgbase;
+ bytes = head->wb_bytes;
+ off = head->wb_offset;
for (subreq = head->wb_this_page; subreq != head;
subreq = subreq->wb_this_page) {
-
- if (!kref_get_unless_zero(&subreq->wb_kref)) {
- if (subreq->wb_offset == head->wb_offset + total_bytes)
- total_bytes += subreq->wb_bytes;
- continue;
- }
-
- while (!nfs_lock_request(subreq)) {
- /*
- * Unlock page to allow nfs_page_group_sync_on_bit()
- * to succeed
- */
- nfs_page_group_unlock(head);
- ret = nfs_wait_on_request(subreq);
- if (!ret)
- ret = nfs_page_group_lock(head);
- if (ret < 0) {
- nfs_unroll_locks(inode, head, subreq);
- nfs_release_request(subreq);
- goto release_request;
- }
- }
- /*
- * Subrequests are always contiguous, non overlapping
- * and in order - but may be repeated (mirrored writes).
- */
- if (subreq->wb_offset == (head->wb_offset + total_bytes)) {
- /* keep track of how many bytes this group covers */
- total_bytes += subreq->wb_bytes;
- } else if (WARN_ON_ONCE(subreq->wb_offset < head->wb_offset ||
- ((subreq->wb_offset + subreq->wb_bytes) >
- (head->wb_offset + total_bytes)))) {
- nfs_page_group_unlock(head);
- nfs_unroll_locks(inode, head, subreq);
- nfs_unlock_and_release_request(subreq);
- ret = -EIO;
- goto release_request;
+ /* Subrequests should always form a contiguous range */
+ if (pgbase > subreq->wb_pgbase) {
+ off -= pgbase - subreq->wb_pgbase;
+ bytes += pgbase - subreq->wb_pgbase;
+ pgbase = subreq->wb_pgbase;
}
+ bytes = max(subreq->wb_pgbase + subreq->wb_bytes
+ - pgbase, bytes);
}
+ /* Set the head request's range to cover the former page group */
+ head->wb_pgbase = pgbase;
+ head->wb_bytes = bytes;
+ head->wb_offset = off;
+
/* Now that all requests are locked, make sure they aren't on any list.
* Commit list removal accounting is done after locks are dropped */
subreq = head;
@@ -569,36 +541,52 @@ try_again:
/* destroy list will be terminated by head */
destroy_list = head->wb_this_page;
head->wb_this_page = head;
-
- /* change head request to cover whole range that
- * the former page group covered */
- head->wb_bytes = total_bytes;
}
- /* Postpone destruction of this request */
- if (test_and_clear_bit(PG_REMOVE, &head->wb_flags)) {
- set_bit(PG_INODE_REF, &head->wb_flags);
- kref_get(&head->wb_kref);
- atomic_long_inc(&NFS_I(inode)->nrequests);
- }
+ nfs_destroy_unlinked_subrequests(destroy_list, head, inode);
+}
- nfs_page_group_unlock(head);
+/*
+ * nfs_lock_and_join_requests - join all subreqs to the head req
+ * @page: the page used to lookup the "page group" of nfs_page structures
+ *
+ * This function joins all sub requests to the head request by first
+ * locking all requests in the group, cancelling any pending operations
+ * and finally updating the head request to cover the whole range covered by
+ * the (former) group. All subrequests are removed from any write or commit
+ * lists, unlinked from the group and destroyed.
+ *
+ * Returns a locked, referenced pointer to the head request - which after
+ * this call is guaranteed to be the only request associated with the page.
+ * Returns NULL if no requests are found for @page, or a ERR_PTR if an
+ * error was encountered.
+ */
+static struct nfs_page *
+nfs_lock_and_join_requests(struct page *page)
+{
+ struct inode *inode = page_file_mapping(page)->host;
+ struct nfs_page *head;
+ int ret;
- nfs_destroy_unlinked_subrequests(destroy_list, head, inode);
+ /*
+ * A reference is taken only on the head request which acts as a
+ * reference to the whole page group - the group will not be destroyed
+ * until the head reference is released.
+ */
+ head = nfs_find_and_lock_page_request(page);
+ if (IS_ERR_OR_NULL(head))
+ return head;
- /* Did we lose a race with nfs_inode_remove_request()? */
- if (!(PagePrivate(page) || PageSwapCache(page))) {
+ /* lock each request in the page group */
+ ret = nfs_page_group_lock_subrequests(head);
+ if (ret < 0) {
nfs_unlock_and_release_request(head);
- return NULL;
+ return ERR_PTR(ret);
}
- /* still holds ref on head from nfs_page_find_head_request
- * and still has lock on head from lock loop */
- return head;
+ nfs_join_page_group(head, inode);
-release_request:
- nfs_unlock_and_release_request(head);
- return ERR_PTR(ret);
+ return head;
}
static void nfs_write_error(struct nfs_page *req, int error)
@@ -1707,7 +1695,7 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
.callback_ops = call_ops,
.callback_data = data,
.workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC | flags,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF | flags,
.priority = priority,
};
/* Set up the initial task struct. */
@@ -1746,14 +1734,19 @@ void nfs_init_commit(struct nfs_commit_data *data,
struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo)
{
- struct nfs_page *first = nfs_list_entry(head->next);
- struct nfs_open_context *ctx = nfs_req_openctx(first);
- struct inode *inode = d_inode(ctx->dentry);
+ struct nfs_page *first;
+ struct nfs_open_context *ctx;
+ struct inode *inode;
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
- list_splice_init(head, &data->pages);
+ if (head)
+ list_splice_init(head, &data->pages);
+
+ first = nfs_list_entry(data->pages.next);
+ ctx = nfs_req_openctx(first);
+ inode = d_inode(ctx->dentry);
data->inode = inode;
data->cred = ctx->cred;
@@ -1869,8 +1862,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
/* Okay, COMMIT succeeded, apparently. Check the verifier
* returned by the server against all stored verfs. */
- if (verf->committed > NFS_UNSTABLE &&
- !nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier)) {
+ if (nfs_write_match_verf(verf, req)) {
/* We have a match */
if (req->wb_page)
nfs_inode_remove_request(req);