From fd2f3a06d30c85a17cf035ebc60c88c2a13a8ece Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Fri, 8 Aug 2014 11:00:53 -0400
Subject: nfs: change nfs_page_group_lock argument

Flip the meaning of the second argument from 'wait' to 'nonblock' to
match related functions. Update all five calls to reflect this change.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Reviewed-by: Peng Tao <tao.peng@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pagelist.c | 11 ++++++-----
 fs/nfs/write.c    |  4 ++--
 2 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index ba491926df5f..7efa61586bc4 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -139,13 +139,14 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
 /*
  * nfs_page_group_lock - lock the head of the page group
  * @req - request in group that is to be locked
+ * @nonblock - if true don't block waiting for lock
  *
  * this lock must be held if modifying the page group list
  *
  * returns result from wait_on_bit_lock: 0 on success, < 0 on error
  */
 int
-nfs_page_group_lock(struct nfs_page *req, bool wait)
+nfs_page_group_lock(struct nfs_page *req, bool nonblock)
 {
 	struct nfs_page *head = req->wb_head;
 	int ret;
@@ -155,7 +156,7 @@ nfs_page_group_lock(struct nfs_page *req, bool wait)
 	do {
 		ret = wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
 			TASK_UNINTERRUPTIBLE);
-	} while (wait && ret != 0);
+	} while (!nonblock && ret != 0);
 
 	WARN_ON_ONCE(ret > 0);
 	return ret;
@@ -219,7 +220,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
 {
 	bool ret;
 
-	nfs_page_group_lock(req, true);
+	nfs_page_group_lock(req, false);
 	ret = nfs_page_group_sync_on_bit_locked(req, bit);
 	nfs_page_group_unlock(req);
 
@@ -860,7 +861,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
 	unsigned int offset, pgbase;
 	int ret;
 
-	ret = nfs_page_group_lock(req, false);
+	ret = nfs_page_group_lock(req, true);
 	if (ret < 0) {
 		desc->pg_error = ret;
 		return 0;
@@ -886,7 +887,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
 			if (desc->pg_recoalesce)
 				return 0;
 			/* retry add_request for this subreq */
-			ret = nfs_page_group_lock(req, false);
+			ret = nfs_page_group_lock(req, true);
 			if (ret < 0) {
 				desc->pg_error = ret;
 				return 0;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e3b5cf28bdc5..27715306f24b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -241,7 +241,7 @@ static bool nfs_page_group_covers_page(struct nfs_page *req)
 	unsigned int pos = 0;
 	unsigned int len = nfs_page_length(req->wb_page);
 
-	nfs_page_group_lock(req, true);
+	nfs_page_group_lock(req, false);
 
 	do {
 		tmp = nfs_page_group_search_locked(req->wb_head, pos);
@@ -479,7 +479,7 @@ try_again:
 	}
 
 	/* lock each request in the page group */
-	ret = nfs_page_group_lock(head, false);
+	ret = nfs_page_group_lock(head, true);
 	if (ret < 0)
 		return ERR_PTR(ret);
 	subreq = head;
-- 
cgit 


From bc8a309e88a86205fc3e17f06e42a2e56fc6f807 Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Fri, 8 Aug 2014 11:00:54 -0400
Subject: nfs: fix nonblocking calls to nfs_page_group_lock

nfs_page_group_lock was calling wait_on_bit_lock even when told not to
block. Fix by first trying test_and_set_bit, followed by wait_on_bit_lock
if and only if blocking is allowed.  Return -EAGAIN if nonblocking and the
test_and_set of the bit was already locked.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Reviewed-by: Peng Tao <tao.peng@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pagelist.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 7efa61586bc4..89d5d433e351 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -143,23 +143,28 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
  *
  * this lock must be held if modifying the page group list
  *
- * returns result from wait_on_bit_lock: 0 on success, < 0 on error
+ * return 0 on success, < 0 on error: -EDELAY if nonblocking or the
+ * result from wait_on_bit_lock
+ *
+ * NOTE: calling with nonblock=false should always have set the
+ *       lock bit (see fs/buffer.c and other uses of wait_on_bit_lock
+ *       with TASK_UNINTERRUPTIBLE), so there is no need to check the result.
  */
 int
 nfs_page_group_lock(struct nfs_page *req, bool nonblock)
 {
 	struct nfs_page *head = req->wb_head;
-	int ret;
 
 	WARN_ON_ONCE(head != head->wb_head);
 
-	do {
-		ret = wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
-			TASK_UNINTERRUPTIBLE);
-	} while (!nonblock && ret != 0);
+	if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags))
+		return 0;
 
-	WARN_ON_ONCE(ret > 0);
-	return ret;
+	if (!nonblock)
+		return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
+				TASK_UNINTERRUPTIBLE);
+
+	return -EAGAIN;
 }
 
 /*
-- 
cgit 


From bfd484a5606d6a0379a0a2f04251b1e5c1f8995c Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Fri, 8 Aug 2014 11:00:55 -0400
Subject: nfs: use blocking page_group_lock in add_request

__nfs_pageio_add_request was calling nfs_page_group_lock nonblocking, but
this can return -EAGAIN which would end up passing -EIO to the application.

There is no reason not to block in this path, so change the two calls to
do so. Also, there is no need to check the return value of
nfs_page_group_lock when nonblock=false, so remove the error handling code.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Reviewed-by: Peng Tao <tao.peng@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pagelist.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 89d5d433e351..30c9626f96b0 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -864,13 +864,8 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
 	struct nfs_page *subreq;
 	unsigned int bytes_left = 0;
 	unsigned int offset, pgbase;
-	int ret;
 
-	ret = nfs_page_group_lock(req, true);
-	if (ret < 0) {
-		desc->pg_error = ret;
-		return 0;
-	}
+	nfs_page_group_lock(req, false);
 
 	subreq = req;
 	bytes_left = subreq->wb_bytes;
@@ -892,11 +887,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
 			if (desc->pg_recoalesce)
 				return 0;
 			/* retry add_request for this subreq */
-			ret = nfs_page_group_lock(req, true);
-			if (ret < 0) {
-				desc->pg_error = ret;
-				return 0;
-			}
+			nfs_page_group_lock(req, false);
 			continue;
 		}
 
-- 
cgit 


From 94970014c46223cbcdfbfc67b89596a412f9e3dd Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Fri, 8 Aug 2014 11:00:56 -0400
Subject: nfs: fix error handling in lock_and_join_requests

This fixes handling of errors from nfs_page_group_lock in
nfs_lock_and_join_requests.  It now releases the inode lock and the
reference to the head request.

Reported-by: Peng Tao <tao.peng@primarydata.com>
Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Reviewed-by: Peng Tao <tao.peng@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/write.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 27715306f24b..e056f617adf2 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -480,8 +480,11 @@ try_again:
 
 	/* lock each request in the page group */
 	ret = nfs_page_group_lock(head, true);
-	if (ret < 0)
+	if (ret < 0) {
+		spin_unlock(&inode->i_lock);
+		nfs_release_request(head);
 		return ERR_PTR(ret);
+	}
 	subreq = head;
 	do {
 		/*
-- 
cgit 


From 7c3af975257383ece54b83c0505d3e0656cb7daf Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Fri, 8 Aug 2014 11:00:57 -0400
Subject: nfs: don't sleep with inode lock in lock_and_join_requests

This handles the 'nonblock=false' case in nfs_lock_and_join_requests.
If the group is already locked and blocking is allowed, drop the inode lock
and wait for the group lock to be cleared before trying it all again.
This should fix warnings found in peterz's tree (sched/wait branch), where
might_sleep() checks are added to wait.[ch].

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Reviewed-by: Peng Tao <tao.peng@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pagelist.c | 17 +++++++++++++++++
 fs/nfs/write.c    | 12 +++++++++++-
 2 files changed, 28 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 30c9626f96b0..4ec67f8d70aa 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -167,6 +167,23 @@ nfs_page_group_lock(struct nfs_page *req, bool nonblock)
 	return -EAGAIN;
 }
 
+/*
+ * nfs_page_group_lock_wait - wait for the lock to clear, but don't grab it
+ * @req - a request in the group
+ *
+ * This is a blocking call to wait for the group lock to be cleared.
+ */
+void
+nfs_page_group_lock_wait(struct nfs_page *req)
+{
+	struct nfs_page *head = req->wb_head;
+
+	WARN_ON_ONCE(head != head->wb_head);
+
+	wait_on_bit(&head->wb_flags, PG_HEADLOCK,
+		TASK_UNINTERRUPTIBLE);
+}
+
 /*
  * nfs_page_group_unlock - unlock the head of the page group
  * @req - request in group that is to be unlocked
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e056f617adf2..175d5d073ccf 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -478,13 +478,23 @@ try_again:
 		return NULL;
 	}
 
-	/* lock each request in the page group */
+	/* holding inode lock, so always make a non-blocking call to try the
+	 * page group lock */
 	ret = nfs_page_group_lock(head, true);
 	if (ret < 0) {
 		spin_unlock(&inode->i_lock);
+
+		if (!nonblock && ret == -EAGAIN) {
+			nfs_page_group_lock_wait(head);
+			nfs_release_request(head);
+			goto try_again;
+		}
+
 		nfs_release_request(head);
 		return ERR_PTR(ret);
 	}
+
+	/* lock each request in the page group */
 	subreq = head;
 	do {
 		/*
-- 
cgit 


From bba5c1887a925a9945d22217d38d58d8b3ba1043 Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Thu, 14 Aug 2014 17:39:32 -0400
Subject: nfs: disallow duplicate pages in pgio page vectors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adjacent requests that share the same page are allowed, but should only
use one entry in the page vector. This avoids overruning the page
vector - it is sized based on how many bytes there are, not by
request count.

This fixes issues that manifest as "Redzone overwritten" bugs (the
vector overrun) and hangs waiting on page read / write, as it waits on
the same page more than once.

This also adds bounds checking to the page vector with a graceful failure
(WARN_ON_ONCE and pgio error returned to application).

Reported-by: Toralf Förster <toralf.foerster@gmx.de>
Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pagelist.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 4ec67f8d70aa..a1d1de7b97c5 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -724,10 +724,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
 		     struct nfs_pgio_header *hdr)
 {
 	struct nfs_page		*req;
-	struct page		**pages;
+	struct page		**pages,
+				*last_page;
 	struct list_head *head = &desc->pg_list;
 	struct nfs_commit_info cinfo;
-	unsigned int pagecount;
+	unsigned int pagecount, pageused;
 
 	pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count);
 	if (!nfs_pgarray_set(&hdr->page_array, pagecount))
@@ -735,12 +736,23 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
 
 	nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
 	pages = hdr->page_array.pagevec;
+	last_page = NULL;
+	pageused = 0;
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
 		nfs_list_add_request(req, &hdr->pages);
-		*pages++ = req->wb_page;
+
+		if (WARN_ON_ONCE(pageused >= pagecount))
+			return nfs_pgio_error(desc, hdr);
+
+		if (!last_page || last_page != req->wb_page) {
+			*pages++ = last_page = req->wb_page;
+			pageused++;
+		}
 	}
+	if (WARN_ON_ONCE(pageused != pagecount))
+		return nfs_pgio_error(desc, hdr);
 
 	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
 	    (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
-- 
cgit 


From 78270e8fbc2916bfc8305b8f58f33474cce1ec0e Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Thu, 14 Aug 2014 17:39:33 -0400
Subject: nfs: can_coalesce_requests must enforce contiguity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 6094f83864c1d1296566a282cba05ba613f151ee
"nfs: allow coalescing of subpage requests" got rid of the requirement
that requests cover whole pages, but it made some incorrect assumptions.

It turns out that callers of this interface can map adjacent requests
(by file position as seen by req_offset + req->wb_bytes) to different pages,
even when they could share a page. An example is the direct I/O interface -
iov_iter_get_pages_alloc may return one segment with a partial page filled
and the next segment (which is adjacent in the file position) starts with a
new page.

Reported-by: Toralf Förster <toralf.foerster@gmx.de>
Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pagelist.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index a1d1de7b97c5..932c6cc27a76 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -823,6 +823,14 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
 			return false;
 		if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
 			return false;
+		if (req->wb_page == prev->wb_page) {
+			if (req->wb_pgbase != prev->wb_pgbase + prev->wb_bytes)
+				return false;
+		} else {
+			if (req->wb_pgbase != 0 ||
+			    prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
+				return false;
+		}
 	}
 	size = pgio->pg_ops->pg_test(pgio, prev, req);
 	WARN_ON_ONCE(size > req->wb_bytes);
-- 
cgit 


From 92a56555bd576c61b27a5cab9f38a33a1e9a1df5 Mon Sep 17 00:00:00 2001
From: David Jeffery <djeffery@redhat.com>
Date: Tue, 5 Aug 2014 11:19:42 -0400
Subject: nfs: Don't busy-wait on SIGKILL in __nfs_iocounter_wait

If a SIGKILL is sent to a task waiting in __nfs_iocounter_wait,
it will busy-wait or soft lockup in its while loop.
nfs_wait_bit_killable won't sleep, and the loop won't exit on
the error return.

Stop the busy-wait by breaking out of the loop when
nfs_wait_bit_killable returns an error.

Signed-off-by: David Jeffery <djeffery@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pagelist.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 932c6cc27a76..be7cbce6e4c7 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -116,7 +116,7 @@ __nfs_iocounter_wait(struct nfs_io_counter *c)
 		if (atomic_read(&c->io_count) == 0)
 			break;
 		ret = nfs_wait_bit_killable(&q.key);
-	} while (atomic_read(&c->io_count) != 0);
+	} while (atomic_read(&c->io_count) != 0 && !ret);
 	finish_wait(wq, &q.wait);
 	return ret;
 }
-- 
cgit