From d383e346f97d6bb0d654bb3d63c44ab106d92d29 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Thu, 22 Oct 2020 14:40:31 +0100
Subject: afs: Fix afs_launder_page to not clear PG_writeback

Fix afs_launder_page() to not clear PG_writeback on the page it is
laundering as the flag isn't set in this case.

Fixes: 4343d00872e1 ("afs: Get rid of the afs_writeback record")
Signed-off-by: David Howells
---
 fs/afs/write.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'fs/afs/write.c')

diff --git a/fs/afs/write.c b/fs/afs/write.c
index da12abd6db21..b937ec047ec9 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -396,7 +396,8 @@ static void afs_store_data_success(struct afs_operation *op)
         op->ctime = op->file[0].scb.status.mtime_client;
         afs_vnode_commit_status(op, &op->file[0]);
         if (op->error == 0) {
-                afs_pages_written_back(vnode, op->store.first, op->store.last);
+                if (!op->store.laundering)
+                        afs_pages_written_back(vnode, op->store.first, op->store.last);
                 afs_stat_v(vnode, n_stores);
                 atomic_long_add((op->store.last * PAGE_SIZE + op->store.last_to) -
                                 (op->store.first * PAGE_SIZE + op->store.first_offset),
@@ -415,7 +416,7 @@ static const struct afs_operation_ops afs_store_data_operation = {
  */
 static int afs_store_data(struct address_space *mapping,
                           pgoff_t first, pgoff_t last,
-                          unsigned offset, unsigned to)
+                          unsigned offset, unsigned to, bool laundering)
 {
         struct afs_vnode *vnode = AFS_FS_I(mapping->host);
         struct afs_operation *op;
@@ -448,6 +449,7 @@ static int afs_store_data(struct address_space *mapping,
         op->store.last = last;
         op->store.first_offset = offset;
         op->store.last_to = to;
+        op->store.laundering = laundering;
         op->mtime = vnode->vfs_inode.i_mtime;
         op->flags |= AFS_OPERATION_UNINTR;
         op->ops = &afs_store_data_operation;
@@ -601,7 +603,7 @@ no_more:
         if (end > i_size)
                 to = i_size & ~PAGE_MASK;

-        ret = afs_store_data(mapping, first, last, offset, to);
+        ret = afs_store_data(mapping, first, last, offset, to, false);
         switch (ret) {
         case 0:
                 ret = count;
@@ -921,7 +923,7 @@ int afs_launder_page(struct page *page)
                 trace_afs_page_dirty(vnode, tracepoint_string("launder"),
                                      page->index, priv);

-                ret = afs_store_data(mapping, page->index, page->index, t, f);
+                ret = afs_store_data(mapping, page->index, page->index, t, f, true);
         }

         trace_afs_page_dirty(vnode, tracepoint_string("laundered"),
--
cgit

From fa04a40b169fcee615afbae97f71a09332993f64 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Wed, 21 Oct 2020 13:22:19 +0100
Subject: afs: Fix to take ref on page when PG_private is set

Fix afs to take a ref on a page when it sets PG_private on it and to drop
the ref when removing the flag.

Note that in afs_write_begin(), a lot of the time, PG_private is already
set on a page to which we're going to add some data.  In such a case, we
leave the bit set and mustn't increment the page count.

As suggested by Matthew Wilcox, use attach/detach_page_private() where
possible.
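
For reference, those helpers pair the PG_private flag with a page reference
and were added to include/linux/pagemap.h shortly before this series.  The
sketch below is paraphrased from memory rather than copied verbatim (check
the tree for the exact definitions), but it shows the ref/flag pairing that
the open-coded SetPagePrivate()/set_page_private() calls were missing:

        /* Sketch of the pagemap.h helpers, not a verbatim copy. */
        static inline void attach_page_private(struct page *page, void *data)
        {
                get_page(page);         /* take the ref that PG_private is meant to pin */
                set_page_private(page, (unsigned long)data);
                SetPagePrivate(page);
        }

        static inline void *detach_page_private(struct page *page)
        {
                void *data = (void *)page_private(page);

                if (!PagePrivate(page))
                        return NULL;
                ClearPagePrivate(page);
                set_page_private(page, 0);
                put_page(page);         /* drop the ref taken by attach_page_private() */

                return data;
        }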
Fixes: 31143d5d515e ("AFS: implement basic file write support")
Reported-by: Matthew Wilcox (Oracle)
Signed-off-by: David Howells
Reviewed-by: Matthew Wilcox (Oracle)
---
 fs/afs/write.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'fs/afs/write.c')

diff --git a/fs/afs/write.c b/fs/afs/write.c
index b937ec047ec9..02facb19a0f1 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -151,8 +151,10 @@ try_again:
         priv |= f;
         trace_afs_page_dirty(vnode, tracepoint_string("begin"),
                              page->index, priv);
-        SetPagePrivate(page);
-        set_page_private(page, priv);
+        if (PagePrivate(page))
+                set_page_private(page, priv);
+        else
+                attach_page_private(page, (void *)priv);
         _leave(" = 0");
         return 0;

@@ -334,10 +336,9 @@ static void afs_pages_written_back(struct afs_vnode *vnode,
                 ASSERTCMP(pv.nr, ==, count);

                 for (loop = 0; loop < count; loop++) {
-                        priv = page_private(pv.pages[loop]);
+                        priv = (unsigned long)detach_page_private(pv.pages[loop]);
                         trace_afs_page_dirty(vnode, tracepoint_string("clear"),
                                              pv.pages[loop]->index, priv);
-                        set_page_private(pv.pages[loop], 0);
                         end_page_writeback(pv.pages[loop]);
                 }
                 first += count;
@@ -863,8 +864,10 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
         priv |= 0; /* From */
         trace_afs_page_dirty(vnode, tracepoint_string("mkwrite"),
                              vmf->page->index, priv);
-        SetPagePrivate(vmf->page);
-        set_page_private(vmf->page, priv);
+        if (PagePrivate(vmf->page))
+                set_page_private(vmf->page, priv);
+        else
+                attach_page_private(vmf->page, (void *)priv);
         file_update_time(file);

         sb_end_pagefault(inode->i_sb);
@@ -926,10 +929,9 @@ int afs_launder_page(struct page *page)
                 ret = afs_store_data(mapping, page->index, page->index, t, f, true);
         }

+        priv = (unsigned long)detach_page_private(page);
         trace_afs_page_dirty(vnode, tracepoint_string("laundered"),
                              page->index, priv);
-        set_page_private(page, 0);
-        ClearPagePrivate(page);

 #ifdef CONFIG_AFS_FSCACHE
         if (PageFsCache(page)) {
--
cgit

From 21db2cdc667f744691a407105b7712bc18d74023 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Thu, 22 Oct 2020 14:03:03 +0100
Subject: afs: Fix page leak on afs_write_begin() failure

Fix the leak of the target page in afs_write_begin() when it fails.

Fixes: 15b4650e55e0 ("afs: convert to new aops")
Signed-off-by: David Howells
cc: Nick Piggin
---
 fs/afs/write.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

(limited to 'fs/afs/write.c')

diff --git a/fs/afs/write.c b/fs/afs/write.c
index 02facb19a0f1..7fae9f8b38eb 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -76,7 +76,7 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
  */
 int afs_write_begin(struct file *file, struct address_space *mapping,
                     loff_t pos, unsigned len, unsigned flags,
-                    struct page **pagep, void **fsdata)
+                    struct page **_page, void **fsdata)
 {
         struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
         struct page *page;
@@ -110,9 +110,6 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
                 SetPageUptodate(page);
         }

-        /* page won't leak in error case: it eventually gets cleaned off LRU */
-        *pagep = page;
-
 try_again:
         /* See if this page is already partially written in a way that we can
          * merge the new write with.
@@ -155,6 +152,7 @@ try_again:
                 set_page_private(page, priv);
         else
                 attach_page_private(page, (void *)priv);
+        *_page = page;
         _leave(" = 0");
         return 0;

@@ -164,17 +162,18 @@ try_again:
 flush_conflicting_write:
         _debug("flush conflict");
         ret = write_one_page(page);
-        if (ret < 0) {
-                _leave(" = %d", ret);
-                return ret;
-        }
+        if (ret < 0)
+                goto error;

         ret = lock_page_killable(page);
-        if (ret < 0) {
-                _leave(" = %d", ret);
-                return ret;
-        }
+        if (ret < 0)
+                goto error;
         goto try_again;
+
+error:
+        put_page(page);
+        _leave(" = %d", ret);
+        return ret;
 }

 /*
--
cgit

From f792e3ac82fe2c6c863e93187eb7ddfccab68fa7 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Mon, 26 Oct 2020 14:05:33 +0000
Subject: afs: Fix where page->private is set during write

In afs, page->private is set to indicate the dirty region of a page.  This
is done in afs_write_begin(), but that can't take account of whether the
copy into the page actually worked.

Fix this by moving the change of page->private into afs_write_end().

Fixes: 4343d00872e1 ("afs: Get rid of the afs_writeback record")
Signed-off-by: David Howells
---
 fs/afs/write.c | 41 ++++++++++++++++++++++++++---------------
 1 file changed, 26 insertions(+), 15 deletions(-)

(limited to 'fs/afs/write.c')

diff --git a/fs/afs/write.c b/fs/afs/write.c
index 7fae9f8b38eb..f28d85c38cd8 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -135,23 +135,8 @@ try_again:
                 if (!test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags) &&
                     (to < f || from > t))
                         goto flush_conflicting_write;
-                if (from < f)
-                        f = from;
-                if (to > t)
-                        t = to;
-        } else {
-                f = from;
-                t = to;
         }

-        priv = (unsigned long)t << AFS_PRIV_SHIFT;
-        priv |= f;
-        trace_afs_page_dirty(vnode, tracepoint_string("begin"),
-                             page->index, priv);
-        if (PagePrivate(page))
-                set_page_private(page, priv);
-        else
-                attach_page_private(page, (void *)priv);
         *_page = page;
         _leave(" = 0");
         return 0;
@@ -185,6 +170,9 @@ int afs_write_end(struct file *file, struct address_space *mapping,
 {
         struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
         struct key *key = afs_file_key(file);
+        unsigned long priv;
+        unsigned int f, from = pos & (PAGE_SIZE - 1);
+        unsigned int t, to = from + copied;
         loff_t i_size, maybe_i_size;
         int ret;

@@ -216,6 +204,29 @@ int afs_write_end(struct file *file, struct address_space *mapping,
                 SetPageUptodate(page);
         }

+        if (PagePrivate(page)) {
+                priv = page_private(page);
+                f = priv & AFS_PRIV_MAX;
+                t = priv >> AFS_PRIV_SHIFT;
+                if (from < f)
+                        f = from;
+                if (to > t)
+                        t = to;
+                priv = (unsigned long)t << AFS_PRIV_SHIFT;
+                priv |= f;
+                set_page_private(page, priv);
+                trace_afs_page_dirty(vnode, tracepoint_string("dirty+"),
+                                     page->index, priv);
+        } else {
+                f = from;
+                t = to;
+                priv = (unsigned long)t << AFS_PRIV_SHIFT;
+                priv |= f;
+                attach_page_private(page, (void *)priv);
+                trace_afs_page_dirty(vnode, tracepoint_string("dirty"),
+                                     page->index, priv);
+        }
+
         set_page_dirty(page);
         if (PageDirty(page))
                 _debug("dirtied");
--
cgit

From 185f0c7073bd5c78f86265f703f5daf1306ab5a7 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Mon, 26 Oct 2020 13:22:47 +0000
Subject: afs: Wrap page->private manipulations in inline functions

The afs filesystem uses page->private to store the dirty range within a
page such that in the event of a conflicting 3rd-party write to the
server, we write back just the bits that got changed locally.

However, there are a couple of problems with this:

 (1) I need a bit to note if the page might be mapped so that partial
     invalidation doesn't shrink the range.
 (2) There aren't necessarily sufficient bits to store the entire range of
     data altered (say it's a 32-bit system with 64KiB pages or transparent
     huge pages are in use).

So wrap the accesses in inline functions so that future commits can change
how this works.

Also move them out of the tracing header into the in-directory header.
There's not really any need for them to be in the tracing header.

Signed-off-by: David Howells
---
 fs/afs/write.c | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

(limited to 'fs/afs/write.c')

diff --git a/fs/afs/write.c b/fs/afs/write.c
index f28d85c38cd8..ea1768b3c0b5 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -117,8 +117,8 @@ try_again:
         t = f = 0;
         if (PagePrivate(page)) {
                 priv = page_private(page);
-                f = priv & AFS_PRIV_MAX;
-                t = priv >> AFS_PRIV_SHIFT;
+                f = afs_page_dirty_from(priv);
+                t = afs_page_dirty_to(priv);
                 ASSERTCMP(f, <=, t);
         }

@@ -206,22 +206,18 @@ int afs_write_end(struct file *file, struct address_space *mapping,
         if (PagePrivate(page)) {
                 priv = page_private(page);
-                f = priv & AFS_PRIV_MAX;
-                t = priv >> AFS_PRIV_SHIFT;
+                f = afs_page_dirty_from(priv);
+                t = afs_page_dirty_to(priv);
                 if (from < f)
                         f = from;
                 if (to > t)
                         t = to;
-                priv = (unsigned long)t << AFS_PRIV_SHIFT;
-                priv |= f;
+                priv = afs_page_dirty(f, t);
                 set_page_private(page, priv);
                 trace_afs_page_dirty(vnode, tracepoint_string("dirty+"),
                                      page->index, priv);
         } else {
-                f = from;
-                t = to;
-                priv = (unsigned long)t << AFS_PRIV_SHIFT;
-                priv |= f;
+                priv = afs_page_dirty(from, to);
                 attach_page_private(page, (void *)priv);
                 trace_afs_page_dirty(vnode, tracepoint_string("dirty"),
                                      page->index, priv);
@@ -522,8 +518,8 @@ static int afs_write_back_from_locked_page(struct address_space *mapping,
          */
         start = primary_page->index;
         priv = page_private(primary_page);
-        offset = priv & AFS_PRIV_MAX;
-        to = priv >> AFS_PRIV_SHIFT;
+        offset = afs_page_dirty_from(priv);
+        to = afs_page_dirty_to(priv);
         trace_afs_page_dirty(vnode, tracepoint_string("store"),
                              primary_page->index, priv);

@@ -568,8 +564,8 @@ static int afs_write_back_from_locked_page(struct address_space *mapping,
                 }

                 priv = page_private(page);
-                f = priv & AFS_PRIV_MAX;
-                t = priv >> AFS_PRIV_SHIFT;
+                f = afs_page_dirty_from(priv);
+                t = afs_page_dirty_to(priv);
                 if (f != 0 &&
                     !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags)) {
                         unlock_page(page);
@@ -870,8 +866,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
          */
         wait_on_page_writeback(vmf->page);

-        priv = (unsigned long)PAGE_SIZE << AFS_PRIV_SHIFT; /* To */
-        priv |= 0; /* From */
+        priv = afs_page_dirty(0, PAGE_SIZE);
         trace_afs_page_dirty(vnode, tracepoint_string("mkwrite"),
                              vmf->page->index, priv);
         if (PagePrivate(vmf->page))
@@ -930,8 +925,8 @@ int afs_launder_page(struct page *page)
                 f = 0;
                 t = PAGE_SIZE;
                 if (PagePrivate(page)) {
-                        f = priv & AFS_PRIV_MAX;
-                        t = priv >> AFS_PRIV_SHIFT;
+                        f = afs_page_dirty_from(priv);
+                        t = afs_page_dirty_to(priv);
                 }

                 trace_afs_page_dirty(vnode, tracepoint_string("launder"),
--
cgit

From 65dd2d6072d393a3aa14ded8afa9a12f27d9c8ad Mon Sep 17 00:00:00 2001
From: David Howells
Date: Mon, 26 Oct 2020 13:57:44 +0000
Subject: afs: Alter dirty range encoding in page->private

Currently, page->private on an afs page is used to store the range of
dirtied data within the page, where the range includes the lower bound,
but excludes the upper bound (e.g. 0-1 is a range covering a single byte).
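
Expressed as code, and using the helper names that appear in the compiler
warnings quoted below (the actual definitions live in fs/afs/internal.h and
are not part of this diff, so treat this as an illustrative sketch), the
current encoding is roughly:

        /* "from" is inclusive, "to" is exclusive, so marking a whole 4KiB
         * page dirty needs to == 4096 - one bit more than an in-page byte
         * offset requires.
         */
        static inline unsigned long afs_page_dirty(unsigned long from, unsigned long to)
        {
                return ((unsigned long)to << __AFS_PAGE_PRIV_SHIFT) | from;
        }

        static inline unsigned long afs_page_dirty_to(unsigned long priv)
        {
                return (priv >> __AFS_PAGE_PRIV_SHIFT) & __AFS_PAGE_PRIV_MASK;
        }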
This, however, requires a superfluous bit for the last-byte bound so that
on a 4KiB page, it can say 0-4096 to indicate the whole page, the idea
being that having both numbers the same would indicate an empty range.
This is unnecessary as the PG_private bit is clear if it's an empty range
(as is PG_dirty).

Alter the way the dirty range is encoded in page->private such that the
upper bound is reduced by 1 (e.g. 0-0 then specifies the same single-byte
range mentioned above).

Applying this to both bounds frees up two bits, one of which can be used
in a future commit.

This allows the afs filesystem to be compiled on ppc32 with 64K pages;
without this, the following warnings are seen:

../fs/afs/internal.h: In function 'afs_page_dirty_to':
../fs/afs/internal.h:881:15: warning: right shift count >= width of type [-Wshift-count-overflow]
  881 |  return (priv >> __AFS_PAGE_PRIV_SHIFT) & __AFS_PAGE_PRIV_MASK;
      |               ^~
../fs/afs/internal.h: In function 'afs_page_dirty':
../fs/afs/internal.h:886:28: warning: left shift count >= width of type [-Wshift-count-overflow]
  886 |  return ((unsigned long)to << __AFS_PAGE_PRIV_SHIFT) | from;
      |                            ^~

Fixes: 4343d00872e1 ("afs: Get rid of the afs_writeback record")
Signed-off-by: David Howells
---
 fs/afs/write.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/afs/write.c')

diff --git a/fs/afs/write.c b/fs/afs/write.c
index ea1768b3c0b5..1a49f5c89342 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -93,7 +93,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
         /* We want to store information about how much of a page is altered in
          * page->private.
          */
-        BUILD_BUG_ON(PAGE_SIZE > 32768 && sizeof(page->private) < 8);
+        BUILD_BUG_ON(PAGE_SIZE - 1 > __AFS_PAGE_PRIV_MASK && sizeof(page->private) < 8);

         page = grab_cache_page_write_begin(mapping, index, flags);
         if (!page)
--
cgit

From f86726a69dec5df6ba051baf9265584419478b64 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Thu, 22 Oct 2020 14:08:23 +0100
Subject: afs: Fix afs_invalidatepage to adjust the dirty region

Fix afs_invalidatepage() to adjust the dirty region recorded in
page->private when truncating a page.  If the dirty region is entirely
removed, then the private data is cleared and the page dirty state is
cleared.

Without this, if the page is truncated and then expanded again by truncate,
zeros from the expanded, but no-longer dirty region may get written back to
the server if the page gets laundered due to a conflicting 3rd-party write.

It mustn't, however, shorten the dirty region of the page if that page is
still mmapped and has been marked dirty by afs_page_mkwrite(), so a flag is
stored in page->private to record this.
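
The diff below only shows the write.c side, which sets that flag via
afs_page_dirty_mmapped().  One plausible shape for the helpers, shown here
purely as an illustration (the real definitions are in fs/afs/internal.h
and the exact bit position is an assumption), is a spare flag bit kept
alongside the two dirty bounds:

        #define __AFS_PAGE_PRIV_MMAPPED 0x8000UL        /* illustrative bit position */

        static inline unsigned long afs_page_dirty_mmapped(unsigned long priv)
        {
                return priv | __AFS_PAGE_PRIV_MMAPPED;
        }

        static inline bool afs_is_page_dirty_mmapped(unsigned long priv)
        {
                return priv & __AFS_PAGE_PRIV_MMAPPED;
        }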
Fixes: 4343d00872e1 ("afs: Get rid of the afs_writeback record")
Signed-off-by: David Howells
---
 fs/afs/write.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs/afs/write.c')

diff --git a/fs/afs/write.c b/fs/afs/write.c
index 1a49f5c89342..a2511e3ad2cc 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -867,6 +867,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
         wait_on_page_writeback(vmf->page);

         priv = afs_page_dirty(0, PAGE_SIZE);
+        priv = afs_page_dirty_mmapped(priv);
         trace_afs_page_dirty(vnode, tracepoint_string("mkwrite"),
                              vmf->page->index, priv);
         if (PagePrivate(vmf->page))
--
cgit

From 2d9900f26ad61e63a34f239bc76c80d2f8a6ff41 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Wed, 28 Oct 2020 12:08:39 +0000
Subject: afs: Fix dirty-region encoding on ppc32 with 64K pages

The dirty region bounds stored in page->private on an afs page are 15 bits
on a 32-bit box and can, at most, represent a range of up to 32K within a
32K page with a resolution of 1 byte.  This is a problem for powerpc32 with
64K pages enabled.

Further, transparent huge pages may get up to 2M, which will be a problem
for the afs filesystem on all 32-bit arches in the future.

Fix this by decreasing the resolution.  For the moment, a 64K page will
have a resolution determined from PAGE_SIZE.  In the future, the page will
need to be passed in to the helper functions so that the page size can be
assessed and the resolution determined dynamically.

Note that this might not be the ideal way to handle this, since it may
allow some leakage of undirtied zero bytes to the server's copy in the case
of a 3rd-party conflict.  Fixing that would require a separately allocated
record and is a more complicated fix.

Fixes: 4343d00872e1 ("afs: Get rid of the afs_writeback record")
Reported-by: kernel test robot
Signed-off-by: David Howells
Reviewed-by: Matthew Wilcox (Oracle)
---
 fs/afs/write.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'fs/afs/write.c')

diff --git a/fs/afs/write.c b/fs/afs/write.c
index a2511e3ad2cc..50371207f327 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -90,11 +90,6 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
         _enter("{%llx:%llu},{%lx},%u,%u",
                vnode->fid.vid, vnode->fid.vnode, index, from, to);

-        /* We want to store information about how much of a page is altered in
-         * page->private.
-         */
-        BUILD_BUG_ON(PAGE_SIZE - 1 > __AFS_PAGE_PRIV_MASK && sizeof(page->private) < 8);
-
         page = grab_cache_page_write_begin(mapping, index, flags);
         if (!page)
                 return -ENOMEM;
--
cgit

From 3ad216ee73abc554ed8f13f4f8b70845a7bef6da Mon Sep 17 00:00:00 2001
From: David Howells
Date: Sat, 14 Nov 2020 17:27:57 +0000
Subject: afs: Fix afs_write_end() when called with copied == 0 [ver #3]

When afs_write_end() is called with copied == 0, it tries to set the dirty
region, but there's no way to actually encode a 0-length region in the
encoding in page->private.  "0,0", for example, indicates a 1-byte region
at offset 0.  The maths miscalculates this and sets it incorrectly.

Fix it to just do nothing but unlock and put the page in this case.  We
don't actually need to mark the page dirty as nothing presumably changed.
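
To see why a zero-length copy cannot be represented, consider the encoding
introduced by 65dd2d6072d3, sketched here with an illustrative helper body
(the real code is in fs/afs/internal.h, and the resolution shift added by
the previous patch is ignored for clarity):

        static inline unsigned long afs_page_dirty(unsigned int from, unsigned int to)
        {
                /* "to" is exclusive but stored as to - 1, so an empty range
                 * has no representation - a clear PG_private is what means
                 * "nothing dirty".
                 */
                return ((unsigned long)(to - 1) << __AFS_PAGE_PRIV_SHIFT) | from;
        }

        /* With copied == 0 at offset 0, from == to == 0, and afs_page_dirty(0, 0)
         * would encode "0,0", i.e. a 1-byte dirty region at offset 0 rather than
         * an empty one - hence bailing out early instead.
         */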
Fixes: 65dd2d6072d3 ("afs: Alter dirty range encoding in page->private")
Signed-off-by: David Howells
Signed-off-by: Linus Torvalds
---
 fs/afs/write.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs/afs/write.c')

diff --git a/fs/afs/write.c b/fs/afs/write.c
index 50371207f327..c9195fc67fd8 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -169,11 +169,14 @@ int afs_write_end(struct file *file, struct address_space *mapping,
         unsigned long priv;
         unsigned int f, from = pos & (PAGE_SIZE - 1);
         unsigned int t, to = from + copied;
         loff_t i_size, maybe_i_size;
-        int ret;
+        int ret = 0;

         _enter("{%llx:%llu},{%lx}",
                vnode->fid.vid, vnode->fid.vnode, page->index);

+        if (copied == 0)
+                goto out;
+
         maybe_i_size = pos + copied;

         i_size = i_size_read(&vnode->vfs_inode);
--
cgit
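
Taken together, these patches leave page->private holding two
reduced-resolution dirty bounds plus an mmapped flag.  The following is an
illustrative consolidation of what the accessors in fs/afs/internal.h
plausibly look like after the series - a sketch based on the commit
messages above (the mask, shift and flag values are assumptions), not a
verbatim copy of the header:

        #define __AFS_PAGE_PRIV_MASK    0x7fffUL        /* 15 bits per bound (assumed) */
        #define __AFS_PAGE_PRIV_SHIFT   16
        #define __AFS_PAGE_PRIV_MMAPPED 0x8000UL

        /* Shift applied to both bounds so they fit in 15 bits even on 64K
         * (or larger) pages; 0 for 4K pages, 1 for 64K pages.
         */
        static inline unsigned int afs_page_dirty_resolution(void)
        {
                int shift = PAGE_SHIFT - (__AFS_PAGE_PRIV_SHIFT - 1);

                return (shift > 0) ? shift : 0;
        }

        static inline size_t afs_page_dirty_from(unsigned long priv)
        {
                unsigned long x = priv & __AFS_PAGE_PRIV_MASK;

                /* The lower bound is inclusive; rounding down only grows the range */
                return x << afs_page_dirty_resolution();
        }

        static inline size_t afs_page_dirty_to(unsigned long priv)
        {
                unsigned long x = (priv >> __AFS_PAGE_PRIV_SHIFT) & __AFS_PAGE_PRIV_MASK;

                /* The upper bound is exclusive, stored minus one; round it up */
                return (x + 1) << afs_page_dirty_resolution();
        }

        static inline unsigned long afs_page_dirty(size_t from, size_t to)
        {
                unsigned int res = afs_page_dirty_resolution();

                from >>= res;
                to = (to - 1) >> res;
                return (to << __AFS_PAGE_PRIV_SHIFT) | from;
        }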