diff options
Diffstat (limited to 'fs/ceph/addr.c')
-rw-r--r-- | fs/ceph/addr.c | 118 |
1 files changed, 56 insertions, 62 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index b3e3edc09d80..5f7ad3d0df2e 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> #include <linux/backing-dev.h> @@ -14,6 +15,7 @@ #include "mds_client.h" #include "cache.h" #include <linux/ceph/osd_client.h> +#include <linux/ceph/striper.h> /* * Ceph address space ops. @@ -298,7 +300,8 @@ unlock: * start an async read(ahead) operation. return nr_pages we submitted * a read for on success, or negative error code. */ -static int start_read(struct inode *inode, struct list_head *page_list, int max) +static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx, + struct list_head *page_list, int max) { struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->client->osdc; @@ -315,7 +318,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) int got = 0; int ret = 0; - if (!current->journal_info) { + if (!rw_ctx) { /* caller of readpages does not hold buffer and read caps * (fadvise, madvise and readahead cases) */ int want = CEPH_CAP_FILE_CACHE; @@ -436,6 +439,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, { struct inode *inode = file_inode(file); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_file_info *fi = file->private_data; + struct ceph_rw_context *rw_ctx; int rc = 0; int max = 0; @@ -448,11 +453,12 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, if (rc == 0) goto out; + rw_ctx = ceph_find_rw_context(fi); max = fsc->mount_options->rsize >> PAGE_SHIFT; - dout("readpages %p file %p nr_pages %d max %d\n", - inode, file, nr_pages, max); + dout("readpages %p file %p ctx %p nr_pages %d max %d\n", + inode, file, rw_ctx, nr_pages, max); while (!list_empty(page_list)) { - rc = start_read(inode, page_list, max); + rc = start_read(inode, rw_ctx, page_list, max); if (rc < 0) goto out; } @@ -573,7 +579,6 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) struct ceph_fs_client *fsc; struct ceph_snap_context *snapc, *oldest; loff_t page_off = page_offset(page); - long writeback_stat; int err, len = PAGE_SIZE; struct ceph_writeback_ctl ceph_wbc; @@ -614,8 +619,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) dout("writepage %p page %p index %lu on %llu~%u snapc %p seq %lld\n", inode, page, page->index, page_off, len, snapc, snapc->seq); - writeback_stat = atomic_long_inc_return(&fsc->writeback_count); - if (writeback_stat > + if (atomic_long_inc_return(&fsc->writeback_count) > CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb)) set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); @@ -650,6 +654,11 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) end_page_writeback(page); ceph_put_wrbuffer_cap_refs(ci, 1, snapc); ceph_put_snap_context(snapc); /* page's reference */ + + if (atomic_long_dec_return(&fsc->writeback_count) < + CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb)) + clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); + return err; } @@ -679,7 +688,7 @@ static void ceph_release_pages(struct page **pages, int num) struct pagevec pvec; int i; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); for (i = 0; i < num; i++) { if (pagevec_add(&pvec, pages[i]) == 0) pagevec_release(&pvec); @@ -792,7 +801,7 @@ static int ceph_writepages_start(struct address_space *mapping, struct ceph_osd_request *req = NULL; struct ceph_writeback_ctl ceph_wbc; bool should_loop, range_whole = false; - bool stop, done = false; + bool done = false; dout("writepages_start %p (mode=%s)\n", inode, wbc->sync_mode == WB_SYNC_NONE ? "NONE" : @@ -810,7 +819,7 @@ static int ceph_writepages_start(struct address_space *mapping, if (fsc->mount_options->wsize < wsize) wsize = fsc->mount_options->wsize; - pagevec_init(&pvec, 0); + pagevec_init(&pvec); start_index = wbc->range_cyclic ? mapping->writeback_index : 0; index = start_index; @@ -848,7 +857,7 @@ retry: * in that range can be associated with newer snapc. * They are not writeable until we write all dirty pages * associated with 'snapc' get written */ - if (index > 0 || wbc->sync_mode != WB_SYNC_NONE) + if (index > 0) should_loop = true; dout(" non-head snapc, range whole\n"); } @@ -856,8 +865,7 @@ retry: ceph_put_snap_context(last_snapc); last_snapc = snapc; - stop = false; - while (!stop && index <= end) { + while (!done && index <= end) { int num_ops = 0, op_idx; unsigned i, pvec_pages, max_pages, locked_pages = 0; struct page **pages = NULL, **data_pages; @@ -869,15 +877,10 @@ retry: max_pages = wsize >> PAGE_SHIFT; get_more_pages: - pvec_pages = min_t(unsigned, PAGEVEC_SIZE, - max_pages - locked_pages); - if (end - index < (u64)(pvec_pages - 1)) - pvec_pages = (unsigned)(end - index) + 1; - - pvec_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, - pvec_pages); - dout("pagevec_lookup_tag got %d\n", pvec_pages); + pvec_pages = pagevec_lookup_range_nr_tag(&pvec, mapping, &index, + end, PAGECACHE_TAG_DIRTY, + max_pages - locked_pages); + dout("pagevec_lookup_range_tag got %d\n", pvec_pages); if (!pvec_pages && !locked_pages) break; for (i = 0; i < pvec_pages && locked_pages < max_pages; i++) { @@ -895,26 +898,30 @@ get_more_pages: unlock_page(page); continue; } - if (page->index > end) { - dout("end of range %p\n", page); - /* can't be range_cyclic (1st pass) because - * end == -1 in that case. */ - stop = true; - if (ceph_wbc.head_snapc) - done = true; - unlock_page(page); - break; - } - if (strip_unit_end && (page->index > strip_unit_end)) { - dout("end of strip unit %p\n", page); + /* only if matching snap context */ + pgsnapc = page_snap_context(page); + if (pgsnapc != snapc) { + dout("page snapc %p %lld != oldest %p %lld\n", + pgsnapc, pgsnapc->seq, snapc, snapc->seq); + if (!should_loop && + !ceph_wbc.head_snapc && + wbc->sync_mode != WB_SYNC_NONE) + should_loop = true; unlock_page(page); - break; + continue; } if (page_offset(page) >= ceph_wbc.i_size) { dout("%p page eof %llu\n", page, ceph_wbc.i_size); - /* not done if range_cyclic */ - stop = true; + if (ceph_wbc.size_stable || + page_offset(page) >= i_size_read(inode)) + mapping->a_ops->invalidatepage(page, + 0, PAGE_SIZE); + unlock_page(page); + continue; + } + if (strip_unit_end && (page->index > strip_unit_end)) { + dout("end of strip unit %p\n", page); unlock_page(page); break; } @@ -928,15 +935,6 @@ get_more_pages: wait_on_page_writeback(page); } - /* only if matching snap context */ - pgsnapc = page_snap_context(page); - if (pgsnapc != snapc) { - dout("page snapc %p %lld != oldest %p %lld\n", - pgsnapc, pgsnapc->seq, snapc, snapc->seq); - unlock_page(page); - continue; - } - if (!clear_page_dirty_for_io(page)) { dout("%p !clear_page_dirty_for_io\n", page); unlock_page(page); @@ -952,19 +950,15 @@ get_more_pages: if (locked_pages == 0) { u64 objnum; u64 objoff; + u32 xlen; /* prepare async write request */ offset = (u64)page_offset(page); - len = wsize; - - rc = ceph_calc_file_object_mapping(&ci->i_layout, - offset, len, - &objnum, &objoff, - &len); - if (rc < 0) { - unlock_page(page); - break; - } + ceph_calc_file_object_mapping(&ci->i_layout, + offset, wsize, + &objnum, &objoff, + &xlen); + len = xlen; num_ops = 1; strip_unit_end = page->index + @@ -1153,7 +1147,7 @@ new_request: * we tagged for writeback prior to entering this loop. */ if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) - done = stop = true; + done = true; release_pvec_pages: dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr, @@ -1176,8 +1170,7 @@ release_pvec_pages: index = 0; while ((index <= end) && (nr = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_WRITEBACK, - PAGEVEC_SIZE))) { + PAGECACHE_TAG_WRITEBACK))) { for (i = 0; i < nr; i++) { page = pvec.pages[i]; if (page_snap_context(page) != snapc) @@ -1465,9 +1458,10 @@ static int ceph_filemap_fault(struct vm_fault *vmf) if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) || ci->i_inline_version == CEPH_INLINE_NONE) { - current->journal_info = vma->vm_file; + CEPH_DEFINE_RW_CONTEXT(rw_ctx, got); + ceph_add_rw_context(fi, &rw_ctx); ret = filemap_fault(vmf); - current->journal_info = NULL; + ceph_del_rw_context(fi, &rw_ctx); } else ret = -EAGAIN; |