summaryrefslogtreecommitdiff
path: root/fs/ceph/file.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-12-16 11:23:34 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2016-12-16 11:23:34 -0800
commit59331c215daf600a650e281b6e8ef3e1ed1174c2 (patch)
treef62bf3253fe48d7dd8b09b040c8e7fd9b2c02a86 /fs/ceph/file.c
parentff0f962ca3c38239b299a70e7eea27abfbb979c3 (diff)
parent45ee2c1d66185e5bd27702c60cce3c43fa3370d2 (diff)
Merge tag 'ceph-for-4.10-rc1' of git://github.com/ceph/ceph-client
Pull ceph updates from Ilya Dryomov: "A varied set of changes: - a large rework of cephx auth code to cope with CONFIG_VMAP_STACK (myself). Also fixed a deadlock caused by a bogus allocation on the writeback path and authorize reply verification. - a fix for long stalls during fsync (Jeff Layton). The client now has a way to force the MDS log flush, leading to ~100x speedups in some synthetic tests. - a new [no]require_active_mds mount option (Zheng Yan). On mount, we will now check whether any of the MDSes are available and bail rather than block if none are. This check can be avoided by specifying the "no" option. - a couple of MDS cap handling fixes and a few assorted patches throughout" * tag 'ceph-for-4.10-rc1' of git://github.com/ceph/ceph-client: (32 commits) libceph: remove now unused finish_request() wrapper libceph: always signal completion when done ceph: avoid creating orphan object when checking pool permission ceph: properly set issue_seq for cap release ceph: add flags parameter to send_cap_msg ceph: update cap message struct version to 10 ceph: define new argument structure for send_cap_msg ceph: move xattr initialzation before the encoding past the ceph_mds_caps ceph: fix minor typo in unsafe_request_wait ceph: record truncate size/seq for snap data writeback ceph: check availability of mds cluster on mount ceph: fix splice read for no Fc capability case ceph: try getting buffer capability for readahead/fadvise ceph: fix scheduler warning due to nested blocking ceph: fix printing wrong return variable in ceph_direct_read_write() crush: include mapper.h in mapper.c rbd: silence bogus -Wmaybe-uninitialized warning libceph: no need to drop con->mutex for ->get_authorizer() libceph: drop len argument of *verify_authorizer_reply() libceph: verify authorize reply on connect ...
Diffstat (limited to 'fs/ceph/file.c')
-rw-r--r--fs/ceph/file.c127
1 files changed, 70 insertions, 57 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 159fc8f1a6a0..045d30d26624 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -454,71 +454,60 @@ enum {
* only return a short read to the caller if we hit EOF.
*/
static int striped_read(struct inode *inode,
- u64 off, u64 len,
+ u64 pos, u64 len,
struct page **pages, int num_pages,
- int *checkeof)
+ int page_align, int *checkeof)
{
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);
- u64 pos, this_len, left;
+ u64 this_len;
loff_t i_size;
- int page_align, pages_left;
- int read, ret;
- struct page **page_pos;
+ int page_idx;
+ int ret, read = 0;
bool hit_stripe, was_short;
/*
* we may need to do multiple reads. not atomic, unfortunately.
*/
- pos = off;
- left = len;
- page_pos = pages;
- pages_left = num_pages;
- read = 0;
-
more:
- page_align = pos & ~PAGE_MASK;
- this_len = left;
+ this_len = len;
+ page_idx = (page_align + read) >> PAGE_SHIFT;
ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
&ci->i_layout, pos, &this_len,
- ci->i_truncate_seq,
- ci->i_truncate_size,
- page_pos, pages_left, page_align);
+ ci->i_truncate_seq, ci->i_truncate_size,
+ pages + page_idx, num_pages - page_idx,
+ ((page_align + read) & ~PAGE_MASK));
if (ret == -ENOENT)
ret = 0;
- hit_stripe = this_len < left;
+ hit_stripe = this_len < len;
was_short = ret >= 0 && ret < this_len;
- dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read,
+ dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, len, read,
ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
i_size = i_size_read(inode);
if (ret >= 0) {
- int didpages;
if (was_short && (pos + ret < i_size)) {
int zlen = min(this_len - ret, i_size - pos - ret);
- int zoff = (off & ~PAGE_MASK) + read + ret;
+ int zoff = page_align + read + ret;
dout(" zero gap %llu to %llu\n",
- pos + ret, pos + ret + zlen);
+ pos + ret, pos + ret + zlen);
ceph_zero_page_vector_range(zoff, zlen, pages);
ret += zlen;
}
- didpages = (page_align + ret) >> PAGE_SHIFT;
+ read += ret;
pos += ret;
- read = pos - off;
- left -= ret;
- page_pos += didpages;
- pages_left -= didpages;
+ len -= ret;
/* hit stripe and need continue*/
- if (left && hit_stripe && pos < i_size)
+ if (len && hit_stripe && pos < i_size)
goto more;
}
if (read > 0) {
ret = read;
/* did we bounce off eof? */
- if (pos + left > i_size)
+ if (pos + len > i_size)
*checkeof = CHECK_EOF;
}
@@ -532,15 +521,16 @@ more:
*
* If the read spans object boundary, just do multiple reads.
*/
-static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
- int *checkeof)
+static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
+ int *checkeof)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct page **pages;
u64 off = iocb->ki_pos;
- int num_pages, ret;
- size_t len = iov_iter_count(i);
+ int num_pages;
+ ssize_t ret;
+ size_t len = iov_iter_count(to);
dout("sync_read on file %p %llu~%u %s\n", file, off,
(unsigned)len,
@@ -559,35 +549,56 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
if (ret < 0)
return ret;
- num_pages = calc_pages_for(off, len);
- pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
- if (IS_ERR(pages))
- return PTR_ERR(pages);
- ret = striped_read(inode, off, len, pages,
- num_pages, checkeof);
- if (ret > 0) {
- int l, k = 0;
- size_t left = ret;
-
- while (left) {
- size_t page_off = off & ~PAGE_MASK;
- size_t copy = min_t(size_t, left,
- PAGE_SIZE - page_off);
- l = copy_page_to_iter(pages[k++], page_off, copy, i);
- off += l;
- left -= l;
- if (l < copy)
- break;
+ if (unlikely(to->type & ITER_PIPE)) {
+ size_t page_off;
+ ret = iov_iter_get_pages_alloc(to, &pages, len,
+ &page_off);
+ if (ret <= 0)
+ return -ENOMEM;
+ num_pages = DIV_ROUND_UP(ret + page_off, PAGE_SIZE);
+
+ ret = striped_read(inode, off, ret, pages, num_pages,
+ page_off, checkeof);
+ if (ret > 0) {
+ iov_iter_advance(to, ret);
+ off += ret;
+ } else {
+ iov_iter_advance(to, 0);
+ }
+ ceph_put_page_vector(pages, num_pages, false);
+ } else {
+ num_pages = calc_pages_for(off, len);
+ pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ ret = striped_read(inode, off, len, pages, num_pages,
+ (off & ~PAGE_MASK), checkeof);
+ if (ret > 0) {
+ int l, k = 0;
+ size_t left = ret;
+
+ while (left) {
+ size_t page_off = off & ~PAGE_MASK;
+ size_t copy = min_t(size_t, left,
+ PAGE_SIZE - page_off);
+ l = copy_page_to_iter(pages[k++], page_off,
+ copy, to);
+ off += l;
+ left -= l;
+ if (l < copy)
+ break;
+ }
}
+ ceph_release_page_vector(pages, num_pages);
}
- ceph_release_page_vector(pages, num_pages);
if (off > iocb->ki_pos) {
ret = off - iocb->ki_pos;
iocb->ki_pos = off;
}
- dout("sync_read result %d\n", ret);
+ dout("sync_read result %zd\n", ret);
return ret;
}
@@ -849,7 +860,7 @@ void ceph_sync_write_wait(struct inode *inode)
dout("sync_write_wait on tid %llu (until %llu)\n",
req->r_tid, last_tid);
- wait_for_completion(&req->r_safe_completion);
+ wait_for_completion(&req->r_done_completion);
ceph_osdc_put_request(req);
spin_lock(&ci->i_unsafe_lock);
@@ -902,7 +913,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
pos >> PAGE_SHIFT,
(pos + count) >> PAGE_SHIFT);
if (ret2 < 0)
- dout("invalidate_inode_pages2_range returned %d\n", ret);
+ dout("invalidate_inode_pages2_range returned %d\n", ret2);
flags = CEPH_OSD_FLAG_ORDERSNAP |
CEPH_OSD_FLAG_ONDISK |
@@ -1245,8 +1256,9 @@ again:
dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
ceph_cap_string(got));
-
+ current->journal_info = filp;
ret = generic_file_read_iter(iocb, to);
+ current->journal_info = NULL;
}
dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
@@ -1766,6 +1778,7 @@ const struct file_operations ceph_file_fops = {
.fsync = ceph_fsync,
.lock = ceph_lock,
.flock = ceph_flock,
+ .splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.unlocked_ioctl = ceph_ioctl,
.compat_ioctl = ceph_ioctl,