summaryrefslogtreecommitdiff
path: root/fs/ceph
diff options
context:
space:
mode:
authorYan, Zheng <zyan@redhat.com>2017-12-15 11:15:36 +0800
committerIlya Dryomov <idryomov@gmail.com>2018-01-29 18:36:08 +0100
commit5d988308283ecf062fa88f20ae05c52cce0bcdca (patch)
treefdc84f449e10eab2f74123d38fc0f3dcf26e5b54 /fs/ceph
parent5495c2d04f85da09512f5f346ed24dc0261d905d (diff)
ceph: track read contexts in ceph_file_info
Previously ceph_read_iter() uses current->journal to pass context info to ceph_readpages(), so that ceph_readpages() can distinguish read(2) from readahead(2)/fadvise(2)/madvise(2). The problem is that page fault can happen when copying data to userspace memory. Page fault may call other filesystem's page_mkwrite() if the userspace memory is mapped to a file. The later filesystem may also want to use current->journal. The fix is define a on-stack data structure in ceph_read_iter(), add it to context list in ceph_file_info. ceph_readpages() searches the list, find if there is a context belongs to current thread. Signed-off-by: "Yan, Zheng" <zyan@redhat.com> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/addr.c19
-rw-r--r--fs/ceph/file.c10
-rw-r--r--fs/ceph/super.h46
3 files changed, 66 insertions, 9 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index dbf07051aacd..78a1208b878e 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -299,7 +299,8 @@ unlock:
* start an async read(ahead) operation. return nr_pages we submitted
* a read for on success, or negative error code.
*/
-static int start_read(struct inode *inode, struct list_head *page_list, int max)
+static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
+ struct list_head *page_list, int max)
{
struct ceph_osd_client *osdc =
&ceph_inode_to_client(inode)->client->osdc;
@@ -316,7 +317,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
int got = 0;
int ret = 0;
- if (!current->journal_info) {
+ if (!rw_ctx) {
/* caller of readpages does not hold buffer and read caps
* (fadvise, madvise and readahead cases) */
int want = CEPH_CAP_FILE_CACHE;
@@ -437,6 +438,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
{
struct inode *inode = file_inode(file);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_file_info *ci = file->private_data;
+ struct ceph_rw_context *rw_ctx;
int rc = 0;
int max = 0;
@@ -449,11 +452,12 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
if (rc == 0)
goto out;
+ rw_ctx = ceph_find_rw_context(ci);
max = fsc->mount_options->rsize >> PAGE_SHIFT;
- dout("readpages %p file %p nr_pages %d max %d\n",
- inode, file, nr_pages, max);
+ dout("readpages %p file %p ctx %p nr_pages %d max %d\n",
+ inode, file, rw_ctx, nr_pages, max);
while (!list_empty(page_list)) {
- rc = start_read(inode, page_list, max);
+ rc = start_read(inode, rw_ctx, page_list, max);
if (rc < 0)
goto out;
}
@@ -1450,9 +1454,10 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
ci->i_inline_version == CEPH_INLINE_NONE) {
- current->journal_info = vma->vm_file;
+ CEPH_DEFINE_RW_CONTEXT(rw_ctx, got);
+ ceph_add_rw_context(fi, &rw_ctx);
ret = filemap_fault(vmf);
- current->journal_info = NULL;
+ ceph_del_rw_context(fi, &rw_ctx);
} else
ret = -EAGAIN;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 770dd3b413e4..6639926eed4e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -181,6 +181,10 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
return -ENOMEM;
}
cf->fmode = fmode;
+
+ spin_lock_init(&cf->rw_contexts_lock);
+ INIT_LIST_HEAD(&cf->rw_contexts);
+
cf->next_offset = 2;
cf->readdir_cache_idx = -1;
file->private_data = cf;
@@ -464,6 +468,7 @@ int ceph_release(struct inode *inode, struct file *file)
ceph_mdsc_put_request(cf->last_readdir);
kfree(cf->last_name);
kfree(cf->dir_info);
+ WARN_ON(!list_empty(&cf->rw_contexts));
kmem_cache_free(ceph_file_cachep, cf);
/* wake up anyone waiting for caps on this inode */
@@ -1199,12 +1204,13 @@ again:
retry_op = READ_INLINE;
}
} else {
+ CEPH_DEFINE_RW_CONTEXT(rw_ctx, got);
dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
ceph_cap_string(got));
- current->journal_info = filp;
+ ceph_add_rw_context(fi, &rw_ctx);
ret = generic_file_read_iter(iocb, to);
- current->journal_info = NULL;
+ ceph_del_rw_context(fi, &rw_ctx);
}
dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 75701c199b2b..601100da738f 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -669,6 +669,9 @@ struct ceph_file_info {
short fmode; /* initialized on open */
short flags; /* CEPH_F_* */
+ spinlock_t rw_contexts_lock;
+ struct list_head rw_contexts;
+
/* readdir: position within the dir */
u32 frag;
struct ceph_mds_request *last_readdir;
@@ -685,6 +688,49 @@ struct ceph_file_info {
int dir_info_len;
};
+struct ceph_rw_context {
+ struct list_head list;
+ struct task_struct *thread;
+ int caps;
+};
+
+#define CEPH_DEFINE_RW_CONTEXT(_name, _caps) \
+ struct ceph_rw_context _name = { \
+ .thread = current, \
+ .caps = _caps, \
+ }
+
+static inline void ceph_add_rw_context(struct ceph_file_info *cf,
+ struct ceph_rw_context *ctx)
+{
+ spin_lock(&cf->rw_contexts_lock);
+ list_add(&ctx->list, &cf->rw_contexts);
+ spin_unlock(&cf->rw_contexts_lock);
+}
+
+static inline void ceph_del_rw_context(struct ceph_file_info *cf,
+ struct ceph_rw_context *ctx)
+{
+ spin_lock(&cf->rw_contexts_lock);
+ list_del(&ctx->list);
+ spin_unlock(&cf->rw_contexts_lock);
+}
+
+static inline struct ceph_rw_context*
+ceph_find_rw_context(struct ceph_file_info *cf)
+{
+ struct ceph_rw_context *ctx, *found = NULL;
+ spin_lock(&cf->rw_contexts_lock);
+ list_for_each_entry(ctx, &cf->rw_contexts, list) {
+ if (ctx->thread == current) {
+ found = ctx;
+ break;
+ }
+ }
+ spin_unlock(&cf->rw_contexts_lock);
+ return found;
+}
+
struct ceph_readdir_cache_control {
struct page *page;
struct dentry **dentries;