From e63890f38ade9497b5609ddeb7f52df0fe55ea15 Mon Sep 17 00:00:00 2001 From: Ryan Ding Date: Fri, 25 Mar 2016 14:21:15 -0700 Subject: ocfs2: fix ip_unaligned_aio deadlock with dio work queue In the current implementation of unaligned aio+dio, lock order behave as follow: in user process context: -> call io_submit() -> get i_mutex <== window1 -> get ip_unaligned_aio -> submit direct io to block device -> release i_mutex -> io_submit() return in dio work queue context(the work queue is created in __blockdev_direct_IO): -> release ip_unaligned_aio <== window2 -> get i_mutex -> clear unwritten flag & change i_size -> release i_mutex There is a limitation to the thread number of dio work queue. 256 at default. If all 256 thread are in the above 'window2' stage, and there is a user process in the 'window1' stage, the system will became deadlock. Since the user process hold i_mutex to wait ip_unaligned_aio lock, while there is a direct bio hold ip_unaligned_aio mutex who is waiting for a dio work queue thread to be schedule. But all the dio work queue thread is waiting for i_mutex lock in 'window2'. This case only happened in a test which send a large number(more than 256) of aio at one io_submit() call. My design is to remove ip_unaligned_aio lock. Change it to a sync io instead. Just like ip_unaligned_aio lock, serialize the unaligned aio dio. [akpm@linux-foundation.org: remove OCFS2_IOCB_UNALIGNED_IO, per Junxiao Bi] Signed-off-by: Ryan Ding Reviewed-by: Junxiao Bi Cc: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/file.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) (limited to 'fs/ocfs2/file.c') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 1ab182321b18..c18ab45f8d21 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2178,7 +2178,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); int full_coherency = !(osb->s_mount_opt & OCFS2_MOUNT_COHERENCY_BUFFERED); - int unaligned_dio = 0; + void *saved_ki_complete = NULL; int append_write = ((iocb->ki_pos + count) >= i_size_read(inode) ? 1 : 0); @@ -2241,17 +2241,12 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, goto out; } - if (direct_io && !is_sync_kiocb(iocb)) - unaligned_dio = ocfs2_is_io_unaligned(inode, count, iocb->ki_pos); - - if (unaligned_dio) { + if (direct_io && !is_sync_kiocb(iocb) && + ocfs2_is_io_unaligned(inode, count, iocb->ki_pos)) { /* - * Wait on previous unaligned aio to complete before - * proceeding. + * Make it a sync io if it's an unaligned aio. */ - mutex_lock(&OCFS2_I(inode)->ip_unaligned_aio); - /* Mark the iocb as needing an unlock in ocfs2_dio_end_io */ - ocfs2_iocb_set_unaligned_aio(iocb); + saved_ki_complete = xchg(&iocb->ki_complete, NULL); } /* communicate with ocfs2_dio_end_io */ @@ -2272,11 +2267,10 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, */ if ((written == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) { rw_level = -1; - unaligned_dio = 0; } if (unlikely(written <= 0)) - goto no_sync; + goto out; if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode)) { @@ -2298,13 +2292,10 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, iocb->ki_pos - 1); } -no_sync: - if (unaligned_dio && ocfs2_iocb_is_unaligned_aio(iocb)) { - ocfs2_iocb_clear_unaligned_aio(iocb); - mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio); - } - out: + if (saved_ki_complete) + xchg(&iocb->ki_complete, saved_ki_complete); + if (rw_level != -1) ocfs2_rw_unlock(inode, rw_level); -- cgit