From bd5fe6c5eb9c548d7f07fe8f89a150bb6705e8e3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Jun 2011 14:29:43 -0400 Subject: fs: kill i_alloc_sem i_alloc_sem is a rather special rw_semaphore. It's the last one that may be released by a non-owner, and it's write side is always mirrored by real exclusion. It's intended use it to wait for all pending direct I/O requests to finish before starting a truncate. Replace it with a hand-grown construct: - exclusion for truncates is already guaranteed by i_mutex, so it can simply fall way - the reader side is replaced by an i_dio_count member in struct inode that counts the number of pending direct I/O requests. Truncate can't proceed as long as it's non-zero - when i_dio_count reaches non-zero we wake up a pending truncate using wake_up_bit on a new bit in i_flags - new references to i_dio_count can't appear while we are waiting for it to read zero because the direct I/O count always needs i_mutex (or an equivalent like XFS's i_iolock) for starting a new operation. This scheme is much simpler, and saves the space of a spinlock_t and a struct list_head in struct inode (typically 160 bits on a non-debug 64-bit system). Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- mm/filemap.c | 3 --- mm/madvise.c | 2 +- mm/rmap.c | 1 - mm/truncate.c | 3 +-- 4 files changed, 2 insertions(+), 7 deletions(-) (limited to 'mm') diff --git a/mm/filemap.c b/mm/filemap.c index a8251a8d3457..f820e600f1ad 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -78,9 +78,6 @@ * ->i_mutex (generic_file_buffered_write) * ->mmap_sem (fault_in_pages_readable->do_page_fault) * - * ->i_mutex - * ->i_alloc_sem (various) - * * inode_wb_list_lock * sb_lock (fs/fs-writeback.c) * ->mapping->tree_lock (__sync_single_inode) diff --git a/mm/madvise.c b/mm/madvise.c index 2221491ed503..74bf193eff04 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -218,7 +218,7 @@ static long madvise_remove(struct vm_area_struct *vma, endoff = (loff_t)(end - vma->vm_start - 1) + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); - /* vmtruncate_range needs to take i_mutex and i_alloc_sem */ + /* vmtruncate_range needs to take i_mutex */ up_read(¤t->mm->mmap_sem); error = vmtruncate_range(mapping->host, offset, endoff); down_read(¤t->mm->mmap_sem); diff --git a/mm/rmap.c b/mm/rmap.c index 23295f65ae43..2540a39eea4a 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -21,7 +21,6 @@ * Lock ordering in mm: * * inode->i_mutex (while writing or truncating, not reading or faulting) - * inode->i_alloc_sem (vmtruncate_range) * mm->mmap_sem * page->flags PG_locked (lock_page) * mapping->i_mmap_mutex diff --git a/mm/truncate.c b/mm/truncate.c index e13f22efaad7..003c6c685fc8 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -622,12 +622,11 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) return -ENOSYS; mutex_lock(&inode->i_mutex); - down_write(&inode->i_alloc_sem); + inode_dio_wait(inode); unmap_mapping_range(mapping, offset, (end - offset), 1); inode->i_op->truncate_range(inode, offset, end); /* unmap again to remove racily COWed private pages */ unmap_mapping_range(mapping, offset, (end - offset), 1); - up_write(&inode->i_alloc_sem); mutex_unlock(&inode->i_mutex); return 0; -- cgit