diff options
Diffstat (limited to 'fs/fs-writeback.c')
 fs/fs-writeback.c | 143 ++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 92 insertions(+), 51 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 59c6e4956786..34591ee804b5 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -144,7 +144,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,   *   * Description:   *   This does WB_SYNC_NONE opportunistic writeback. The IO is only - *   started when this function returns, we make no guarentees on + *   started when this function returns, we make no guarantees on   *   completion. Caller need not hold sb s_umount semaphore.   *   */ @@ -176,6 +176,17 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)  }  /* + * Remove the inode from the writeback list it is on. + */ +void inode_wb_list_del(struct inode *inode) +{ +	spin_lock(&inode_wb_list_lock); +	list_del_init(&inode->i_wb_list); +	spin_unlock(&inode_wb_list_lock); +} + + +/*   * Redirty an inode: set its when-it-was dirtied timestamp and move it to the   * furthest end of its superblock's dirty-inode list.   * @@ -188,6 +199,7 @@ static void redirty_tail(struct inode *inode)  {  	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; +	assert_spin_locked(&inode_wb_list_lock);  	if (!list_empty(&wb->b_dirty)) {  		struct inode *tail; @@ -205,14 +217,17 @@ static void requeue_io(struct inode *inode)  {  	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; +	assert_spin_locked(&inode_wb_list_lock);  	list_move(&inode->i_wb_list, &wb->b_more_io);  }  static void inode_sync_complete(struct inode *inode)  {  	/* -	 * Prevent speculative execution through spin_unlock(&inode_lock); +	 * Prevent speculative execution through +	 * spin_unlock(&inode_wb_list_lock);  	 */ +  	smp_mb();  	wake_up_bit(&inode->i_state, __I_SYNC);  } @@ -286,6 +301,7 @@ static void move_expired_inodes(struct list_head *delaying_queue,   */  static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)  { +	assert_spin_locked(&inode_wb_list_lock);  	list_splice_init(&wb->b_more_io, &wb->b_io);  	move_expired_inodes(&wb->b_dirty, 
&wb->b_io, older_than_this);  } @@ -306,25 +322,25 @@ static void inode_wait_for_writeback(struct inode *inode)  	wait_queue_head_t *wqh;  	wqh = bit_waitqueue(&inode->i_state, __I_SYNC); -	 while (inode->i_state & I_SYNC) { -		spin_unlock(&inode_lock); +	while (inode->i_state & I_SYNC) { +		spin_unlock(&inode->i_lock); +		spin_unlock(&inode_wb_list_lock);  		__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); -		spin_lock(&inode_lock); +		spin_lock(&inode_wb_list_lock); +		spin_lock(&inode->i_lock);  	}  }  /* - * Write out an inode's dirty pages.  Called under inode_lock.  Either the - * caller has ref on the inode (either via __iget or via syscall against an fd) - * or the inode has I_WILL_FREE set (via generic_forget_inode) + * Write out an inode's dirty pages.  Called under inode_wb_list_lock and + * inode->i_lock.  Either the caller has an active reference on the inode or + * the inode has I_WILL_FREE set.   *   * If `wait' is set, wait on the writeout.   *   * The whole writeout design is quite complex and fragile.  We want to avoid   * starvation of particular inodes when others are being redirtied, prevent   * livelocks, etc. - * - * Called under inode_lock.   
*/  static int  writeback_single_inode(struct inode *inode, struct writeback_control *wbc) @@ -333,6 +349,9 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)  	unsigned dirty;  	int ret; +	assert_spin_locked(&inode_wb_list_lock); +	assert_spin_locked(&inode->i_lock); +  	if (!atomic_read(&inode->i_count))  		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));  	else @@ -363,7 +382,8 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)  	/* Set I_SYNC, reset I_DIRTY_PAGES */  	inode->i_state |= I_SYNC;  	inode->i_state &= ~I_DIRTY_PAGES; -	spin_unlock(&inode_lock); +	spin_unlock(&inode->i_lock); +	spin_unlock(&inode_wb_list_lock);  	ret = do_writepages(mapping, wbc); @@ -383,10 +403,10 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)  	 * due to delalloc, clear dirty metadata flags right before  	 * write_inode()  	 */ -	spin_lock(&inode_lock); +	spin_lock(&inode->i_lock);  	dirty = inode->i_state & I_DIRTY;  	inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); -	spin_unlock(&inode_lock); +	spin_unlock(&inode->i_lock);  	/* Don't write the inode if only I_DIRTY_PAGES was set */  	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {  		int err = write_inode(inode, wbc); @@ -394,7 +414,8 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)  			ret = err;  	} -	spin_lock(&inode_lock); +	spin_lock(&inode_wb_list_lock); +	spin_lock(&inode->i_lock);  	inode->i_state &= ~I_SYNC;  	if (!(inode->i_state & I_FREEING)) {  		if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { @@ -506,7 +527,9 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,  		 * kind does not need peridic writeout yet, and for the latter  		 * kind writeout is handled by the freer.  		 
*/ +		spin_lock(&inode->i_lock);  		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { +			spin_unlock(&inode->i_lock);  			requeue_io(inode);  			continue;  		} @@ -515,10 +538,13 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,  		 * Was this inode dirtied after sync_sb_inodes was called?  		 * This keeps sync from extra jobs and livelock.  		 */ -		if (inode_dirtied_after(inode, wbc->wb_start)) +		if (inode_dirtied_after(inode, wbc->wb_start)) { +			spin_unlock(&inode->i_lock);  			return 1; +		}  		__iget(inode); +  		pages_skipped = wbc->pages_skipped;  		writeback_single_inode(inode, wbc);  		if (wbc->pages_skipped != pages_skipped) { @@ -528,10 +554,11 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,  			 */  			redirty_tail(inode);  		} -		spin_unlock(&inode_lock); +		spin_unlock(&inode->i_lock); +		spin_unlock(&inode_wb_list_lock);  		iput(inode);  		cond_resched(); -		spin_lock(&inode_lock); +		spin_lock(&inode_wb_list_lock);  		if (wbc->nr_to_write <= 0) {  			wbc->more_io = 1;  			return 1; @@ -550,7 +577,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,  	if (!wbc->wb_start)  		wbc->wb_start = jiffies; /* livelock avoidance */ -	spin_lock(&inode_lock); +	spin_lock(&inode_wb_list_lock);  	if (!wbc->for_kupdate || list_empty(&wb->b_io))  		queue_io(wb, wbc->older_than_this); @@ -568,7 +595,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,  		if (ret)  			break;  	} -	spin_unlock(&inode_lock); +	spin_unlock(&inode_wb_list_lock);  	/* Leave any unwritten inodes on b_io */  } @@ -577,11 +604,11 @@ static void __writeback_inodes_sb(struct super_block *sb,  {  	WARN_ON(!rwsem_is_locked(&sb->s_umount)); -	spin_lock(&inode_lock); +	spin_lock(&inode_wb_list_lock);  	if (!wbc->for_kupdate || list_empty(&wb->b_io))  		queue_io(wb, wbc->older_than_this);  	writeback_sb_inodes(sb, wb, wbc, true); -	spin_unlock(&inode_lock); +	spin_unlock(&inode_wb_list_lock);  }  /* @@ -720,13 
+747,15 @@ static long wb_writeback(struct bdi_writeback *wb,  		 * become available for writeback. Otherwise  		 * we'll just busyloop.  		 */ -		spin_lock(&inode_lock); +		spin_lock(&inode_wb_list_lock);  		if (!list_empty(&wb->b_more_io))  {  			inode = wb_inode(wb->b_more_io.prev);  			trace_wbc_writeback_wait(&wbc, wb->bdi); +			spin_lock(&inode->i_lock);  			inode_wait_for_writeback(inode); +			spin_unlock(&inode->i_lock);  		} -		spin_unlock(&inode_lock); +		spin_unlock(&inode_wb_list_lock);  	}  	return wrote; @@ -992,7 +1021,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)  {  	struct super_block *sb = inode->i_sb;  	struct backing_dev_info *bdi = NULL; -	bool wakeup_bdi = false;  	/*  	 * Don't do this for I_DIRTY_PAGES - that doesn't actually @@ -1016,7 +1044,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)  	if (unlikely(block_dump))  		block_dump___mark_inode_dirty(inode); -	spin_lock(&inode_lock); +	spin_lock(&inode->i_lock);  	if ((inode->i_state & flags) != flags) {  		const int was_dirty = inode->i_state & I_DIRTY; @@ -1028,7 +1056,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)  		 * superblock list, based upon its state.  		 */  		if (inode->i_state & I_SYNC) -			goto out; +			goto out_unlock_inode;  		/*  		 * Only add valid (hashed) inodes to the superblock's @@ -1036,16 +1064,17 @@ void __mark_inode_dirty(struct inode *inode, int flags)  		 */  		if (!S_ISBLK(inode->i_mode)) {  			if (inode_unhashed(inode)) -				goto out; +				goto out_unlock_inode;  		}  		if (inode->i_state & I_FREEING) -			goto out; +			goto out_unlock_inode;  		/*  		 * If the inode was already on b_dirty/b_io/b_more_io, don't  		 * reposition it (that would break b_dirty time-ordering).  		 
*/  		if (!was_dirty) { +			bool wakeup_bdi = false;  			bdi = inode_to_bdi(inode);  			if (bdi_cap_writeback_dirty(bdi)) { @@ -1062,15 +1091,20 @@ void __mark_inode_dirty(struct inode *inode, int flags)  					wakeup_bdi = true;  			} +			spin_unlock(&inode->i_lock); +			spin_lock(&inode_wb_list_lock);  			inode->dirtied_when = jiffies;  			list_move(&inode->i_wb_list, &bdi->wb.b_dirty); +			spin_unlock(&inode_wb_list_lock); + +			if (wakeup_bdi) +				bdi_wakeup_thread_delayed(bdi); +			return;  		}  	} -out: -	spin_unlock(&inode_lock); +out_unlock_inode: +	spin_unlock(&inode->i_lock); -	if (wakeup_bdi) -		bdi_wakeup_thread_delayed(bdi);  }  EXPORT_SYMBOL(__mark_inode_dirty); @@ -1101,7 +1135,7 @@ static void wait_sb_inodes(struct super_block *sb)  	 */  	WARN_ON(!rwsem_is_locked(&sb->s_umount)); -	spin_lock(&inode_lock); +	spin_lock(&inode_sb_list_lock);  	/*  	 * Data integrity sync. Must wait for all pages under writeback, @@ -1111,22 +1145,25 @@ static void wait_sb_inodes(struct super_block *sb)  	 * we still have to wait for that writeout.  	 */  	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { -		struct address_space *mapping; +		struct address_space *mapping = inode->i_mapping; -		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) -			continue; -		mapping = inode->i_mapping; -		if (mapping->nrpages == 0) +		spin_lock(&inode->i_lock); +		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || +		    (mapping->nrpages == 0)) { +			spin_unlock(&inode->i_lock);  			continue; +		}  		__iget(inode); -		spin_unlock(&inode_lock); +		spin_unlock(&inode->i_lock); +		spin_unlock(&inode_sb_list_lock); +  		/* -		 * We hold a reference to 'inode' so it couldn't have -		 * been removed from s_inodes list while we dropped the -		 * inode_lock.  We cannot iput the inode now as we can -		 * be holding the last reference and we cannot iput it -		 * under inode_lock. So we keep the reference and iput -		 * it later. 
+		 * We hold a reference to 'inode' so it couldn't have been +		 * removed from s_inodes list while we dropped the +		 * inode_sb_list_lock.  We cannot iput the inode now as we can +		 * be holding the last reference and we cannot iput it under +		 * inode_sb_list_lock. So we keep the reference and iput it +		 * later.  		 */  		iput(old_inode);  		old_inode = inode; @@ -1135,9 +1172,9 @@ static void wait_sb_inodes(struct super_block *sb)  		cond_resched(); -		spin_lock(&inode_lock); +		spin_lock(&inode_sb_list_lock);  	} -	spin_unlock(&inode_lock); +	spin_unlock(&inode_sb_list_lock);  	iput(old_inode);  } @@ -1271,9 +1308,11 @@ int write_inode_now(struct inode *inode, int sync)  		wbc.nr_to_write = 0;  	might_sleep(); -	spin_lock(&inode_lock); +	spin_lock(&inode_wb_list_lock); +	spin_lock(&inode->i_lock);  	ret = writeback_single_inode(inode, &wbc); -	spin_unlock(&inode_lock); +	spin_unlock(&inode->i_lock); +	spin_unlock(&inode_wb_list_lock);  	if (sync)  		inode_sync_wait(inode);  	return ret; @@ -1295,9 +1334,11 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)  {  	int ret; -	spin_lock(&inode_lock); +	spin_lock(&inode_wb_list_lock); +	spin_lock(&inode->i_lock);  	ret = writeback_single_inode(inode, wbc); -	spin_unlock(&inode_lock); +	spin_unlock(&inode->i_lock); +	spin_unlock(&inode_wb_list_lock);  	return ret;  }  EXPORT_SYMBOL(sync_inode);  | 
