summaryrefslogtreecommitdiff
path: root/fs/fs-writeback.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r--fs/fs-writeback.c188
1 files changed, 100 insertions, 88 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 245c430a2e41..4b12ba70a895 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -126,7 +126,7 @@ static void wb_io_lists_depopulated(struct bdi_writeback *wb)
* inode_io_list_move_locked - move an inode onto a bdi_writeback IO list
* @inode: inode to be moved
* @wb: target bdi_writeback
- * @head: one of @wb->b_{dirty|io|more_io}
+ * @head: one of @wb->b_{dirty|io|more_io|dirty_time}
*
* Move @inode->i_io_list to @list of @wb and set %WB_has_dirty_io.
* Returns %true if @inode is the first occupant of the !dirty_time IO
@@ -347,9 +347,9 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
* By the time control reaches here, RCU grace period has passed
* since I_WB_SWITCH assertion and all wb stat update transactions
* between unlocked_inode_to_wb_begin/end() are guaranteed to be
- * synchronizing against mapping->tree_lock.
+ * synchronizing against the i_pages lock.
*
- * Grabbing old_wb->list_lock, inode->i_lock and mapping->tree_lock
+ * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock
* gives us exclusion against all wb related operations on @inode
* including IO list manipulations and stat updates.
*/
@@ -361,7 +361,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
}
spin_lock(&inode->i_lock);
- spin_lock_irq(&mapping->tree_lock);
+ xa_lock_irq(&mapping->i_pages);
/*
* Once I_FREEING is visible under i_lock, the eviction path owns
@@ -373,22 +373,22 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
/*
* Count and transfer stats. Note that PAGECACHE_TAG_DIRTY points
* to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to
- * pages actually under underwriteback.
+ * pages actually under writeback.
*/
- radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, 0,
+ radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0,
PAGECACHE_TAG_DIRTY) {
struct page *page = radix_tree_deref_slot_protected(slot,
- &mapping->tree_lock);
+ &mapping->i_pages.xa_lock);
if (likely(page) && PageDirty(page)) {
dec_wb_stat(old_wb, WB_RECLAIMABLE);
inc_wb_stat(new_wb, WB_RECLAIMABLE);
}
}
- radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, 0,
+ radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0,
PAGECACHE_TAG_WRITEBACK) {
struct page *page = radix_tree_deref_slot_protected(slot,
- &mapping->tree_lock);
+ &mapping->i_pages.xa_lock);
if (likely(page)) {
WARN_ON_ONCE(!PageWriteback(page));
dec_wb_stat(old_wb, WB_WRITEBACK);
@@ -430,7 +430,7 @@ skip_switch:
*/
smp_store_release(&inode->i_state, inode->i_state & ~I_WB_SWITCH);
- spin_unlock_irq(&mapping->tree_lock);
+ xa_unlock_irq(&mapping->i_pages);
spin_unlock(&inode->i_lock);
spin_unlock(&new_wb->list_lock);
spin_unlock(&old_wb->list_lock);
@@ -490,7 +490,7 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
/* while holding I_WB_SWITCH, no one else can update the association */
spin_lock(&inode->i_lock);
- if (!(inode->i_sb->s_flags & MS_ACTIVE) ||
+ if (!(inode->i_sb->s_flags & SB_ACTIVE) ||
inode->i_state & (I_WB_SWITCH | I_FREEING) ||
inode_to_wb(inode) == isw->new_wb) {
spin_unlock(&inode->i_lock);
@@ -506,8 +506,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
/*
* In addition to synchronizing among switchers, I_WB_SWITCH tells
- * the RCU protected stat update paths to grab the mapping's
- * tree_lock so that stat transfer can synchronize against them.
+ * the RCU protected stat update paths to grab the i_page
+ * lock so that stat transfer can synchronize against them.
* Let's continue after I_WB_SWITCH is guaranteed to be visible.
*/
call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
@@ -933,33 +933,36 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
#endif /* CONFIG_CGROUP_WRITEBACK */
-void wb_start_writeback(struct bdi_writeback *wb, long nr_pages,
- bool range_cyclic, enum wb_reason reason)
+/*
+ * Add in the number of potentially dirty inodes, because each inode
+ * write can dirty pagecache in the underlying blockdev.
+ */
+static unsigned long get_nr_dirty_pages(void)
{
- struct wb_writeback_work *work;
+ return global_node_page_state(NR_FILE_DIRTY) +
+ global_node_page_state(NR_UNSTABLE_NFS) +
+ get_nr_dirty_inodes();
+}
+static void wb_start_writeback(struct bdi_writeback *wb, enum wb_reason reason)
+{
if (!wb_has_dirty_io(wb))
return;
/*
- * This is WB_SYNC_NONE writeback, so if allocation fails just
- * wakeup the thread for old dirty data writeback
+ * All callers of this function want to start writeback of all
+ * dirty pages. Places like vmscan can call this at a very
+ * high frequency, causing pointless allocations of tons of
+ * work items and keeping the flusher threads busy retrieving
+ * that work. Ensure that we only allow one of them pending and
+ * inflight at the time.
*/
- work = kzalloc(sizeof(*work),
- GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN);
- if (!work) {
- trace_writeback_nowork(wb);
- wb_wakeup(wb);
+ if (test_bit(WB_start_all, &wb->state) ||
+ test_and_set_bit(WB_start_all, &wb->state))
return;
- }
-
- work->sync_mode = WB_SYNC_NONE;
- work->nr_pages = nr_pages;
- work->range_cyclic = range_cyclic;
- work->reason = reason;
- work->auto_free = 1;
- wb_queue_work(wb, work);
+ wb->start_all_reason = reason;
+ wb_wakeup(wb);
}
/**
@@ -1340,7 +1343,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
dirty = inode->i_state & I_DIRTY;
if (inode->i_state & I_DIRTY_TIME) {
- if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
+ if ((dirty & I_DIRTY_INODE) ||
wbc->sync_mode == WB_SYNC_ALL ||
unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) ||
unlikely(time_after(jiffies,
@@ -1814,17 +1817,6 @@ static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb)
return work;
}
-/*
- * Add in the number of potentially dirty inodes, because each inode
- * write can dirty pagecache in the underlying blockdev.
- */
-static unsigned long get_nr_dirty_pages(void)
-{
- return global_node_page_state(NR_FILE_DIRTY) +
- global_node_page_state(NR_UNSTABLE_NFS) +
- get_nr_dirty_inodes();
-}
-
static long wb_check_background_flush(struct bdi_writeback *wb)
{
if (wb_over_bg_thresh(wb)) {
@@ -1877,6 +1869,30 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
return 0;
}
+static long wb_check_start_all(struct bdi_writeback *wb)
+{
+ long nr_pages;
+
+ if (!test_bit(WB_start_all, &wb->state))
+ return 0;
+
+ nr_pages = get_nr_dirty_pages();
+ if (nr_pages) {
+ struct wb_writeback_work work = {
+ .nr_pages = wb_split_bdi_pages(wb, nr_pages),
+ .sync_mode = WB_SYNC_NONE,
+ .range_cyclic = 1,
+ .reason = wb->start_all_reason,
+ };
+
+ nr_pages = wb_writeback(wb, &work);
+ }
+
+ clear_bit(WB_start_all, &wb->state);
+ return nr_pages;
+}
+
+
/*
* Retrieve work items and do the writeback they describe
*/
@@ -1893,6 +1909,11 @@ static long wb_do_writeback(struct bdi_writeback *wb)
}
/*
+ * Check for a flush-everything request
+ */
+ wrote += wb_check_start_all(wb);
+
+ /*
* Check for periodic writeback, kupdated() style
*/
wrote += wb_check_old_data_flush(wb);
@@ -1947,10 +1968,33 @@ void wb_workfn(struct work_struct *work)
}
/*
- * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
- * the whole world.
+ * Start writeback of `nr_pages' pages on this bdi. If `nr_pages' is zero,
+ * write back the whole world.
*/
-void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
+static void __wakeup_flusher_threads_bdi(struct backing_dev_info *bdi,
+ enum wb_reason reason)
+{
+ struct bdi_writeback *wb;
+
+ if (!bdi_has_dirty_io(bdi))
+ return;
+
+ list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node)
+ wb_start_writeback(wb, reason);
+}
+
+void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi,
+ enum wb_reason reason)
+{
+ rcu_read_lock();
+ __wakeup_flusher_threads_bdi(bdi, reason);
+ rcu_read_unlock();
+}
+
+/*
+ * Wakeup the flusher threads to start writeback of all currently dirty pages
+ */
+void wakeup_flusher_threads(enum wb_reason reason)
{
struct backing_dev_info *bdi;
@@ -1960,20 +2004,9 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
if (blk_needs_flush_plug(current))
blk_schedule_flush_plug(current);
- if (!nr_pages)
- nr_pages = get_nr_dirty_pages();
-
rcu_read_lock();
- list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
- struct bdi_writeback *wb;
-
- if (!bdi_has_dirty_io(bdi))
- continue;
-
- list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node)
- wb_start_writeback(wb, wb_split_bdi_pages(wb, nr_pages),
- false, reason);
- }
+ list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
+ __wakeup_flusher_threads_bdi(bdi, reason);
rcu_read_unlock();
}
@@ -2079,7 +2112,6 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
*/
void __mark_inode_dirty(struct inode *inode, int flags)
{
-#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
struct super_block *sb = inode->i_sb;
int dirtytime;
@@ -2089,7 +2121,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
* Don't do this for I_DIRTY_PAGES - that doesn't actually
* dirty the inode itself
*/
- if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_TIME)) {
+ if (flags & (I_DIRTY_INODE | I_DIRTY_TIME)) {
trace_writeback_dirty_inode_start(inode, flags);
if (sb->s_op->dirty_inode)
@@ -2164,7 +2196,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
if (dirtytime)
inode->dirtied_time_when = jiffies;
- if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES))
+ if (inode->i_state & I_DIRTY)
dirty_list = &wb->b_dirty;
else
dirty_list = &wb->b_dirty_time;
@@ -2188,8 +2220,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
}
out_unlock_inode:
spin_unlock(&inode->i_lock);
-
-#undef I_DIRTY_INODE
}
EXPORT_SYMBOL(__mark_inode_dirty);
@@ -2343,37 +2373,19 @@ void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
EXPORT_SYMBOL(writeback_inodes_sb);
/**
- * try_to_writeback_inodes_sb_nr - try to start writeback if none underway
+ * try_to_writeback_inodes_sb - try to start writeback if none underway
* @sb: the superblock
- * @nr: the number of pages to write
- * @reason: the reason of writeback
+ * @reason: reason why some writeback work was initiated
*
- * Invoke writeback_inodes_sb_nr if no writeback is currently underway.
- * Returns 1 if writeback was started, 0 if not.
+ * Invoke __writeback_inodes_sb_nr if no writeback is currently underway.
*/
-bool try_to_writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
- enum wb_reason reason)
+void try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
{
if (!down_read_trylock(&sb->s_umount))
- return false;
+ return;
- __writeback_inodes_sb_nr(sb, nr, reason, true);
+ __writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason, true);
up_read(&sb->s_umount);
- return true;
-}
-EXPORT_SYMBOL(try_to_writeback_inodes_sb_nr);
-
-/**
- * try_to_writeback_inodes_sb - try to start writeback if none underway
- * @sb: the superblock
- * @reason: reason why some writeback work was initiated
- *
- * Implement by try_to_writeback_inodes_sb_nr()
- * Returns 1 if writeback was started, 0 if not.
- */
-bool try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
-{
- return try_to_writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
}
EXPORT_SYMBOL(try_to_writeback_inodes_sb);