summaryrefslogtreecommitdiff
path: root/mm/backing-dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/backing-dev.c')
-rw-r--r--mm/backing-dev.c246
1 files changed, 168 insertions, 78 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 3ffc3cfa7a14..c5740c6d37a2 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -16,6 +16,7 @@
#include <linux/writeback.h>
#include <linux/device.h>
#include <trace/events/writeback.h>
+#include "internal.h"
struct backing_dev_info noop_backing_dev_info;
EXPORT_SYMBOL_GPL(noop_backing_dev_info);
@@ -34,12 +35,23 @@ LIST_HEAD(bdi_list);
/* bdi_wq serves all asynchronous writeback tasks */
struct workqueue_struct *bdi_wq;
-#define K(x) ((x) << (PAGE_SHIFT - 10))
-
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>
#include <linux/seq_file.h>
+struct wb_stats {
+ unsigned long nr_dirty;
+ unsigned long nr_io;
+ unsigned long nr_more_io;
+ unsigned long nr_dirty_time;
+ unsigned long nr_writeback;
+ unsigned long nr_reclaimable;
+ unsigned long nr_dirtied;
+ unsigned long nr_written;
+ unsigned long dirty_thresh;
+ unsigned long wb_thresh;
+};
+
static struct dentry *bdi_debug_root;
static void bdi_debug_init(void)
@@ -47,31 +59,68 @@ static void bdi_debug_init(void)
bdi_debug_root = debugfs_create_dir("bdi", NULL);
}
-static int bdi_debug_stats_show(struct seq_file *m, void *v)
+static void collect_wb_stats(struct wb_stats *stats,
+ struct bdi_writeback *wb)
{
- struct backing_dev_info *bdi = m->private;
- struct bdi_writeback *wb = &bdi->wb;
- unsigned long background_thresh;
- unsigned long dirty_thresh;
- unsigned long wb_thresh;
- unsigned long nr_dirty, nr_io, nr_more_io, nr_dirty_time;
struct inode *inode;
- nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0;
spin_lock(&wb->list_lock);
list_for_each_entry(inode, &wb->b_dirty, i_io_list)
- nr_dirty++;
+ stats->nr_dirty++;
list_for_each_entry(inode, &wb->b_io, i_io_list)
- nr_io++;
+ stats->nr_io++;
list_for_each_entry(inode, &wb->b_more_io, i_io_list)
- nr_more_io++;
+ stats->nr_more_io++;
list_for_each_entry(inode, &wb->b_dirty_time, i_io_list)
- if (inode->i_state & I_DIRTY_TIME)
- nr_dirty_time++;
+ if (inode_state_read_once(inode) & I_DIRTY_TIME)
+ stats->nr_dirty_time++;
spin_unlock(&wb->list_lock);
+ stats->nr_writeback += wb_stat(wb, WB_WRITEBACK);
+ stats->nr_reclaimable += wb_stat(wb, WB_RECLAIMABLE);
+ stats->nr_dirtied += wb_stat(wb, WB_DIRTIED);
+ stats->nr_written += wb_stat(wb, WB_WRITTEN);
+ stats->wb_thresh += wb_calc_thresh(wb, stats->dirty_thresh);
+}
+
+#ifdef CONFIG_CGROUP_WRITEBACK
+static void bdi_collect_stats(struct backing_dev_info *bdi,
+ struct wb_stats *stats)
+{
+ struct bdi_writeback *wb;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node) {
+ if (!wb_tryget(wb))
+ continue;
+
+ collect_wb_stats(stats, wb);
+ wb_put(wb);
+ }
+ rcu_read_unlock();
+}
+#else
+static void bdi_collect_stats(struct backing_dev_info *bdi,
+ struct wb_stats *stats)
+{
+ collect_wb_stats(stats, &bdi->wb);
+}
+#endif
+
+static int bdi_debug_stats_show(struct seq_file *m, void *v)
+{
+ struct backing_dev_info *bdi = m->private;
+ unsigned long background_thresh;
+ unsigned long dirty_thresh;
+ struct wb_stats stats;
+ unsigned long tot_bw;
+
global_dirty_limits(&background_thresh, &dirty_thresh);
- wb_thresh = wb_calc_thresh(wb, dirty_thresh);
+
+ memset(&stats, 0, sizeof(stats));
+ stats.dirty_thresh = dirty_thresh;
+ bdi_collect_stats(bdi, &stats);
+ tot_bw = atomic_long_read(&bdi->tot_write_bandwidth);
seq_printf(m,
"BdiWriteback: %10lu kB\n"
@@ -88,37 +137,114 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
"b_dirty_time: %10lu\n"
"bdi_list: %10u\n"
"state: %10lx\n",
- (unsigned long) K(wb_stat(wb, WB_WRITEBACK)),
- (unsigned long) K(wb_stat(wb, WB_RECLAIMABLE)),
- K(wb_thresh),
+ K(stats.nr_writeback),
+ K(stats.nr_reclaimable),
+ K(stats.wb_thresh),
K(dirty_thresh),
K(background_thresh),
- (unsigned long) K(wb_stat(wb, WB_DIRTIED)),
- (unsigned long) K(wb_stat(wb, WB_WRITTEN)),
- (unsigned long) K(wb->write_bandwidth),
- nr_dirty,
- nr_io,
- nr_more_io,
- nr_dirty_time,
+ K(stats.nr_dirtied),
+ K(stats.nr_written),
+ K(tot_bw),
+ stats.nr_dirty,
+ stats.nr_io,
+ stats.nr_more_io,
+ stats.nr_dirty_time,
!list_empty(&bdi->bdi_list), bdi->wb.state);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(bdi_debug_stats);
+static void wb_stats_show(struct seq_file *m, struct bdi_writeback *wb,
+ struct wb_stats *stats)
+{
+
+ seq_printf(m,
+ "WbCgIno: %10lu\n"
+ "WbWriteback: %10lu kB\n"
+ "WbReclaimable: %10lu kB\n"
+ "WbDirtyThresh: %10lu kB\n"
+ "WbDirtied: %10lu kB\n"
+ "WbWritten: %10lu kB\n"
+ "WbWriteBandwidth: %10lu kBps\n"
+ "b_dirty: %10lu\n"
+ "b_io: %10lu\n"
+ "b_more_io: %10lu\n"
+ "b_dirty_time: %10lu\n"
+ "state: %10lx\n\n",
+#ifdef CONFIG_CGROUP_WRITEBACK
+ cgroup_ino(wb->memcg_css->cgroup),
+#else
+ 1ul,
+#endif
+ K(stats->nr_writeback),
+ K(stats->nr_reclaimable),
+ K(stats->wb_thresh),
+ K(stats->nr_dirtied),
+ K(stats->nr_written),
+ K(wb->avg_write_bandwidth),
+ stats->nr_dirty,
+ stats->nr_io,
+ stats->nr_more_io,
+ stats->nr_dirty_time,
+ wb->state);
+}
+
+static int cgwb_debug_stats_show(struct seq_file *m, void *v)
+{
+ struct backing_dev_info *bdi = m->private;
+ unsigned long background_thresh;
+ unsigned long dirty_thresh;
+ struct bdi_writeback *wb;
+
+ global_dirty_limits(&background_thresh, &dirty_thresh);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node) {
+ struct wb_stats stats = { .dirty_thresh = dirty_thresh };
+
+ if (!wb_tryget(wb))
+ continue;
+
+ collect_wb_stats(&stats, wb);
+
+ /*
+ * Calculate thresh of wb in writeback cgroup which is min of
+ * thresh in global domain and thresh in cgroup domain. Drop
+ * rcu lock because cgwb_calc_thresh may sleep in
+ * cgroup_rstat_flush. We can do so here because we have a ref.
+ */
+ if (mem_cgroup_wb_domain(wb)) {
+ rcu_read_unlock();
+ stats.wb_thresh = min(stats.wb_thresh, cgwb_calc_thresh(wb));
+ rcu_read_lock();
+ }
+
+ wb_stats_show(m, wb, &stats);
+
+ wb_put(wb);
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(cgwb_debug_stats);
+
static void bdi_debug_register(struct backing_dev_info *bdi, const char *name)
{
bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root);
debugfs_create_file("stats", 0444, bdi->debug_dir, bdi,
&bdi_debug_stats_fops);
+ debugfs_create_file("wb_stats", 0444, bdi->debug_dir, bdi,
+ &cgwb_debug_stats_fops);
}
static void bdi_debug_unregister(struct backing_dev_info *bdi)
{
debugfs_remove_recursive(bdi->debug_dir);
}
-#else
+#else /* CONFIG_DEBUG_FS */
static inline void bdi_debug_init(void)
{
}
@@ -129,7 +255,7 @@ static inline void bdi_debug_register(struct backing_dev_info *bdi,
static inline void bdi_debug_unregister(struct backing_dev_info *bdi)
{
}
-#endif
+#endif /* CONFIG_DEBUG_FS */
static ssize_t read_ahead_kb_store(struct device *dev,
struct device_attribute *attr,
@@ -373,31 +499,6 @@ static int __init default_bdi_init(void)
}
subsys_initcall(default_bdi_init);
-/*
- * This function is used when the first inode for this wb is marked dirty. It
- * wakes-up the corresponding bdi thread which should then take care of the
- * periodic background write-out of dirty inodes. Since the write-out would
- * starts only 'dirty_writeback_interval' centisecs from now anyway, we just
- * set up a timer which wakes the bdi thread up later.
- *
- * Note, we wouldn't bother setting up the timer, but this function is on the
- * fast-path (used by '__mark_inode_dirty()'), so we save few context switches
- * by delaying the wake-up.
- *
- * We have to be careful not to postpone flush work if it is scheduled for
- * earlier. Thus we use queue_delayed_work().
- */
-void wb_wakeup_delayed(struct bdi_writeback *wb)
-{
- unsigned long timeout;
-
- timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
- spin_lock_irq(&wb->work_lock);
- if (test_bit(WB_registered, &wb->state))
- queue_delayed_work(bdi_wq, &wb->dwork, timeout);
- spin_unlock_irq(&wb->work_lock);
-}
-
static void wb_update_bandwidth_workfn(struct work_struct *work)
{
struct bdi_writeback *wb = container_of(to_delayed_work(work),
@@ -409,12 +510,12 @@ static void wb_update_bandwidth_workfn(struct work_struct *work)
/*
* Initial write bandwidth: 100 MB/s
*/
-#define INIT_BW (100 << (20 - PAGE_SHIFT))
+#define INIT_BW MB_TO_PAGES(100)
static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
gfp_t gfp)
{
- int i, err;
+ int err;
memset(wb, 0, sizeof(*wb));
@@ -437,24 +538,15 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
INIT_LIST_HEAD(&wb->work_list);
INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
INIT_DELAYED_WORK(&wb->bw_dwork, wb_update_bandwidth_workfn);
- wb->dirty_sleep = jiffies;
err = fprop_local_init_percpu(&wb->completions, gfp);
if (err)
return err;
- for (i = 0; i < NR_WB_STAT_ITEMS; i++) {
- err = percpu_counter_init(&wb->stat[i], 0, gfp);
- if (err)
- goto out_destroy_stat;
- }
-
- return 0;
+ err = percpu_counter_init_many(wb->stat, 0, gfp, NR_WB_STAT_ITEMS);
+ if (err)
+ fprop_local_destroy_percpu(&wb->completions);
-out_destroy_stat:
- while (i--)
- percpu_counter_destroy(&wb->stat[i]);
- fprop_local_destroy_percpu(&wb->completions);
return err;
}
@@ -487,13 +579,8 @@ static void wb_shutdown(struct bdi_writeback *wb)
static void wb_exit(struct bdi_writeback *wb)
{
- int i;
-
WARN_ON(delayed_work_pending(&wb->dwork));
-
- for (i = 0; i < NR_WB_STAT_ITEMS; i++)
- percpu_counter_destroy(&wb->stat[i]);
-
+ percpu_counter_destroy_many(wb->stat, NR_WB_STAT_ITEMS);
fprop_local_destroy_percpu(&wb->completions);
}
@@ -546,6 +633,7 @@ static void cgwb_release_workfn(struct work_struct *work)
wb_exit(wb);
bdi_put(bdi);
WARN_ON_ONCE(!list_empty(&wb->b_attached));
+ WARN_ON_ONCE(work_pending(&wb->switch_work));
call_rcu(&wb->rcu, cgwb_free_rcu);
}
@@ -622,6 +710,8 @@ static int cgwb_create(struct backing_dev_info *bdi,
wb->memcg_css = memcg_css;
wb->blkcg_css = blkcg_css;
INIT_LIST_HEAD(&wb->b_attached);
+ INIT_WORK(&wb->switch_work, inode_switch_wbs_work_fn);
+ init_llist_head(&wb->switch_wbs_ctxs);
INIT_WORK(&wb->release_work, cgwb_release_workfn);
set_bit(WB_registered, &wb->state);
bdi_get(bdi);
@@ -733,9 +823,6 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
might_alloc(gfp);
- if (!memcg_css->parent)
- return &bdi->wb;
-
do {
wb = wb_get_lookup(bdi, memcg_css);
} while (!wb && !cgwb_create(bdi, memcg_css, gfp));
@@ -755,6 +842,8 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
if (!ret) {
bdi->wb.memcg_css = &root_mem_cgroup->css;
bdi->wb.blkcg_css = blkcg_root_css;
+ INIT_WORK(&bdi->wb.switch_work, inode_switch_wbs_work_fn);
+ init_llist_head(&bdi->wb.switch_wbs_ctxs);
}
return ret;
}
@@ -925,6 +1014,7 @@ int bdi_init(struct backing_dev_info *bdi)
INIT_LIST_HEAD(&bdi->bdi_list);
INIT_LIST_HEAD(&bdi->wb_list);
init_waitqueue_head(&bdi->wb_waitq);
+ bdi->last_bdp_sleep = jiffies;
return cgwb_bdi_init(bdi);
}
@@ -941,7 +1031,7 @@ struct backing_dev_info *bdi_alloc(int node_id)
kfree(bdi);
return NULL;
}
- bdi->capabilities = BDI_CAP_WRITEBACK | BDI_CAP_WRITEBACK_ACCT;
+ bdi->capabilities = BDI_CAP_WRITEBACK;
bdi->ra_pages = VM_READAHEAD_PAGES;
bdi->io_pages = VM_READAHEAD_PAGES;
timer_setup(&bdi->laptop_mode_wb_timer, laptop_mode_timer_fn, 0);
@@ -1066,7 +1156,7 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi)
void bdi_unregister(struct backing_dev_info *bdi)
{
- del_timer_sync(&bdi->laptop_mode_wb_timer);
+ timer_delete_sync(&bdi->laptop_mode_wb_timer);
/* make sure nobody finds us on the bdi_list anymore */
bdi_remove_from_list(bdi);