summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-12-17 08:27:23 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2012-12-17 08:27:23 -0800
commit60da5bf47dd3d301a1d3bd4f0a4b9e29a184515c (patch)
tree30de83370440aae5350d9ab3fbe6583abd439ee8 /mm
parent3c2e81ef344a90bb0a39d84af6878b4aeff568a2 (diff)
parentcbae8d45d61f3a8c155caf267d01e5e0f0b2f4b7 (diff)
Merge branch 'for-3.8/core' of git://git.kernel.dk/linux-block
Pull block layer core updates from Jens Axboe: "Here are the core block IO bits for 3.8. The branch contains: - The final version of the surprise device removal fixups from Bart. - Don't hide EFI partitions under advanced partition types. It's fairly wide spread these days. This is especially dangerous for systems that have both msdos and efi partition tables, where you want to keep them in sync. - Cleanup of using -1 instead of the proper NUMA_NO_NODE - Export control of bdi flusher thread CPU mask and default to using the home node (if known) from Jeff. - Export unplug tracepoint for MD. - Core improvements from Shaohua. Reinstate the recursive merge, as the original bug has been fixed. Add plugging for discard and also fix a problem handling non pow-of-2 discard limits. There's a trivial merge in block/blk-exec.c due to a fix that went into 3.7-rc at a later point than -rc4 where this is based." * 'for-3.8/core' of git://git.kernel.dk/linux-block: block: export block_unplug tracepoint block: add plug for blkdev_issue_discard block: discard granularity might not be power of 2 deadline: Allow 0ms deadline latency, increase the read speed partitions: enable EFI/GPT support by default bsg: Remove unused function bsg_goose_queue() block: Make blk_cleanup_queue() wait until request_fn finished block: Avoid scheduling delayed work on a dead queue block: Avoid that request_fn is invoked on a dead queue block: Let blk_drain_queue() caller obtain the queue lock block: Rename queue dead flag bdi: add a user-tunable cpu_list for the bdi flusher threads block: use NUMA_NO_NODE instead of -1 block: recursive merge requests block CFQ: avoid moving request to different queue
Diffstat (limited to 'mm')
-rw-r--r--mm/backing-dev.c84
1 files changed, 84 insertions, 0 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index d3ca2b3ee176..bd6a6cabef71 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/writeback.h>
#include <linux/device.h>
+#include <linux/slab.h>
#include <trace/events/writeback.h>
static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
@@ -221,12 +222,63 @@ static ssize_t max_ratio_store(struct device *dev,
}
BDI_SHOW(max_ratio, bdi->max_ratio)
+static ssize_t cpu_list_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct backing_dev_info *bdi = dev_get_drvdata(dev);
+ struct bdi_writeback *wb = &bdi->wb;
+ cpumask_var_t newmask;
+ ssize_t ret;
+ struct task_struct *task;
+
+ if (!alloc_cpumask_var(&newmask, GFP_KERNEL))
+ return -ENOMEM;
+
+ ret = cpulist_parse(buf, newmask);
+ if (!ret) {
+ spin_lock_bh(&bdi->wb_lock);
+ task = wb->task;
+ if (task)
+ get_task_struct(task);
+ spin_unlock_bh(&bdi->wb_lock);
+
+ mutex_lock(&bdi->flusher_cpumask_lock);
+ if (task) {
+ ret = set_cpus_allowed_ptr(task, newmask);
+ put_task_struct(task);
+ }
+ if (ret == 0) {
+ cpumask_copy(bdi->flusher_cpumask, newmask);
+ ret = count;
+ }
+ mutex_unlock(&bdi->flusher_cpumask_lock);
+
+ }
+ free_cpumask_var(newmask);
+
+ return ret;
+}
+
+static ssize_t cpu_list_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct backing_dev_info *bdi = dev_get_drvdata(dev);
+ ssize_t ret;
+
+ mutex_lock(&bdi->flusher_cpumask_lock);
+ ret = cpulist_scnprintf(page, PAGE_SIZE-1, bdi->flusher_cpumask);
+ mutex_unlock(&bdi->flusher_cpumask_lock);
+
+ return ret;
+}
+
#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
static struct device_attribute bdi_dev_attrs[] = {
__ATTR_RW(read_ahead_kb),
__ATTR_RW(min_ratio),
__ATTR_RW(max_ratio),
+ __ATTR_RW(cpu_list),
__ATTR_NULL,
};
@@ -428,6 +480,7 @@ static int bdi_forker_thread(void *ptr)
writeback_inodes_wb(&bdi->wb, 1024,
WB_REASON_FORKER_THREAD);
} else {
+ int ret;
/*
* The spinlock makes sure we do not lose
* wake-ups when racing with 'bdi_queue_work()'.
@@ -437,6 +490,14 @@ static int bdi_forker_thread(void *ptr)
spin_lock_bh(&bdi->wb_lock);
bdi->wb.task = task;
spin_unlock_bh(&bdi->wb_lock);
+ mutex_lock(&bdi->flusher_cpumask_lock);
+ ret = set_cpus_allowed_ptr(task,
+ bdi->flusher_cpumask);
+ mutex_unlock(&bdi->flusher_cpumask_lock);
+ if (ret)
+ printk_once("%s: failed to bind flusher"
+ " thread %s, error %d\n",
+ __func__, task->comm, ret);
wake_up_process(task);
}
bdi_clear_pending(bdi);
@@ -509,6 +570,17 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
dev_name(dev));
if (IS_ERR(wb->task))
return PTR_ERR(wb->task);
+ } else {
+ int node;
+ /*
+ * Set up a default cpumask for the flusher threads that
+ * includes all cpus on the same numa node as the device.
+ * The mask may be overridden via sysfs.
+ */
+ node = dev_to_node(bdi->dev);
+ if (node != NUMA_NO_NODE)
+ cpumask_copy(bdi->flusher_cpumask,
+ cpumask_of_node(node));
}
bdi_debug_register(bdi, dev_name(dev));
@@ -634,6 +706,15 @@ int bdi_init(struct backing_dev_info *bdi)
bdi_wb_init(&bdi->wb, bdi);
+ if (!bdi_cap_flush_forker(bdi)) {
+ bdi->flusher_cpumask = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+ if (!bdi->flusher_cpumask)
+ return -ENOMEM;
+ cpumask_setall(bdi->flusher_cpumask);
+ mutex_init(&bdi->flusher_cpumask_lock);
+ } else
+ bdi->flusher_cpumask = NULL;
+
for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
err = percpu_counter_init(&bdi->bdi_stat[i], 0);
if (err)
@@ -656,6 +737,7 @@ int bdi_init(struct backing_dev_info *bdi)
err:
while (i--)
percpu_counter_destroy(&bdi->bdi_stat[i]);
+ kfree(bdi->flusher_cpumask);
}
return err;
@@ -683,6 +765,8 @@ void bdi_destroy(struct backing_dev_info *bdi)
bdi_unregister(bdi);
+ kfree(bdi->flusher_cpumask);
+
/*
* If bdi_unregister() had already been called earlier, the
* wakeup_timer could still be armed because bdi_prune_sb()