summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-02-10 14:05:11 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-02-10 14:05:11 -0800
commit9454473c9dccb7b9d25e5baf915a082bfd490b33 (patch)
tree46f7f1a8886088e2f0184f1cf0e47c8ac12d4849
parentcc5cb5af3a3363bc6f0530703895bf9c5fa2f159 (diff)
parent8525e5ff456592effe83640ea1702525e35b0363 (diff)
Merge tag 'for-linus-20180210' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe: "A few fixes to round off the merge window on the block side: - a set of bcache fixes by way of Michael Lyle, from the usual bcache suspects. - add a simple-to-hook-into function for bpf EIO error injection. - fix blk-wbt that mischarectized flushes as reads. Improve the logic so that flushes and writes are accounted as writes, and only reads as reads. From me. - fix requeue crash in BFQ, from Paolo" * tag 'for-linus-20180210' of git://git.kernel.dk/linux-block: block, bfq: add requeue-request hook bcache: fix for data collapse after re-attaching an attached device bcache: return attach error when no cache set exist bcache: set writeback_rate_update_seconds in range [1, 60] seconds bcache: fix for allocator and register thread race bcache: set error_limit correctly bcache: properly set task state in bch_writeback_thread() bcache: fix high CPU occupancy during journal bcache: add journal statistic block: Add should_fail_bio() for bpf error injection blk-wbt: account flush requests correctly
-rw-r--r--block/bfq-iosched.c107
-rw-r--r--block/blk-core.c11
-rw-r--r--block/blk-wbt.c10
-rw-r--r--drivers/md/bcache/alloc.c4
-rw-r--r--drivers/md/bcache/bcache.h9
-rw-r--r--drivers/md/bcache/btree.c9
-rw-r--r--drivers/md/bcache/journal.c52
-rw-r--r--drivers/md/bcache/super.c25
-rw-r--r--drivers/md/bcache/sysfs.c34
-rw-r--r--drivers/md/bcache/util.h2
-rw-r--r--drivers/md/bcache/writeback.c9
-rw-r--r--drivers/md/bcache/writeback.h3
12 files changed, 212 insertions, 63 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 47e6ec7427c4..aeca22d91101 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -3823,24 +3823,26 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
}
/*
- * We exploit the bfq_finish_request hook to decrement
- * rq_in_driver, but bfq_finish_request will not be
- * invoked on this request. So, to avoid unbalance,
- * just start this request, without incrementing
- * rq_in_driver. As a negative consequence,
- * rq_in_driver is deceptively lower than it should be
- * while this request is in service. This may cause
- * bfq_schedule_dispatch to be invoked uselessly.
+ * We exploit the bfq_finish_requeue_request hook to
+ * decrement rq_in_driver, but
+ * bfq_finish_requeue_request will not be invoked on
+ * this request. So, to avoid unbalance, just start
+ * this request, without incrementing rq_in_driver. As
+ * a negative consequence, rq_in_driver is deceptively
+ * lower than it should be while this request is in
+ * service. This may cause bfq_schedule_dispatch to be
+ * invoked uselessly.
*
* As for implementing an exact solution, the
- * bfq_finish_request hook, if defined, is probably
- * invoked also on this request. So, by exploiting
- * this hook, we could 1) increment rq_in_driver here,
- * and 2) decrement it in bfq_finish_request. Such a
- * solution would let the value of the counter be
- * always accurate, but it would entail using an extra
- * interface function. This cost seems higher than the
- * benefit, being the frequency of non-elevator-private
+ * bfq_finish_requeue_request hook, if defined, is
+ * probably invoked also on this request. So, by
+ * exploiting this hook, we could 1) increment
+ * rq_in_driver here, and 2) decrement it in
+ * bfq_finish_requeue_request. Such a solution would
+ * let the value of the counter be always accurate,
+ * but it would entail using an extra interface
+ * function. This cost seems higher than the benefit,
+ * being the frequency of non-elevator-private
* requests very low.
*/
goto start_rq;
@@ -4515,6 +4517,8 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
unsigned int cmd_flags) {}
#endif
+static void bfq_prepare_request(struct request *rq, struct bio *bio);
+
static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool at_head)
{
@@ -4541,6 +4545,18 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
else
list_add_tail(&rq->queuelist, &bfqd->dispatch);
} else {
+ if (WARN_ON_ONCE(!bfqq)) {
+ /*
+ * This should never happen. Most likely rq is
+ * a requeued regular request, being
+ * re-inserted without being first
+ * re-prepared. Do a prepare, to avoid
+ * failure.
+ */
+ bfq_prepare_request(rq, rq->bio);
+ bfqq = RQ_BFQQ(rq);
+ }
+
idle_timer_disabled = __bfq_insert_request(bfqd, rq);
/*
* Update bfqq, because, if a queue merge has occurred
@@ -4697,22 +4713,44 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
bfq_schedule_dispatch(bfqd);
}
-static void bfq_finish_request_body(struct bfq_queue *bfqq)
+static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq)
{
bfqq->allocated--;
bfq_put_queue(bfqq);
}
-static void bfq_finish_request(struct request *rq)
+/*
+ * Handle either a requeue or a finish for rq. The things to do are
+ * the same in both cases: all references to rq are to be dropped. In
+ * particular, rq is considered completed from the point of view of
+ * the scheduler.
+ */
+static void bfq_finish_requeue_request(struct request *rq)
{
- struct bfq_queue *bfqq;
+ struct bfq_queue *bfqq = RQ_BFQQ(rq);
struct bfq_data *bfqd;
- if (!rq->elv.icq)
+ /*
+ * Requeue and finish hooks are invoked in blk-mq without
+ * checking whether the involved request is actually still
+ * referenced in the scheduler. To handle this fact, the
+ * following two checks make this function exit in case of
+ * spurious invocations, for which there is nothing to do.
+ *
+ * First, check whether rq has nothing to do with an elevator.
+ */
+ if (unlikely(!(rq->rq_flags & RQF_ELVPRIV)))
+ return;
+
+ /*
+ * rq either is not associated with any icq, or is an already
+ * requeued request that has not (yet) been re-inserted into
+ * a bfq_queue.
+ */
+ if (!rq->elv.icq || !bfqq)
return;
- bfqq = RQ_BFQQ(rq);
bfqd = bfqq->bfqd;
if (rq->rq_flags & RQF_STARTED)
@@ -4727,13 +4765,14 @@ static void bfq_finish_request(struct request *rq)
spin_lock_irqsave(&bfqd->lock, flags);
bfq_completed_request(bfqq, bfqd);
- bfq_finish_request_body(bfqq);
+ bfq_finish_requeue_request_body(bfqq);
spin_unlock_irqrestore(&bfqd->lock, flags);
} else {
/*
* Request rq may be still/already in the scheduler,
- * in which case we need to remove it. And we cannot
+ * in which case we need to remove it (this should
+ * never happen in case of requeue). And we cannot
* defer such a check and removal, to avoid
* inconsistencies in the time interval from the end
* of this function to the start of the deferred work.
@@ -4748,9 +4787,26 @@ static void bfq_finish_request(struct request *rq)
bfqg_stats_update_io_remove(bfqq_group(bfqq),
rq->cmd_flags);
}
- bfq_finish_request_body(bfqq);
+ bfq_finish_requeue_request_body(bfqq);
}
+ /*
+ * Reset private fields. In case of a requeue, this allows
+ * this function to correctly do nothing if it is spuriously
+ * invoked again on this same request (see the check at the
+ * beginning of the function). Probably, a better general
+ * design would be to prevent blk-mq from invoking the requeue
+ * or finish hooks of an elevator, for a request that is not
+ * referred by that elevator.
+ *
+ * Resetting the following fields would break the
+ * request-insertion logic if rq is re-inserted into a bfq
+ * internal queue, without a re-preparation. Here we assume
+ * that re-insertions of requeued requests, without
+ * re-preparation, can happen only for pass_through or at_head
+ * requests (which are not re-inserted into bfq internal
+ * queues).
+ */
rq->elv.priv[0] = NULL;
rq->elv.priv[1] = NULL;
}
@@ -5426,7 +5482,8 @@ static struct elevator_type iosched_bfq_mq = {
.ops.mq = {
.limit_depth = bfq_limit_depth,
.prepare_request = bfq_prepare_request,
- .finish_request = bfq_finish_request,
+ .requeue_request = bfq_finish_requeue_request,
+ .finish_request = bfq_finish_requeue_request,
.exit_icq = bfq_exit_icq,
.insert_requests = bfq_insert_requests,
.dispatch_request = bfq_dispatch_request,
diff --git a/block/blk-core.c b/block/blk-core.c
index d0d104268f1a..2d1a7bbe0634 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -34,6 +34,7 @@
#include <linux/pm_runtime.h>
#include <linux/blk-cgroup.h>
#include <linux/debugfs.h>
+#include <linux/bpf.h>
#define CREATE_TRACE_POINTS
#include <trace/events/block.h>
@@ -2083,6 +2084,14 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
return false;
}
+static noinline int should_fail_bio(struct bio *bio)
+{
+ if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
+ return -EIO;
+ return 0;
+}
+ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO);
+
/*
* Remap block n of partition p to block n+start(p) of the disk.
*/
@@ -2174,7 +2183,7 @@ generic_make_request_checks(struct bio *bio)
if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q))
goto not_supported;
- if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
+ if (should_fail_bio(bio))
goto end_io;
if (!bio->bi_partno) {
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index ae8de9780085..f92fc84b5e2c 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -697,7 +697,15 @@ u64 wbt_default_latency_nsec(struct request_queue *q)
static int wbt_data_dir(const struct request *rq)
{
- return rq_data_dir(rq);
+ const int op = req_op(rq);
+
+ if (op == REQ_OP_READ)
+ return READ;
+ else if (op == REQ_OP_WRITE || op == REQ_OP_FLUSH)
+ return WRITE;
+
+ /* don't account */
+ return -1;
}
int wbt_init(struct request_queue *q)
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 6cc6c0f9c3a9..458e1d38577d 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -287,8 +287,10 @@ do { \
break; \
\
mutex_unlock(&(ca)->set->bucket_lock); \
- if (kthread_should_stop()) \
+ if (kthread_should_stop()) { \
+ set_current_state(TASK_RUNNING); \
return 0; \
+ } \
\
schedule(); \
mutex_lock(&(ca)->set->bucket_lock); \
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 5e2d4e80198e..12e5197f186c 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -658,10 +658,15 @@ struct cache_set {
atomic_long_t writeback_keys_done;
atomic_long_t writeback_keys_failed;
+ atomic_long_t reclaim;
+ atomic_long_t flush_write;
+ atomic_long_t retry_flush_write;
+
enum {
ON_ERROR_UNREGISTER,
ON_ERROR_PANIC,
} on_error;
+#define DEFAULT_IO_ERROR_LIMIT 8
unsigned error_limit;
unsigned error_decay;
@@ -675,6 +680,8 @@ struct cache_set {
#define BUCKET_HASH_BITS 12
struct hlist_head bucket_hash[1 << BUCKET_HASH_BITS];
+
+ DECLARE_HEAP(struct btree *, flush_btree);
};
struct bbio {
@@ -917,7 +924,7 @@ void bcache_write_super(struct cache_set *);
int bch_flash_dev_create(struct cache_set *c, uint64_t size);
-int bch_cached_dev_attach(struct cached_dev *, struct cache_set *);
+int bch_cached_dev_attach(struct cached_dev *, struct cache_set *, uint8_t *);
void bch_cached_dev_detach(struct cached_dev *);
void bch_cached_dev_run(struct cached_dev *);
void bcache_device_stop(struct bcache_device *);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index bf3a48aa9a9a..fad9fe8817eb 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1869,14 +1869,17 @@ void bch_initial_gc_finish(struct cache_set *c)
*/
for_each_cache(ca, c, i) {
for_each_bucket(b, ca) {
- if (fifo_full(&ca->free[RESERVE_PRIO]))
+ if (fifo_full(&ca->free[RESERVE_PRIO]) &&
+ fifo_full(&ca->free[RESERVE_BTREE]))
break;
if (bch_can_invalidate_bucket(ca, b) &&
!GC_MARK(b)) {
__bch_invalidate_one_bucket(ca, b);
- fifo_push(&ca->free[RESERVE_PRIO],
- b - ca->buckets);
+ if (!fifo_push(&ca->free[RESERVE_PRIO],
+ b - ca->buckets))
+ fifo_push(&ca->free[RESERVE_BTREE],
+ b - ca->buckets);
}
}
}
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index a87165c1d8e5..1b736b860739 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -368,6 +368,12 @@ err:
}
/* Journalling */
+#define journal_max_cmp(l, r) \
+ (fifo_idx(&c->journal.pin, btree_current_write(l)->journal) < \
+ fifo_idx(&(c)->journal.pin, btree_current_write(r)->journal))
+#define journal_min_cmp(l, r) \
+ (fifo_idx(&c->journal.pin, btree_current_write(l)->journal) > \
+ fifo_idx(&(c)->journal.pin, btree_current_write(r)->journal))
static void btree_flush_write(struct cache_set *c)
{
@@ -375,28 +381,41 @@ static void btree_flush_write(struct cache_set *c)
* Try to find the btree node with that references the oldest journal
* entry, best is our current candidate and is locked if non NULL:
*/
- struct btree *b, *best;
- unsigned i;
+ struct btree *b;
+ int i;
+
+ atomic_long_inc(&c->flush_write);
+
retry:
- best = NULL;
-
- for_each_cached_btree(b, c, i)
- if (btree_current_write(b)->journal) {
- if (!best)
- best = b;
- else if (journal_pin_cmp(c,
- btree_current_write(best)->journal,
- btree_current_write(b)->journal)) {
- best = b;
+ spin_lock(&c->journal.lock);
+ if (heap_empty(&c->flush_btree)) {
+ for_each_cached_btree(b, c, i)
+ if (btree_current_write(b)->journal) {
+ if (!heap_full(&c->flush_btree))
+ heap_add(&c->flush_btree, b,
+ journal_max_cmp);
+ else if (journal_max_cmp(b,
+ heap_peek(&c->flush_btree))) {
+ c->flush_btree.data[0] = b;
+ heap_sift(&c->flush_btree, 0,
+ journal_max_cmp);
+ }
}
- }
- b = best;
+ for (i = c->flush_btree.used / 2 - 1; i >= 0; --i)
+ heap_sift(&c->flush_btree, i, journal_min_cmp);
+ }
+
+ b = NULL;
+ heap_pop(&c->flush_btree, b, journal_min_cmp);
+ spin_unlock(&c->journal.lock);
+
if (b) {
mutex_lock(&b->write_lock);
if (!btree_current_write(b)->journal) {
mutex_unlock(&b->write_lock);
/* We raced */
+ atomic_long_inc(&c->retry_flush_write);
goto retry;
}
@@ -476,6 +495,8 @@ static void journal_reclaim(struct cache_set *c)
unsigned iter, n = 0;
atomic_t p;
+ atomic_long_inc(&c->reclaim);
+
while (!atomic_read(&fifo_front(&c->journal.pin)))
fifo_pop(&c->journal.pin, p);
@@ -819,7 +840,8 @@ int bch_journal_alloc(struct cache_set *c)
j->w[0].c = c;
j->w[1].c = c;
- if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
+ if (!(init_heap(&c->flush_btree, 128, GFP_KERNEL)) ||
+ !(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
!(j->w[0].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)) ||
!(j->w[1].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)))
return -ENOMEM;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 133b81225ea9..312895788036 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -957,7 +957,8 @@ void bch_cached_dev_detach(struct cached_dev *dc)
cached_dev_put(dc);
}
-int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
+int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
+ uint8_t *set_uuid)
{
uint32_t rtime = cpu_to_le32(get_seconds());
struct uuid_entry *u;
@@ -965,7 +966,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
bdevname(dc->bdev, buf);
- if (memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16))
+ if ((set_uuid && memcmp(set_uuid, c->sb.set_uuid, 16)) ||
+ (!set_uuid && memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16)))
return -ENOENT;
if (dc->disk.c) {
@@ -1194,7 +1196,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page,
list_add(&dc->list, &uncached_devices);
list_for_each_entry(c, &bch_cache_sets, list)
- bch_cached_dev_attach(dc, c);
+ bch_cached_dev_attach(dc, c, NULL);
if (BDEV_STATE(&dc->sb) == BDEV_STATE_NONE ||
BDEV_STATE(&dc->sb) == BDEV_STATE_STALE)
@@ -1553,7 +1555,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
c->congested_read_threshold_us = 2000;
c->congested_write_threshold_us = 20000;
- c->error_limit = 8 << IO_ERROR_SHIFT;
+ c->error_limit = DEFAULT_IO_ERROR_LIMIT;
return c;
err:
@@ -1716,7 +1718,7 @@ static void run_cache_set(struct cache_set *c)
bcache_write_super(c);
list_for_each_entry_safe(dc, t, &uncached_devices, list)
- bch_cached_dev_attach(dc, c);
+ bch_cached_dev_attach(dc, c, NULL);
flash_devs_run(c);
@@ -1833,6 +1835,7 @@ void bch_cache_release(struct kobject *kobj)
static int cache_alloc(struct cache *ca)
{
size_t free;
+ size_t btree_buckets;
struct bucket *b;
__module_get(THIS_MODULE);
@@ -1840,9 +1843,19 @@ static int cache_alloc(struct cache *ca)
bio_init(&ca->journal.bio, ca->journal.bio.bi_inline_vecs, 8);
+ /*
+ * when ca->sb.njournal_buckets is not zero, journal exists,
+ * and in bch_journal_replay(), tree node may split,
+ * so bucket of RESERVE_BTREE type is needed,
+ * the worst situation is all journal buckets are valid journal,
+ * and all the keys need to replay,
+ * so the number of RESERVE_BTREE type buckets should be as much
+ * as journal buckets
+ */
+ btree_buckets = ca->sb.njournal_buckets ?: 8;
free = roundup_pow_of_two(ca->sb.nbuckets) >> 10;
- if (!init_fifo(&ca->free[RESERVE_BTREE], 8, GFP_KERNEL) ||
+ if (!init_fifo(&ca->free[RESERVE_BTREE], btree_buckets, GFP_KERNEL) ||
!init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) ||
!init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) ||
!init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) ||
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index b4184092c727..78cd7bd50fdd 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -65,6 +65,9 @@ read_attribute(bset_tree_stats);
read_attribute(state);
read_attribute(cache_read_races);
+read_attribute(reclaim);
+read_attribute(flush_write);
+read_attribute(retry_flush_write);
read_attribute(writeback_keys_done);
read_attribute(writeback_keys_failed);
read_attribute(io_errors);
@@ -195,7 +198,7 @@ STORE(__cached_dev)
{
struct cached_dev *dc = container_of(kobj, struct cached_dev,
disk.kobj);
- ssize_t v = size;
+ ssize_t v;
struct cache_set *c;
struct kobj_uevent_env *env;
@@ -215,7 +218,9 @@ STORE(__cached_dev)
sysfs_strtoul_clamp(writeback_rate,
dc->writeback_rate.rate, 1, INT_MAX);
- d_strtoul_nonzero(writeback_rate_update_seconds);
+ sysfs_strtoul_clamp(writeback_rate_update_seconds,
+ dc->writeback_rate_update_seconds,
+ 1, WRITEBACK_RATE_UPDATE_SECS_MAX);
d_strtoul(writeback_rate_i_term_inverse);
d_strtoul_nonzero(writeback_rate_p_term_inverse);
@@ -267,17 +272,20 @@ STORE(__cached_dev)
}
if (attr == &sysfs_attach) {
- if (bch_parse_uuid(buf, dc->sb.set_uuid) < 16)
+ uint8_t set_uuid[16];
+
+ if (bch_parse_uuid(buf, set_uuid) < 16)
return -EINVAL;
+ v = -ENOENT;
list_for_each_entry(c, &bch_cache_sets, list) {
- v = bch_cached_dev_attach(dc, c);
+ v = bch_cached_dev_attach(dc, c, set_uuid);
if (!v)
return size;
}
pr_err("Can't attach %s: cache set not found", buf);
- size = v;
+ return v;
}
if (attr == &sysfs_detach && dc->disk.c)
@@ -545,6 +553,15 @@ SHOW(__bch_cache_set)
sysfs_print(cache_read_races,
atomic_long_read(&c->cache_read_races));
+ sysfs_print(reclaim,
+ atomic_long_read(&c->reclaim));
+
+ sysfs_print(flush_write,
+ atomic_long_read(&c->flush_write));
+
+ sysfs_print(retry_flush_write,
+ atomic_long_read(&c->retry_flush_write));
+
sysfs_print(writeback_keys_done,
atomic_long_read(&c->writeback_keys_done));
sysfs_print(writeback_keys_failed,
@@ -556,7 +573,7 @@ SHOW(__bch_cache_set)
/* See count_io_errors for why 88 */
sysfs_print(io_error_halflife, c->error_decay * 88);
- sysfs_print(io_error_limit, c->error_limit >> IO_ERROR_SHIFT);
+ sysfs_print(io_error_limit, c->error_limit);
sysfs_hprint(congested,
((uint64_t) bch_get_congested(c)) << 9);
@@ -656,7 +673,7 @@ STORE(__bch_cache_set)
}
if (attr == &sysfs_io_error_limit)
- c->error_limit = strtoul_or_return(buf) << IO_ERROR_SHIFT;
+ c->error_limit = strtoul_or_return(buf);
/* See count_io_errors() for why 88 */
if (attr == &sysfs_io_error_halflife)
@@ -731,6 +748,9 @@ static struct attribute *bch_cache_set_internal_files[] = {
&sysfs_bset_tree_stats,
&sysfs_cache_read_races,
+ &sysfs_reclaim,
+ &sysfs_flush_write,
+ &sysfs_retry_flush_write,
&sysfs_writeback_keys_done,
&sysfs_writeback_keys_failed,
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 4df4c5c1cab2..a6763db7f061 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -112,6 +112,8 @@ do { \
#define heap_full(h) ((h)->used == (h)->size)
+#define heap_empty(h) ((h)->used == 0)
+
#define DECLARE_FIFO(type, name) \
struct { \
size_t front, back, size, mask; \
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 51306a19ab03..f1d2fc15abcc 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -564,18 +564,21 @@ static int bch_writeback_thread(void *arg)
while (!kthread_should_stop()) {
down_write(&dc->writeback_lock);
+ set_current_state(TASK_INTERRUPTIBLE);
if (!atomic_read(&dc->has_dirty) ||
(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
!dc->writeback_running)) {
up_write(&dc->writeback_lock);
- set_current_state(TASK_INTERRUPTIBLE);
- if (kthread_should_stop())
+ if (kthread_should_stop()) {
+ set_current_state(TASK_RUNNING);
return 0;
+ }
schedule();
continue;
}
+ set_current_state(TASK_RUNNING);
searched_full_index = refill_dirty(dc);
@@ -652,7 +655,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
dc->writeback_rate.rate = 1024;
dc->writeback_rate_minimum = 8;
- dc->writeback_rate_update_seconds = 5;
+ dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;
dc->writeback_rate_p_term_inverse = 40;
dc->writeback_rate_i_term_inverse = 10000;
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 66f1c527fa24..587b25599856 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -8,6 +8,9 @@
#define MAX_WRITEBACKS_IN_PASS 5
#define MAX_WRITESIZE_IN_PASS 5000 /* *512b */
+#define WRITEBACK_RATE_UPDATE_SECS_MAX 60
+#define WRITEBACK_RATE_UPDATE_SECS_DEFAULT 5
+
/*
* 14 (16384ths) is chosen here as something that each backing device
* should be a reasonable fraction of the share, and not to blow up