summaryrefslogtreecommitdiff
path: root/drivers/md/bcache/alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/bcache/alloc.c')
-rw-r--r--drivers/md/bcache/alloc.c210
1 files changed, 118 insertions, 92 deletions
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index ca4abe1ccd8d..7708d92df23e 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Primary bucket allocation code
*
@@ -23,21 +24,18 @@
* Since the gens and priorities are all stored contiguously on disk, we can
* batch this up: We fill up the free_inc list with freshly invalidated buckets,
* call prio_write(), and when prio_write() finishes we pull buckets off the
- * free_inc list and optionally discard them.
+ * free_inc list.
*
* free_inc isn't the only freelist - if it was, we'd often to sleep while
* priorities and gens were being written before we could allocate. c->free is a
* smaller freelist, and buckets on that list are always ready to be used.
*
- * If we've got discards enabled, that happens when a bucket moves from the
- * free_inc list to the free list.
- *
* There is another freelist, because sometimes we have buckets that we know
* have nothing pointing into them - these we can reuse without waiting for
* priorities to be rewritten. These come from freed btree nodes and buckets
* that garbage collection discovered no longer had valid keys pointing into
* them (because they were overwritten). That's the unused list - buckets on the
- * unused list move to the free list, optionally being discarded in the process.
+ * unused list move to the free list.
*
* It's also important to ensure that gens don't wrap around - with respect to
* either the oldest gen in the btree or the gen on disk. This is quite
@@ -48,7 +46,7 @@
*
* bch_bucket_alloc() allocates a single bucket from a specific cache.
*
- * bch_bucket_alloc_set() allocates one or more buckets from different caches
+ * bch_bucket_alloc_set() allocates one bucket from different caches
* out of a cache set.
*
* free_some_buckets() drives all the processes described above. It's called
@@ -68,6 +66,8 @@
#include <linux/random.h>
#include <trace/events/bcache.h>
+#define MAX_OPEN_BUCKETS 128
+
/* Bucket heap / gen */
uint8_t bch_inc_gen(struct cache *ca, struct bucket *b)
@@ -84,8 +84,7 @@ void bch_rescale_priorities(struct cache_set *c, int sectors)
{
struct cache *ca;
struct bucket *b;
- unsigned next = c->nbuckets * c->sb.bucket_size / 1024;
- unsigned i;
+ unsigned long next = c->nbuckets * c->cache->sb.bucket_size / 1024;
int r;
atomic_sub(sectors, &c->rescale);
@@ -101,14 +100,14 @@ void bch_rescale_priorities(struct cache_set *c, int sectors)
c->min_prio = USHRT_MAX;
- for_each_cache(ca, c, i)
- for_each_bucket(b, ca)
- if (b->prio &&
- b->prio != BTREE_PRIO &&
- !atomic_read(&b->pin)) {
- b->prio--;
- c->min_prio = min(c->min_prio, b->prio);
- }
+ ca = c->cache;
+ for_each_bucket(b, ca)
+ if (b->prio &&
+ b->prio != BTREE_PRIO &&
+ !atomic_read(&b->pin)) {
+ b->prio--;
+ c->min_prio = min(c->min_prio, b->prio);
+ }
mutex_unlock(&c->bucket_lock);
}
@@ -116,8 +115,7 @@ void bch_rescale_priorities(struct cache_set *c, int sectors)
/*
* Background allocation thread: scans for buckets to be invalidated,
* invalidates them, rewrites prios/gens (marking them as invalidated on disk),
- * then optionally issues discard commands to the newly free buckets, then puts
- * them on the various freelists.
+ * then puts them on the various freelists.
*/
static inline bool can_inc_bucket_gen(struct bucket *b)
@@ -127,12 +125,9 @@ static inline bool can_inc_bucket_gen(struct bucket *b)
bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b)
{
- BUG_ON(!ca->set->gc_mark_valid);
-
- return (!GC_MARK(b) ||
- GC_MARK(b) == GC_MARK_RECLAIMABLE) &&
- !atomic_read(&b->pin) &&
- can_inc_bucket_gen(b);
+ return (ca->set->gc_mark_valid || b->reclaimable_in_gc) &&
+ ((!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE) &&
+ !atomic_read(&b->pin) && can_inc_bucket_gen(b));
}
void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
@@ -146,6 +141,7 @@ void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
bch_inc_gen(ca, b);
b->prio = INITIAL_PRIO;
atomic_inc(&b->pin);
+ b->reclaimable_in_gc = 0;
}
static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
@@ -166,7 +162,7 @@ static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
#define bucket_prio(b) \
({ \
- unsigned min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; \
+ unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; \
\
(b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); \
})
@@ -241,6 +237,7 @@ static void invalidate_buckets_random(struct cache *ca)
while (!fifo_full(&ca->free_inc)) {
size_t n;
+
get_random_bytes(&n, sizeof(n));
n %= (size_t) (ca->sb.nbuckets - ca->sb.first_bucket);
@@ -284,8 +281,11 @@ do { \
break; \
\
mutex_unlock(&(ca)->set->bucket_lock); \
- if (kthread_should_stop()) \
- return 0; \
+ if (kthread_should_stop() || \
+ test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) { \
+ set_current_state(TASK_RUNNING); \
+ goto out; \
+ } \
\
schedule(); \
mutex_lock(&(ca)->set->bucket_lock); \
@@ -295,7 +295,7 @@ do { \
static int bch_allocator_push(struct cache *ca, long bucket)
{
- unsigned i;
+ unsigned int i;
/* Prios/gens are actually the most important reserve */
if (fifo_push(&ca->free[RESERVE_PRIO], bucket))
@@ -317,21 +317,13 @@ static int bch_allocator_thread(void *arg)
while (1) {
/*
* First, we pull buckets off of the unused and free_inc lists,
- * possibly issue discards to them, then we add the bucket to
- * the free list:
+ * then we add the bucket to the free list:
*/
- while (!fifo_empty(&ca->free_inc)) {
+ while (1) {
long bucket;
- fifo_pop(&ca->free_inc, bucket);
-
- if (ca->discard) {
- mutex_unlock(&ca->set->bucket_lock);
- blkdev_issue_discard(ca->bdev,
- bucket_to_sector(ca->set, bucket),
- ca->sb.bucket_size, GFP_KERNEL, 0);
- mutex_lock(&ca->set->bucket_lock);
- }
+ if (!fifo_pop(&ca->free_inc, bucket))
+ break;
allocator_wait(ca, bch_allocator_push(ca, bucket));
wake_up(&ca->set->btree_cache_wait);
@@ -345,8 +337,7 @@ static int bch_allocator_thread(void *arg)
*/
retry_invalidate:
- allocator_wait(ca, ca->set->gc_mark_valid &&
- !ca->invalidate_needs_gc);
+ allocator_wait(ca, !ca->invalidate_needs_gc);
invalidate_buckets(ca);
/*
@@ -354,7 +345,7 @@ retry_invalidate:
* new stuff to them:
*/
allocator_wait(ca, !atomic_read(&ca->set->prio_blocked));
- if (CACHE_SYNC(&ca->set->sb)) {
+ if (CACHE_SYNC(&ca->sb)) {
/*
* This could deadlock if an allocation with a btree
* node locked ever blocked - having the btree node
@@ -369,19 +360,30 @@ retry_invalidate:
if (!fifo_full(&ca->free_inc))
goto retry_invalidate;
- bch_prio_write(ca);
+ if (bch_prio_write(ca, false) < 0) {
+ ca->invalidate_needs_gc = 1;
+ wake_up_gc(ca->set);
+ }
}
}
+out:
+ wait_for_kthread_stop();
+ return 0;
}
/* Allocation */
-long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
+long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait)
{
DEFINE_WAIT(w);
struct bucket *b;
long r;
+
+ /* No allocation if CACHE_SET_IO_DISABLE bit is set */
+ if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)))
+ return -1;
+
/* fastpath */
if (fifo_pop(&ca->free[RESERVE_NONE], r) ||
fifo_pop(&ca->free[reserve], r))
@@ -397,21 +399,26 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
TASK_UNINTERRUPTIBLE);
mutex_unlock(&ca->set->bucket_lock);
+
+ atomic_inc(&ca->set->bucket_wait_cnt);
schedule();
+ atomic_dec(&ca->set->bucket_wait_cnt);
+
mutex_lock(&ca->set->bucket_lock);
} while (!fifo_pop(&ca->free[RESERVE_NONE], r) &&
!fifo_pop(&ca->free[reserve], r));
finish_wait(&ca->set->bucket_wait, &w);
out:
- wake_up_process(ca->alloc_thread);
+ if (ca->alloc_thread)
+ wake_up_process(ca->alloc_thread);
trace_bcache_alloc(ca, reserve);
if (expensive_debug_checks(ca->set)) {
size_t iter;
long i;
- unsigned j;
+ unsigned int j;
for (iter = 0; iter < prio_buckets(ca) * 2; iter++)
BUG_ON(ca->prio_buckets[iter] == (uint64_t) r);
@@ -439,6 +446,11 @@ out:
b->prio = INITIAL_PRIO;
}
+ if (ca->set->avail_nbuckets > 0) {
+ ca->set->avail_nbuckets--;
+ bch_update_bucket_in_use(ca->set, &ca->set->gc_stats);
+ }
+
return r;
}
@@ -446,56 +458,56 @@ void __bch_bucket_free(struct cache *ca, struct bucket *b)
{
SET_GC_MARK(b, 0);
SET_GC_SECTORS_USED(b, 0);
+
+ if (ca->set->avail_nbuckets < ca->set->nbuckets) {
+ ca->set->avail_nbuckets++;
+ bch_update_bucket_in_use(ca->set, &ca->set->gc_stats);
+ }
}
void bch_bucket_free(struct cache_set *c, struct bkey *k)
{
- unsigned i;
+ unsigned int i;
for (i = 0; i < KEY_PTRS(k); i++)
- __bch_bucket_free(PTR_CACHE(c, k, i),
- PTR_BUCKET(c, k, i));
+ __bch_bucket_free(c->cache, PTR_BUCKET(c, k, i));
}
-int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
- struct bkey *k, int n, bool wait)
+int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
+ struct bkey *k, bool wait)
{
- int i;
+ struct cache *ca;
+ long b;
+
+ /* No allocation if CACHE_SET_IO_DISABLE bit is set */
+ if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags)))
+ return -1;
lockdep_assert_held(&c->bucket_lock);
- BUG_ON(!n || n > c->caches_loaded || n > 8);
bkey_init(k);
- /* sort by free space/prio of oldest data in caches */
-
- for (i = 0; i < n; i++) {
- struct cache *ca = c->cache_by_alloc[i];
- long b = bch_bucket_alloc(ca, reserve, wait);
-
- if (b == -1)
- goto err;
+ ca = c->cache;
+ b = bch_bucket_alloc(ca, reserve, wait);
+ if (b < 0)
+ return -1;
- k->ptr[i] = PTR(ca->buckets[b].gen,
- bucket_to_sector(c, b),
- ca->sb.nr_this_dev);
+ k->ptr[0] = MAKE_PTR(ca->buckets[b].gen,
+ bucket_to_sector(c, b),
+ ca->sb.nr_this_dev);
- SET_KEY_PTRS(k, i + 1);
- }
+ SET_KEY_PTRS(k, 1);
return 0;
-err:
- bch_bucket_free(c, k);
- bkey_put(c, k);
- return -1;
}
-int bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
- struct bkey *k, int n, bool wait)
+int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
+ struct bkey *k, bool wait)
{
int ret;
+
mutex_lock(&c->bucket_lock);
- ret = __bch_bucket_alloc_set(c, reserve, k, n, wait);
+ ret = __bch_bucket_alloc_set(c, reserve, k, wait);
mutex_unlock(&c->bucket_lock);
return ret;
}
@@ -504,22 +516,28 @@ int bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
struct open_bucket {
struct list_head list;
- unsigned last_write_point;
- unsigned sectors_free;
+ unsigned int last_write_point;
+ unsigned int sectors_free;
BKEY_PADDED(key);
};
/*
* We keep multiple buckets open for writes, and try to segregate different
- * write streams for better cache utilization: first we look for a bucket where
- * the last write to it was sequential with the current write, and failing that
- * we look for a bucket that was last used by the same task.
+ * write streams for better cache utilization: first we try to segregate flash
+ * only volume write streams from cached devices, secondly we look for a bucket
+ * where the last write to it was sequential with the current write, and
+ * failing that we look for a bucket that was last used by the same task.
*
* The ideas is if you've got multiple tasks pulling data into the cache at the
* same time, you'll get better cache utilization if you try to segregate their
* data and preserve locality.
*
- * For example, say you've starting Firefox at the same time you're copying a
+ * For example, dirty sectors of flash only volume is not reclaimable, if their
+ * dirty sectors mixed with dirty sectors of cached device, such buckets will
+ * be marked as dirty and won't be reclaimed, though the dirty data of cached
+ * device have been written back to backend device.
+ *
+ * And say you've starting Firefox at the same time you're copying a
* bunch of files. Firefox will likely end up being fairly hot and stay in the
* cache awhile, but the data you copied might not be; if you wrote all that
* data to the same buckets it'd get invalidated at the same time.
@@ -530,13 +548,16 @@ struct open_bucket {
*/
static struct open_bucket *pick_data_bucket(struct cache_set *c,
const struct bkey *search,
- unsigned write_point,
+ unsigned int write_point,
struct bkey *alloc)
{
struct open_bucket *ret, *ret_task = NULL;
list_for_each_entry_reverse(ret, &c->data_buckets, list)
- if (!bkey_cmp(&ret->key, search))
+ if (UUID_FLASH_ONLY(&c->uuids[KEY_INODE(&ret->key)]) !=
+ UUID_FLASH_ONLY(&c->uuids[KEY_INODE(search)]))
+ continue;
+ else if (!bkey_cmp(&ret->key, search))
goto found;
else if (ret->last_write_point == write_point)
ret_task = ret;
@@ -545,7 +566,7 @@ static struct open_bucket *pick_data_bucket(struct cache_set *c,
struct open_bucket, list);
found:
if (!ret->sectors_free && KEY_PTRS(alloc)) {
- ret->sectors_free = c->sb.bucket_size;
+ ret->sectors_free = c->cache->sb.bucket_size;
bkey_copy(&ret->key, alloc);
bkey_init(alloc);
}
@@ -566,12 +587,16 @@ found:
*
* If s->writeback is true, will not fail.
*/
-bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors,
- unsigned write_point, unsigned write_prio, bool wait)
+bool bch_alloc_sectors(struct cache_set *c,
+ struct bkey *k,
+ unsigned int sectors,
+ unsigned int write_point,
+ unsigned int write_prio,
+ bool wait)
{
struct open_bucket *b;
BKEY_PADDED(key) alloc;
- unsigned i;
+ unsigned int i;
/*
* We might have to allocate a new bucket, which we can't do with a
@@ -584,13 +609,13 @@ bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors,
spin_lock(&c->data_bucket_lock);
while (!(b = pick_data_bucket(c, k, write_point, &alloc.key))) {
- unsigned watermark = write_prio
+ unsigned int watermark = write_prio
? RESERVE_MOVINGGC
: RESERVE_NONE;
spin_unlock(&c->data_bucket_lock);
- if (bch_bucket_alloc_set(c, watermark, &alloc.key, 1, wait))
+ if (bch_bucket_alloc_set(c, watermark, &alloc.key, wait))
return false;
spin_lock(&c->data_bucket_lock);
@@ -598,7 +623,7 @@ bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors,
/*
* If we had to allocate, we might race and not need to allocate the
- * second time we call find_data_bucket(). If we allocated a bucket but
+ * second time we call pick_data_bucket(). If we allocated a bucket but
* didn't use it, drop the refcount bch_bucket_alloc_set() took:
*/
if (KEY_PTRS(&alloc.key))
@@ -632,10 +657,10 @@ bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors,
SET_PTR_OFFSET(&b->key, i, PTR_OFFSET(&b->key, i) + sectors);
atomic_long_add(sectors,
- &PTR_CACHE(c, &b->key, i)->sectors_written);
+ &c->cache->sectors_written);
}
- if (b->sectors_free < c->sb.block_size)
+ if (b->sectors_free < c->cache->sb.block_size)
b->sectors_free = 0;
/*
@@ -671,8 +696,9 @@ int bch_open_buckets_alloc(struct cache_set *c)
spin_lock_init(&c->data_bucket_lock);
- for (i = 0; i < 6; i++) {
+ for (i = 0; i < MAX_OPEN_BUCKETS; i++) {
struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL);
+
if (!b)
return -ENOMEM;