Diffstat (limited to 'drivers/md/dm-cache-policy-smq.c')
-rw-r--r--  drivers/md/dm-cache-policy-smq.c  821
 1 file changed, 461 insertions(+), 360 deletions(-)
diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c
index f19c6930a67c..e0c40aec5e96 100644
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -4,8 +4,9 @@
* This file is released under the GPL.
*/
-#include "dm-cache-policy.h"
+#include "dm-cache-background-tracker.h"
#include "dm-cache-policy-internal.h"
+#include "dm-cache-policy.h"
#include "dm.h"
#include <linux/hash.h>
@@ -38,10 +39,11 @@ struct entry {
unsigned hash_next:28;
unsigned prev:28;
unsigned next:28;
- unsigned level:7;
+ unsigned level:6;
bool dirty:1;
bool allocated:1;
bool sentinel:1;
+ bool pending_work:1;
dm_oblock_t oblock;
};
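
Note: all of the links in struct entry are 28-bit indexes into a preallocated entry space rather than pointers, which is why level is narrowed from 7 to 6 bits here — the new pending_work flag has to fit in the same word. A minimal userspace sketch of that indexing scheme (not part of the patch; the INDEXER_NULL value and flat-array layout are assumptions based on the to_entry()/to_index() helpers used later in this diff):

#include <stdbool.h>
#include <stddef.h>

#define SKETCH_INDEXER_NULL ((1u << 28) - 1u)	/* assumed "no entry" marker */

struct sketch_entry {
	unsigned hash_next:28;
	unsigned prev:28;
	unsigned next:28;
	unsigned level:6;	/* was 7 bits; narrowed to make room below */
	bool dirty:1;
	bool allocated:1;
	bool sentinel:1;
	bool pending_work:1;	/* the new flag this hunk adds */
};

struct sketch_space {
	struct sketch_entry *begin;	/* one flat, preallocated array */
};

/* Resolve an index to a pointer; SKETCH_INDEXER_NULL plays the role of NULL. */
static struct sketch_entry *sketch_to_entry(struct sketch_space *es, unsigned i)
{
	return i == SKETCH_INDEXER_NULL ? NULL : es->begin + i;
}

/* Recover an entry's index from its position in the array. */
static unsigned sketch_to_index(struct sketch_space *es, struct sketch_entry *e)
{
	return e ? (unsigned)(e - es->begin) : SKETCH_INDEXER_NULL;
}
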
@@ -279,14 +281,28 @@ static unsigned q_size(struct queue *q)
*/
static void q_push(struct queue *q, struct entry *e)
{
+ BUG_ON(e->pending_work);
+
if (!e->sentinel)
q->nr_elts++;
l_add_tail(q->es, q->qs + e->level, e);
}
+static void q_push_front(struct queue *q, struct entry *e)
+{
+ BUG_ON(e->pending_work);
+
+ if (!e->sentinel)
+ q->nr_elts++;
+
+ l_add_head(q->es, q->qs + e->level, e);
+}
+
static void q_push_before(struct queue *q, struct entry *old, struct entry *e)
{
+ BUG_ON(e->pending_work);
+
if (!e->sentinel)
q->nr_elts++;
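
Note: q_push_front() is new in this patch. Its eventual caller is push_front() (added further down), which smq_load_mapping() uses so that freshly loaded mappings land ahead of both writeback and demote sentinels and become eligible for cleaning or demotion straight away.
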
@@ -336,19 +352,6 @@ static struct entry *q_pop(struct queue *q)
}
/*
- * Pops an entry from a level that is not past a sentinel.
- */
-static struct entry *q_pop_old(struct queue *q, unsigned max_level)
-{
- struct entry *e = q_peek(q, max_level, false);
-
- if (e)
- q_del(q, e);
-
- return e;
-}
-
-/*
* This function assumes there is a non-sentinel entry to pop. It's only
* used by redistribute, so we know this is true. It also doesn't adjust
* the q->nr_elts count.
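
Note: q_pop_old() can go because its only user, pop_old() (removed later in this diff), existed solely for the old synchronous writeback path that this patch replaces with background-tracker work.
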
@@ -446,45 +449,49 @@ static void q_redistribute(struct queue *q)
break;
e->level = level + 1u;
- l_add_head(q->es, l_above, e);
+ l_add_tail(q->es, l_above, e);
}
}
}
-static void q_requeue_before(struct queue *q, struct entry *dest, struct entry *e, unsigned extra_levels)
+static void q_requeue(struct queue *q, struct entry *e, unsigned extra_levels,
+ struct entry *s1, struct entry *s2)
{
struct entry *de;
- unsigned new_level;
-
- q_del(q, e);
+ unsigned sentinels_passed = 0;
+ unsigned new_level = min(q->nr_levels - 1u, e->level + extra_levels);
+ /* try and find an entry to swap with */
if (extra_levels && (e->level < q->nr_levels - 1u)) {
- new_level = min(q->nr_levels - 1u, e->level + extra_levels);
- for (de = l_head(q->es, q->qs + new_level); de; de = l_next(q->es, de)) {
- if (de->sentinel)
- continue;
+ for (de = l_head(q->es, q->qs + new_level); de && de->sentinel; de = l_next(q->es, de))
+ sentinels_passed++;
+ if (de) {
q_del(q, de);
de->level = e->level;
+ if (s1) {
+ switch (sentinels_passed) {
+ case 0:
+ q_push_before(q, s1, de);
+ break;
+
+ case 1:
+ q_push_before(q, s2, de);
+ break;
- if (dest)
- q_push_before(q, dest, de);
- else
+ default:
+ q_push(q, de);
+ }
+ } else
q_push(q, de);
- break;
}
-
- e->level = new_level;
}
+ q_del(q, e);
+ e->level = new_level;
q_push(q, e);
}
-static void q_requeue(struct queue *q, struct entry *e, unsigned extra_levels)
-{
- q_requeue_before(q, NULL, e, extra_levels);
-}
-
/*----------------------------------------------------------------*/
#define FP_SHIFT 8
@@ -550,7 +557,7 @@ static enum performance stats_assess(struct stats *s)
/*----------------------------------------------------------------*/
-struct hash_table {
+struct smq_hash_table {
struct entry_space *es;
unsigned long long hash_bits;
unsigned *buckets;
@@ -560,7 +567,7 @@ struct hash_table {
* All cache entries are stored in a chained hash table. To save space we
* use indexing again, and only store indexes to the next entry.
*/
-static int h_init(struct hash_table *ht, struct entry_space *es, unsigned nr_entries)
+static int h_init(struct smq_hash_table *ht, struct entry_space *es, unsigned nr_entries)
{
unsigned i, nr_buckets;
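
As the comment above says, chaining uses indexes, not pointers. A compact sketch of the bucket operations (not part of the patch; it reuses the sketch_entry/SKETCH_INDEXER_NULL names assumed earlier):

/* Push an entry at the head of its bucket's chain. */
static void sketch_h_insert(unsigned *buckets, struct sketch_entry *pool,
			    unsigned bucket, unsigned idx)
{
	pool[idx].hash_next = buckets[bucket];
	buckets[bucket] = idx;
}

/* Walk a chain by index; a real lookup would compare oblocks as it goes. */
static unsigned sketch_chain_length(const unsigned *buckets,
				    const struct sketch_entry *pool,
				    unsigned bucket)
{
	unsigned i, n = 0;

	for (i = buckets[bucket]; i != SKETCH_INDEXER_NULL; i = pool[i].hash_next)
		n++;

	return n;
}
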
@@ -578,34 +585,34 @@ static int h_init(struct hash_table *ht, struct entry_space *es, unsigned nr_ent
return 0;
}
-static void h_exit(struct hash_table *ht)
+static void h_exit(struct smq_hash_table *ht)
{
vfree(ht->buckets);
}
-static struct entry *h_head(struct hash_table *ht, unsigned bucket)
+static struct entry *h_head(struct smq_hash_table *ht, unsigned bucket)
{
return to_entry(ht->es, ht->buckets[bucket]);
}
-static struct entry *h_next(struct hash_table *ht, struct entry *e)
+static struct entry *h_next(struct smq_hash_table *ht, struct entry *e)
{
return to_entry(ht->es, e->hash_next);
}
-static void __h_insert(struct hash_table *ht, unsigned bucket, struct entry *e)
+static void __h_insert(struct smq_hash_table *ht, unsigned bucket, struct entry *e)
{
e->hash_next = ht->buckets[bucket];
ht->buckets[bucket] = to_index(ht->es, e);
}
-static void h_insert(struct hash_table *ht, struct entry *e)
+static void h_insert(struct smq_hash_table *ht, struct entry *e)
{
unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits);
__h_insert(ht, h, e);
}
-static struct entry *__h_lookup(struct hash_table *ht, unsigned h, dm_oblock_t oblock,
+static struct entry *__h_lookup(struct smq_hash_table *ht, unsigned h, dm_oblock_t oblock,
struct entry **prev)
{
struct entry *e;
@@ -621,7 +628,7 @@ static struct entry *__h_lookup(struct hash_table *ht, unsigned h, dm_oblock_t o
return NULL;
}
-static void __h_unlink(struct hash_table *ht, unsigned h,
+static void __h_unlink(struct smq_hash_table *ht, unsigned h,
struct entry *e, struct entry *prev)
{
if (prev)
@@ -633,7 +640,7 @@ static void __h_unlink(struct hash_table *ht, unsigned h,
/*
* Also moves each entry to the front of the bucket.
*/
-static struct entry *h_lookup(struct hash_table *ht, dm_oblock_t oblock)
+static struct entry *h_lookup(struct smq_hash_table *ht, dm_oblock_t oblock)
{
struct entry *e, *prev;
unsigned h = hash_64(from_oblock(oblock), ht->hash_bits);
@@ -651,7 +658,7 @@ static struct entry *h_lookup(struct hash_table *ht, dm_oblock_t oblock)
return e;
}
-static void h_remove(struct hash_table *ht, struct entry *e)
+static void h_remove(struct smq_hash_table *ht, struct entry *e)
{
unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits);
struct entry *prev;
@@ -699,7 +706,10 @@ static void init_entry(struct entry *e)
e->next = INDEXER_NULL;
e->prev = INDEXER_NULL;
e->level = 0u;
+ e->dirty = true; /* FIXME: audit */
e->allocated = true;
+ e->sentinel = false;
+ e->pending_work = false;
}
static struct entry *alloc_entry(struct entry_alloc *ea)
@@ -762,11 +772,11 @@ static struct entry *get_entry(struct entry_alloc *ea, unsigned index)
#define NR_HOTSPOT_LEVELS 64u
#define NR_CACHE_LEVELS 64u
-#define WRITEBACK_PERIOD (10 * HZ)
-#define DEMOTE_PERIOD (60 * HZ)
+#define WRITEBACK_PERIOD (10ul * HZ)
+#define DEMOTE_PERIOD (60ul * HZ)
#define HOTSPOT_UPDATE_PERIOD (HZ)
-#define CACHE_UPDATE_PERIOD (10u * HZ)
+#define CACHE_UPDATE_PERIOD (60ul * HZ)
struct smq_policy {
struct dm_cache_policy policy;
@@ -814,8 +824,8 @@ struct smq_policy {
* The hash tables allows us to quickly find an entry by origin
* block.
*/
- struct hash_table table;
- struct hash_table hotspot_table;
+ struct smq_hash_table table;
+ struct smq_hash_table hotspot_table;
bool current_writeback_sentinels;
unsigned long next_writeback_period;
@@ -828,6 +838,10 @@ struct smq_policy {
unsigned long next_hotspot_period;
unsigned long next_cache_period;
+
+ struct background_tracker *bg_work;
+
+ bool migrations_allowed;
};
/*----------------------------------------------------------------*/
@@ -876,15 +890,15 @@ static void __update_demote_sentinels(struct smq_policy *mq)
static void update_sentinels(struct smq_policy *mq)
{
if (time_after(jiffies, mq->next_writeback_period)) {
- __update_writeback_sentinels(mq);
mq->next_writeback_period = jiffies + WRITEBACK_PERIOD;
mq->current_writeback_sentinels = !mq->current_writeback_sentinels;
+ __update_writeback_sentinels(mq);
}
if (time_after(jiffies, mq->next_demote_period)) {
- __update_demote_sentinels(mq);
mq->next_demote_period = jiffies + DEMOTE_PERIOD;
mq->current_demote_sentinels = !mq->current_demote_sentinels;
+ __update_demote_sentinels(mq);
}
}
@@ -920,55 +934,40 @@ static void sentinels_init(struct smq_policy *mq)
/*----------------------------------------------------------------*/
-/*
- * These methods tie together the dirty queue, clean queue and hash table.
- */
-static void push_new(struct smq_policy *mq, struct entry *e)
+static void del_queue(struct smq_policy *mq, struct entry *e)
{
- struct queue *q = e->dirty ? &mq->dirty : &mq->clean;
- h_insert(&mq->table, e);
- q_push(q, e);
+ q_del(e->dirty ? &mq->dirty : &mq->clean, e);
}
-static void push(struct smq_policy *mq, struct entry *e)
+static void push_queue(struct smq_policy *mq, struct entry *e)
{
- struct entry *sentinel;
-
- h_insert(&mq->table, e);
-
- /*
- * Punch this into the queue just in front of the sentinel, to
- * ensure it's cleaned straight away.
- */
- if (e->dirty) {
- sentinel = writeback_sentinel(mq, e->level);
- q_push_before(&mq->dirty, sentinel, e);
- } else {
- sentinel = demote_sentinel(mq, e->level);
- q_push_before(&mq->clean, sentinel, e);
- }
+ if (e->dirty)
+ q_push(&mq->dirty, e);
+ else
+ q_push(&mq->clean, e);
}
-/*
- * Removes an entry from cache. Removes from the hash table.
- */
-static void __del(struct smq_policy *mq, struct queue *q, struct entry *e)
+// !h, !q, a -> h, q, a
+static void push(struct smq_policy *mq, struct entry *e)
{
- q_del(q, e);
- h_remove(&mq->table, e);
+ h_insert(&mq->table, e);
+ if (!e->pending_work)
+ push_queue(mq, e);
}
-static void del(struct smq_policy *mq, struct entry *e)
+static void push_queue_front(struct smq_policy *mq, struct entry *e)
{
- __del(mq, e->dirty ? &mq->dirty : &mq->clean, e);
+ if (e->dirty)
+ q_push_front(&mq->dirty, e);
+ else
+ q_push_front(&mq->clean, e);
}
-static struct entry *pop_old(struct smq_policy *mq, struct queue *q, unsigned max_level)
+static void push_front(struct smq_policy *mq, struct entry *e)
{
- struct entry *e = q_pop_old(q, max_level);
- if (e)
- h_remove(&mq->table, e);
- return e;
+ h_insert(&mq->table, e);
+ if (!e->pending_work)
+ push_queue_front(mq, e);
}
static dm_cblock_t infer_cblock(struct smq_policy *mq, struct entry *e)
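
Note: the // !h, !q, a -> h, q, a annotations that start here read as a state triple: h — present in the hash table, q — linked on the clean/dirty queue, a — allocated. So push() takes an entry that is allocated but in neither structure and links it into both (the queue part is skipped while background work is pending on the entry).
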
@@ -978,16 +977,21 @@ static dm_cblock_t infer_cblock(struct smq_policy *mq, struct entry *e)
static void requeue(struct smq_policy *mq, struct entry *e)
{
- struct entry *sentinel;
+ /*
+ * Pending work has temporarily been taken out of the queues.
+ */
+ if (e->pending_work)
+ return;
if (!test_and_set_bit(from_cblock(infer_cblock(mq, e)), mq->cache_hit_bits)) {
- if (e->dirty) {
- sentinel = writeback_sentinel(mq, e->level);
- q_requeue_before(&mq->dirty, sentinel, e, 1u);
- } else {
- sentinel = demote_sentinel(mq, e->level);
- q_requeue_before(&mq->clean, sentinel, e, 1u);
+ if (!e->dirty) {
+ q_requeue(&mq->clean, e, 1u, NULL, NULL);
+ return;
}
+
+ q_requeue(&mq->dirty, e, 1u,
+ get_sentinel(&mq->writeback_sentinel_alloc, e->level, !mq->current_writeback_sentinels),
+ get_sentinel(&mq->writeback_sentinel_alloc, e->level, mq->current_writeback_sentinels));
}
}
@@ -1026,6 +1030,8 @@ static void update_promote_levels(struct smq_policy *mq)
unsigned threshold_level = allocator_empty(&mq->cache_alloc) ?
default_promote_level(mq) : (NR_HOTSPOT_LEVELS / 2u);
+ threshold_level = max(threshold_level, NR_HOTSPOT_LEVELS);
+
/*
* If the hotspot queue is performing badly then we have little
* confidence that we know which blocks to promote. So we cut down
@@ -1045,7 +1051,7 @@ static void update_promote_levels(struct smq_policy *mq)
}
mq->read_promote_level = NR_HOTSPOT_LEVELS - threshold_level;
- mq->write_promote_level = (NR_HOTSPOT_LEVELS - threshold_level) + 2u;
+ mq->write_promote_level = (NR_HOTSPOT_LEVELS - threshold_level);
}
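
Note: write_promote_level used to sit two levels above read_promote_level, making writes harder to promote than reads; after this change both use the same threshold.
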
/*
@@ -1095,34 +1101,142 @@ static void end_cache_period(struct smq_policy *mq)
}
}
-static int demote_cblock(struct smq_policy *mq,
- struct policy_locker *locker,
- dm_oblock_t *oblock)
+/*----------------------------------------------------------------*/
+
+/*
+ * Targets are given as a percentage.
+ */
+#define CLEAN_TARGET 25u
+#define FREE_TARGET 25u
+
+static unsigned percent_to_target(struct smq_policy *mq, unsigned p)
{
- struct entry *demoted = q_peek(&mq->clean, mq->clean.nr_levels, false);
- if (!demoted)
- /*
- * We could get a block from mq->dirty, but that
- * would add extra latency to the triggering bio as it
- * waits for the writeback. Better to not promote this
- * time and hope there's a clean block next time this block
- * is hit.
- */
- return -ENOSPC;
+ return from_cblock(mq->cache_size) * p / 100u;
+}
+
+static bool clean_target_met(struct smq_policy *mq, bool idle)
+{
+ /*
+ * Cache entries may not be populated. So we cannot rely on the
+ * size of the clean queue.
+ */
+ unsigned nr_clean = from_cblock(mq->cache_size) - q_size(&mq->dirty);
- if (locker->fn(locker, demoted->oblock))
+ if (idle)
/*
- * We couldn't lock this block.
+ * We'd like to clean everything.
*/
- return -EBUSY;
+ return q_size(&mq->dirty) == 0u;
+ else
+ return (nr_clean + btracker_nr_writebacks_queued(mq->bg_work)) >=
+ percent_to_target(mq, CLEAN_TARGET);
+}
- del(mq, demoted);
- *oblock = demoted->oblock;
- free_entry(&mq->cache_alloc, demoted);
+static bool free_target_met(struct smq_policy *mq, bool idle)
+{
+ unsigned nr_free = from_cblock(mq->cache_size) -
+ mq->cache_alloc.nr_allocated;
- return 0;
+ if (idle)
+ return (nr_free + btracker_nr_demotions_queued(mq->bg_work)) >=
+ percent_to_target(mq, FREE_TARGET);
+ else
+ return true;
}
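
A worked example of the new target arithmetic (cache size invented for illustration): with a 10000-block cache, percent_to_target() turns CLEAN_TARGET and FREE_TARGET (both 25u) into 2500 blocks. Under I/O load the clean target is met once clean blocks plus already-queued writebacks reach 2500, while when idle the policy wants the dirty queue empty; the free target is only enforced when idle, where free blocks plus queued demotions must reach 2500.
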
+/*----------------------------------------------------------------*/
+
+static void mark_pending(struct smq_policy *mq, struct entry *e)
+{
+ BUG_ON(e->sentinel);
+ BUG_ON(!e->allocated);
+ BUG_ON(e->pending_work);
+ e->pending_work = true;
+}
+
+static void clear_pending(struct smq_policy *mq, struct entry *e)
+{
+ BUG_ON(!e->pending_work);
+ e->pending_work = false;
+}
+
+static void queue_writeback(struct smq_policy *mq)
+{
+ int r;
+ struct policy_work work;
+ struct entry *e;
+
+ e = q_peek(&mq->dirty, mq->dirty.nr_levels, !mq->migrations_allowed);
+ if (e) {
+ mark_pending(mq, e);
+ q_del(&mq->dirty, e);
+
+ work.op = POLICY_WRITEBACK;
+ work.oblock = e->oblock;
+ work.cblock = infer_cblock(mq, e);
+
+ r = btracker_queue(mq->bg_work, &work, NULL);
+ WARN_ON_ONCE(r); // FIXME: finish, I think we have to get rid of this race.
+ }
+}
+
+static void queue_demotion(struct smq_policy *mq)
+{
+ struct policy_work work;
+ struct entry *e;
+
+ if (unlikely(WARN_ON_ONCE(!mq->migrations_allowed)))
+ return;
+
+ e = q_peek(&mq->clean, mq->clean.nr_levels, true);
+ if (!e) {
+ if (!clean_target_met(mq, false))
+ queue_writeback(mq);
+ return;
+ }
+
+ mark_pending(mq, e);
+ q_del(&mq->clean, e);
+
+ work.op = POLICY_DEMOTE;
+ work.oblock = e->oblock;
+ work.cblock = infer_cblock(mq, e);
+ btracker_queue(mq->bg_work, &work, NULL);
+}
+
+static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock,
+ struct policy_work **workp)
+{
+ struct entry *e;
+ struct policy_work work;
+
+ if (!mq->migrations_allowed)
+ return;
+
+ if (allocator_empty(&mq->cache_alloc)) {
+ if (!free_target_met(mq, false))
+ queue_demotion(mq);
+ return;
+ }
+
+ if (btracker_promotion_already_present(mq->bg_work, oblock))
+ return;
+
+ /*
+ * We allocate the entry now to reserve the cblock. If the
+ * background work is aborted we must remember to free it.
+ */
+ e = alloc_entry(&mq->cache_alloc);
+ BUG_ON(!e);
+ e->pending_work = true;
+ work.op = POLICY_PROMOTE;
+ work.oblock = oblock;
+ work.cblock = infer_cblock(mq, e);
+ btracker_queue(mq->bg_work, &work, workp);
+}
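
Note: the comment above is the key invariant — the destination cblock is reserved by allocating its entry before any copying starts. If the promotion is later aborted, the !success branch of the POLICY_PROMOTE case in __complete_background_work() (further down) is what remembers to hand the entry back via free_entry().
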
+
+/*----------------------------------------------------------------*/
+
enum promote_result {
PROMOTE_NOT,
PROMOTE_TEMPORARY,
@@ -1137,49 +1251,18 @@ static enum promote_result maybe_promote(bool promote)
return promote ? PROMOTE_PERMANENT : PROMOTE_NOT;
}
-static enum promote_result should_promote(struct smq_policy *mq, struct entry *hs_e, struct bio *bio,
- bool fast_promote)
+static enum promote_result should_promote(struct smq_policy *mq, struct entry *hs_e,
+ int data_dir, bool fast_promote)
{
- if (bio_data_dir(bio) == WRITE) {
+ if (data_dir == WRITE) {
if (!allocator_empty(&mq->cache_alloc) && fast_promote)
return PROMOTE_TEMPORARY;
- else
- return maybe_promote(hs_e->level >= mq->write_promote_level);
+ return maybe_promote(hs_e->level >= mq->write_promote_level);
} else
return maybe_promote(hs_e->level >= mq->read_promote_level);
}
-static void insert_in_cache(struct smq_policy *mq, dm_oblock_t oblock,
- struct policy_locker *locker,
- struct policy_result *result, enum promote_result pr)
-{
- int r;
- struct entry *e;
-
- if (allocator_empty(&mq->cache_alloc)) {
- result->op = POLICY_REPLACE;
- r = demote_cblock(mq, locker, &result->old_oblock);
- if (r) {
- result->op = POLICY_MISS;
- return;
- }
-
- } else
- result->op = POLICY_NEW;
-
- e = alloc_entry(&mq->cache_alloc);
- BUG_ON(!e);
- e->oblock = oblock;
-
- if (pr == PROMOTE_TEMPORARY)
- push(mq, e);
- else
- push_new(mq, e);
-
- result->cblock = infer_cblock(mq, e);
-}
-
static dm_oblock_t to_hblock(struct smq_policy *mq, dm_oblock_t b)
{
sector_t r = from_oblock(b);
@@ -1187,7 +1270,7 @@ static dm_oblock_t to_hblock(struct smq_policy *mq, dm_oblock_t b)
return to_oblock(r);
}
-static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b, struct bio *bio)
+static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b)
{
unsigned hi;
dm_oblock_t hb = to_hblock(mq, b);
@@ -1199,7 +1282,8 @@ static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b,
hi = get_index(&mq->hotspot_alloc, e);
q_requeue(&mq->hotspot, e,
test_and_set_bit(hi, mq->hotspot_hit_bits) ?
- 0u : mq->hotspot_level_jump);
+ 0u : mq->hotspot_level_jump,
+ NULL, NULL);
} else {
stats_miss(&mq->hotspot_stats);
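
Note: the hit bit makes the big level jump a once-per-period event — the first hit on a hotspot block since the bits were last cleared requeues it hotspot_level_jump levels up, while repeat hits within the same period requeue with extra_levels of 0.
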
@@ -1225,47 +1309,6 @@ static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b,
return e;
}
-/*
- * Looks the oblock up in the hash table, then decides whether to put in
- * pre_cache, or cache etc.
- */
-static int map(struct smq_policy *mq, struct bio *bio, dm_oblock_t oblock,
- bool can_migrate, bool fast_promote,
- struct policy_locker *locker, struct policy_result *result)
-{
- struct entry *e, *hs_e;
- enum promote_result pr;
-
- hs_e = update_hotspot_queue(mq, oblock, bio);
-
- e = h_lookup(&mq->table, oblock);
- if (e) {
- stats_level_accessed(&mq->cache_stats, e->level);
-
- requeue(mq, e);
- result->op = POLICY_HIT;
- result->cblock = infer_cblock(mq, e);
-
- } else {
- stats_miss(&mq->cache_stats);
-
- pr = should_promote(mq, hs_e, bio, fast_promote);
- if (pr == PROMOTE_NOT)
- result->op = POLICY_MISS;
-
- else {
- if (!can_migrate) {
- result->op = POLICY_MISS;
- return -EWOULDBLOCK;
- }
-
- insert_in_cache(mq, oblock, locker, result, pr);
- }
- }
-
- return 0;
-}
-
/*----------------------------------------------------------------*/
/*
@@ -1282,6 +1325,7 @@ static void smq_destroy(struct dm_cache_policy *p)
{
struct smq_policy *mq = to_smq_policy(p);
+ btracker_destroy(mq->bg_work);
h_exit(&mq->hotspot_table);
h_exit(&mq->table);
free_bitset(mq->hotspot_hit_bits);
@@ -1290,234 +1334,247 @@ static void smq_destroy(struct dm_cache_policy *p)
kfree(mq);
}
-static int smq_map(struct dm_cache_policy *p, dm_oblock_t oblock,
- bool can_block, bool can_migrate, bool fast_promote,
- struct bio *bio, struct policy_locker *locker,
- struct policy_result *result)
-{
- int r;
- unsigned long flags;
- struct smq_policy *mq = to_smq_policy(p);
-
- result->op = POLICY_MISS;
-
- spin_lock_irqsave(&mq->lock, flags);
- r = map(mq, bio, oblock, can_migrate, fast_promote, locker, result);
- spin_unlock_irqrestore(&mq->lock, flags);
-
- return r;
-}
+/*----------------------------------------------------------------*/
-static int smq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock)
+static int __lookup(struct smq_policy *mq, dm_oblock_t oblock, dm_cblock_t *cblock,
+ int data_dir, bool fast_copy,
+ struct policy_work **work, bool *background_work)
{
- int r;
- unsigned long flags;
- struct smq_policy *mq = to_smq_policy(p);
- struct entry *e;
+ struct entry *e, *hs_e;
+ enum promote_result pr;
+
+ *background_work = false;
- spin_lock_irqsave(&mq->lock, flags);
e = h_lookup(&mq->table, oblock);
if (e) {
+ stats_level_accessed(&mq->cache_stats, e->level);
+
+ requeue(mq, e);
*cblock = infer_cblock(mq, e);
- r = 0;
- } else
- r = -ENOENT;
- spin_unlock_irqrestore(&mq->lock, flags);
+ return 0;
- return r;
-}
+ } else {
+ stats_miss(&mq->cache_stats);
-static void __smq_set_clear_dirty(struct smq_policy *mq, dm_oblock_t oblock, bool set)
-{
- struct entry *e;
+ /*
+ * The hotspot queue only gets updated with misses.
+ */
+ hs_e = update_hotspot_queue(mq, oblock);
- e = h_lookup(&mq->table, oblock);
- BUG_ON(!e);
+ pr = should_promote(mq, hs_e, data_dir, fast_copy);
+ if (pr != PROMOTE_NOT) {
+ queue_promotion(mq, oblock, work);
+ *background_work = true;
+ }
- del(mq, e);
- e->dirty = set;
- push(mq, e);
+ return -ENOENT;
+ }
}
-static void smq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
+static int smq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock,
+ int data_dir, bool fast_copy,
+ bool *background_work)
{
+ int r;
unsigned long flags;
struct smq_policy *mq = to_smq_policy(p);
spin_lock_irqsave(&mq->lock, flags);
- __smq_set_clear_dirty(mq, oblock, true);
+ r = __lookup(mq, oblock, cblock,
+ data_dir, fast_copy,
+ NULL, background_work);
spin_unlock_irqrestore(&mq->lock, flags);
+
+ return r;
}
-static void smq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
+static int smq_lookup_with_work(struct dm_cache_policy *p,
+ dm_oblock_t oblock, dm_cblock_t *cblock,
+ int data_dir, bool fast_copy,
+ struct policy_work **work)
{
- struct smq_policy *mq = to_smq_policy(p);
+ int r;
+ bool background_queued;
unsigned long flags;
+ struct smq_policy *mq = to_smq_policy(p);
spin_lock_irqsave(&mq->lock, flags);
- __smq_set_clear_dirty(mq, oblock, false);
+ r = __lookup(mq, oblock, cblock, data_dir, fast_copy, work, &background_queued);
spin_unlock_irqrestore(&mq->lock, flags);
-}
-static unsigned random_level(dm_cblock_t cblock)
-{
- return hash_32(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1);
+ return r;
}
-static int smq_load_mapping(struct dm_cache_policy *p,
- dm_oblock_t oblock, dm_cblock_t cblock,
- uint32_t hint, bool hint_valid)
+static int smq_get_background_work(struct dm_cache_policy *p, bool idle,
+ struct policy_work **result)
{
+ int r;
+ unsigned long flags;
struct smq_policy *mq = to_smq_policy(p);
- struct entry *e;
- e = alloc_particular_entry(&mq->cache_alloc, from_cblock(cblock));
- e->oblock = oblock;
- e->dirty = false; /* this gets corrected in a minute */
- e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : random_level(cblock);
- push(mq, e);
-
- return 0;
-}
+ spin_lock_irqsave(&mq->lock, flags);
+ r = btracker_issue(mq->bg_work, result);
+ if (r == -ENODATA) {
+ /* find some writeback work to do */
+ if (mq->migrations_allowed && !free_target_met(mq, idle))
+ queue_demotion(mq);
-static uint32_t smq_get_hint(struct dm_cache_policy *p, dm_cblock_t cblock)
-{
- struct smq_policy *mq = to_smq_policy(p);
- struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));
+ else if (!clean_target_met(mq, idle))
+ queue_writeback(mq);
- if (!e->allocated)
- return 0;
+ r = btracker_issue(mq->bg_work, result);
+ }
+ spin_unlock_irqrestore(&mq->lock, flags);
- return e->level;
+ return r;
}
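
Note: smq_get_background_work() is pull-based. The cache target polls it; only when btracker_issue() reports -ENODATA does the policy synthesize fresh work — a demotion if the free target is unmet (and migrations are allowed), otherwise a writeback if the clean target is unmet — then retries the issue once.
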
-static void __remove_mapping(struct smq_policy *mq, dm_oblock_t oblock)
-{
- struct entry *e;
+/*
+ * We need to clear any pending work flags that have been set, and in the
+ * case of promotion free the entry for the destination cblock.
+ */
+static void __complete_background_work(struct smq_policy *mq,
+ struct policy_work *work,
+ bool success)
+{
+ struct entry *e = get_entry(&mq->cache_alloc,
+ from_cblock(work->cblock));
+
+ switch (work->op) {
+ case POLICY_PROMOTE:
+ // !h, !q, a
+ clear_pending(mq, e);
+ if (success) {
+ e->oblock = work->oblock;
+ push(mq, e);
+ // h, q, a
+ } else {
+ free_entry(&mq->cache_alloc, e);
+ // !h, !q, !a
+ }
+ break;
- e = h_lookup(&mq->table, oblock);
- BUG_ON(!e);
+ case POLICY_DEMOTE:
+ // h, !q, a
+ if (success) {
+ h_remove(&mq->table, e);
+ free_entry(&mq->cache_alloc, e);
+ // !h, !q, !a
+ } else {
+ clear_pending(mq, e);
+ push_queue(mq, e);
+ // h, q, a
+ }
+ break;
- del(mq, e);
- free_entry(&mq->cache_alloc, e);
+ case POLICY_WRITEBACK:
+ // h, !q, a
+ clear_pending(mq, e);
+ push_queue(mq, e);
+ // h, q, a
+ break;
+ }
+
+ btracker_complete(mq->bg_work, work);
}
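
Note: read with the h/q/a key from earlier — a successful promote ends at h, q, a; a successful demote drops the entry entirely (!h, !q, !a); a failed demote and every writeback completion simply clear pending_work and relink the still-hashed entry onto its queue.
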
-static void smq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
+static void smq_complete_background_work(struct dm_cache_policy *p,
+ struct policy_work *work,
+ bool success)
{
- struct smq_policy *mq = to_smq_policy(p);
unsigned long flags;
+ struct smq_policy *mq = to_smq_policy(p);
spin_lock_irqsave(&mq->lock, flags);
- __remove_mapping(mq, oblock);
+ __complete_background_work(mq, work, success);
spin_unlock_irqrestore(&mq->lock, flags);
}
-static int __remove_cblock(struct smq_policy *mq, dm_cblock_t cblock)
+// in_hash(oblock) -> in_hash(oblock)
+static void __smq_set_clear_dirty(struct smq_policy *mq, dm_cblock_t cblock, bool set)
{
struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));
- if (!e || !e->allocated)
- return -ENODATA;
-
- del(mq, e);
- free_entry(&mq->cache_alloc, e);
-
- return 0;
+ if (e->pending_work)
+ e->dirty = set;
+ else {
+ del_queue(mq, e);
+ e->dirty = set;
+ push_queue(mq, e);
+ }
}
-static int smq_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock)
+static void smq_set_dirty(struct dm_cache_policy *p, dm_cblock_t cblock)
{
- int r;
unsigned long flags;
struct smq_policy *mq = to_smq_policy(p);
spin_lock_irqsave(&mq->lock, flags);
- r = __remove_cblock(mq, cblock);
+ __smq_set_clear_dirty(mq, cblock, true);
spin_unlock_irqrestore(&mq->lock, flags);
-
- return r;
}
-
-#define CLEAN_TARGET_CRITICAL 5u /* percent */
-
-static bool clean_target_met(struct smq_policy *mq, bool critical)
+static void smq_clear_dirty(struct dm_cache_policy *p, dm_cblock_t cblock)
{
- if (critical) {
- /*
- * Cache entries may not be populated. So we're cannot rely on the
- * size of the clean queue.
- */
- unsigned nr_clean = from_cblock(mq->cache_size) - q_size(&mq->dirty);
- unsigned target = from_cblock(mq->cache_size) * CLEAN_TARGET_CRITICAL / 100u;
+ struct smq_policy *mq = to_smq_policy(p);
+ unsigned long flags;
- return nr_clean >= target;
- } else
- return !q_size(&mq->dirty);
+ spin_lock_irqsave(&mq->lock, flags);
+ __smq_set_clear_dirty(mq, cblock, false);
+ spin_unlock_irqrestore(&mq->lock, flags);
}
-static int __smq_writeback_work(struct smq_policy *mq, dm_oblock_t *oblock,
- dm_cblock_t *cblock, bool critical_only)
+static unsigned random_level(dm_cblock_t cblock)
{
- struct entry *e = NULL;
- bool target_met = clean_target_met(mq, critical_only);
-
- if (critical_only)
- /*
- * Always try and keep the bottom level clean.
- */
- e = pop_old(mq, &mq->dirty, target_met ? 1u : mq->dirty.nr_levels);
+ return hash_32(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1);
+}
- else
- e = pop_old(mq, &mq->dirty, mq->dirty.nr_levels);
+static int smq_load_mapping(struct dm_cache_policy *p,
+ dm_oblock_t oblock, dm_cblock_t cblock,
+ bool dirty, uint32_t hint, bool hint_valid)
+{
+ struct smq_policy *mq = to_smq_policy(p);
+ struct entry *e;
- if (!e)
- return -ENODATA;
+ e = alloc_particular_entry(&mq->cache_alloc, from_cblock(cblock));
+ e->oblock = oblock;
+ e->dirty = dirty;
+ e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : random_level(cblock);
+ e->pending_work = false;
- *oblock = e->oblock;
- *cblock = infer_cblock(mq, e);
- e->dirty = false;
- push_new(mq, e);
+ /*
+ * When we load mappings we push ahead of both sentinels in order to
+ * allow demotions and cleaning to occur immediately.
+ */
+ push_front(mq, e);
return 0;
}
-static int smq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock,
- dm_cblock_t *cblock, bool critical_only)
+static int smq_invalidate_mapping(struct dm_cache_policy *p, dm_cblock_t cblock)
{
- int r;
- unsigned long flags;
struct smq_policy *mq = to_smq_policy(p);
+ struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));
- spin_lock_irqsave(&mq->lock, flags);
- r = __smq_writeback_work(mq, oblock, cblock, critical_only);
- spin_unlock_irqrestore(&mq->lock, flags);
-
- return r;
-}
-
-static void __force_mapping(struct smq_policy *mq,
- dm_oblock_t current_oblock, dm_oblock_t new_oblock)
-{
- struct entry *e = h_lookup(&mq->table, current_oblock);
+ if (!e->allocated)
+ return -ENODATA;
- if (e) {
- del(mq, e);
- e->oblock = new_oblock;
- e->dirty = true;
- push(mq, e);
- }
+ // FIXME: what if this block has pending background work?
+ del_queue(mq, e);
+ h_remove(&mq->table, e);
+ free_entry(&mq->cache_alloc, e);
+ return 0;
}
-static void smq_force_mapping(struct dm_cache_policy *p,
- dm_oblock_t current_oblock, dm_oblock_t new_oblock)
+static uint32_t smq_get_hint(struct dm_cache_policy *p, dm_cblock_t cblock)
{
- unsigned long flags;
struct smq_policy *mq = to_smq_policy(p);
+ struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));
- spin_lock_irqsave(&mq->lock, flags);
- __force_mapping(mq, current_oblock, new_oblock);
- spin_unlock_irqrestore(&mq->lock, flags);
+ if (!e->allocated)
+ return 0;
+
+ return e->level;
}
static dm_cblock_t smq_residency(struct dm_cache_policy *p)
@@ -1546,6 +1603,12 @@ static void smq_tick(struct dm_cache_policy *p, bool can_block)
spin_unlock_irqrestore(&mq->lock, flags);
}
+static void smq_allow_migrations(struct dm_cache_policy *p, bool allow)
+{
+ struct smq_policy *mq = to_smq_policy(p);
+ mq->migrations_allowed = allow;
+}
+
/*
* smq has no config values, but the old mq policy did. To avoid breaking
* software we continue to accept these configurables for the mq policy,
@@ -1590,18 +1653,18 @@ static int mq_emit_config_values(struct dm_cache_policy *p, char *result,
static void init_policy_functions(struct smq_policy *mq, bool mimic_mq)
{
mq->policy.destroy = smq_destroy;
- mq->policy.map = smq_map;
mq->policy.lookup = smq_lookup;
+ mq->policy.lookup_with_work = smq_lookup_with_work;
+ mq->policy.get_background_work = smq_get_background_work;
+ mq->policy.complete_background_work = smq_complete_background_work;
mq->policy.set_dirty = smq_set_dirty;
mq->policy.clear_dirty = smq_clear_dirty;
mq->policy.load_mapping = smq_load_mapping;
+ mq->policy.invalidate_mapping = smq_invalidate_mapping;
mq->policy.get_hint = smq_get_hint;
- mq->policy.remove_mapping = smq_remove_mapping;
- mq->policy.remove_cblock = smq_remove_cblock;
- mq->policy.writeback_work = smq_writeback_work;
- mq->policy.force_mapping = smq_force_mapping;
mq->policy.residency = smq_residency;
mq->policy.tick = smq_tick;
+ mq->policy.allow_migrations = smq_allow_migrations;
if (mimic_mq) {
mq->policy.set_config_value = mq_set_config_value;
@@ -1633,7 +1696,8 @@ static void calc_hotspot_params(sector_t origin_size,
static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size,
sector_t origin_size,
sector_t cache_block_size,
- bool mimic_mq)
+ bool mimic_mq,
+ bool migrations_allowed)
{
unsigned i;
unsigned nr_sentinels_per_queue = 2u * NR_CACHE_LEVELS;
@@ -1658,11 +1722,11 @@ static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size,
}
init_allocator(&mq->writeback_sentinel_alloc, &mq->es, 0, nr_sentinels_per_queue);
- for (i = 0; i < nr_sentinels_per_queue; i++)
+ for (i = 0; i < nr_sentinels_per_queue; i++)
get_entry(&mq->writeback_sentinel_alloc, i)->sentinel = true;
init_allocator(&mq->demote_sentinel_alloc, &mq->es, nr_sentinels_per_queue, total_sentinels);
- for (i = 0; i < nr_sentinels_per_queue; i++)
+ for (i = 0; i < nr_sentinels_per_queue; i++)
get_entry(&mq->demote_sentinel_alloc, i)->sentinel = true;
init_allocator(&mq->hotspot_alloc, &mq->es, total_sentinels,
@@ -1715,8 +1779,16 @@ static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size,
mq->next_hotspot_period = jiffies;
mq->next_cache_period = jiffies;
+ mq->bg_work = btracker_create(10240); /* FIXME: hard coded value */
+ if (!mq->bg_work)
+ goto bad_btracker;
+
+ mq->migrations_allowed = migrations_allowed;
+
return &mq->policy;
+bad_btracker:
+ h_exit(&mq->hotspot_table);
bad_alloc_hotspot_table:
h_exit(&mq->table);
bad_alloc_table:
@@ -1735,21 +1807,28 @@ static struct dm_cache_policy *smq_create(dm_cblock_t cache_size,
sector_t origin_size,
sector_t cache_block_size)
{
- return __smq_create(cache_size, origin_size, cache_block_size, false);
+ return __smq_create(cache_size, origin_size, cache_block_size, false, true);
}
static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
sector_t origin_size,
sector_t cache_block_size)
{
- return __smq_create(cache_size, origin_size, cache_block_size, true);
+ return __smq_create(cache_size, origin_size, cache_block_size, true, true);
+}
+
+static struct dm_cache_policy *cleaner_create(dm_cblock_t cache_size,
+ sector_t origin_size,
+ sector_t cache_block_size)
+{
+ return __smq_create(cache_size, origin_size, cache_block_size, false, false);
}
/*----------------------------------------------------------------*/
static struct dm_cache_policy_type smq_policy_type = {
.name = "smq",
- .version = {1, 5, 0},
+ .version = {2, 0, 0},
.hint_size = 4,
.owner = THIS_MODULE,
.create = smq_create
@@ -1757,15 +1836,23 @@ static struct dm_cache_policy_type smq_policy_type = {
static struct dm_cache_policy_type mq_policy_type = {
.name = "mq",
- .version = {1, 5, 0},
+ .version = {2, 0, 0},
.hint_size = 4,
.owner = THIS_MODULE,
.create = mq_create,
};
+static struct dm_cache_policy_type cleaner_policy_type = {
+ .name = "cleaner",
+ .version = {2, 0, 0},
+ .hint_size = 4,
+ .owner = THIS_MODULE,
+ .create = cleaner_create,
+};
+
static struct dm_cache_policy_type default_policy_type = {
.name = "default",
- .version = {1, 5, 0},
+ .version = {2, 0, 0},
.hint_size = 4,
.owner = THIS_MODULE,
.create = smq_create,
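
Note: every registered type bumps to version {2, 0, 0} with this patch, and the new "cleaner" type is simply smq created with migrations_allowed set to false — queue_promotion() and queue_demotion() bail out early in that mode, so writeback is the only background work it generates.
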
@@ -1785,23 +1872,36 @@ static int __init smq_init(void)
r = dm_cache_policy_register(&mq_policy_type);
if (r) {
DMERR("register failed (as mq) %d", r);
- dm_cache_policy_unregister(&smq_policy_type);
- return -ENOMEM;
+ goto out_mq;
+ }
+
+ r = dm_cache_policy_register(&cleaner_policy_type);
+ if (r) {
+ DMERR("register failed (as cleaner) %d", r);
+ goto out_cleaner;
}
r = dm_cache_policy_register(&default_policy_type);
if (r) {
DMERR("register failed (as default) %d", r);
- dm_cache_policy_unregister(&mq_policy_type);
- dm_cache_policy_unregister(&smq_policy_type);
- return -ENOMEM;
+ goto out_default;
}
return 0;
+
+out_default:
+ dm_cache_policy_unregister(&cleaner_policy_type);
+out_cleaner:
+ dm_cache_policy_unregister(&mq_policy_type);
+out_mq:
+ dm_cache_policy_unregister(&smq_policy_type);
+
+ return -ENOMEM;
}
static void __exit smq_exit(void)
{
+ dm_cache_policy_unregister(&cleaner_policy_type);
dm_cache_policy_unregister(&smq_policy_type);
dm_cache_policy_unregister(&mq_policy_type);
dm_cache_policy_unregister(&default_policy_type);
@@ -1816,3 +1916,4 @@ MODULE_DESCRIPTION("smq cache policy");
MODULE_ALIAS("dm-cache-default");
MODULE_ALIAS("dm-cache-mq");
+MODULE_ALIAS("dm-cache-cleaner");