summaryrefslogtreecommitdiff
path: root/drivers/md/dm-cache-policy-smq.c
AgeCommit message (Collapse)Author
2018-10-16dm: remove unnecessary unlikely() around WARN_ON_ONCE()Igor Stoppa
WARN_ON() already contains an unlikely(), so it's not necessary to wrap it into another. Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2018-06-12treewide: Use array_size() in vzalloc()Kees Cook
The vzalloc() function has no 2-factor argument form, so multiplication factors need to be wrapped in array_size(). This patch replaces cases of: vzalloc(a * b) with: vzalloc(array_size(a, b)) as well as handling cases of: vzalloc(a * b * c) with: vzalloc(array3_size(a, b, c)) This does, however, attempt to ignore constant size factors like: vzalloc(4 * 1024) though any constants defined via macros get caught up in the conversion. Any factors with a sizeof() of "unsigned char", "char", and "u8" were dropped, since they're redundant. The Coccinelle script used for this was: // Fix redundant parens around sizeof(). @@ type TYPE; expression THING, E; @@ ( vzalloc( - (sizeof(TYPE)) * E + sizeof(TYPE) * E , ...) | vzalloc( - (sizeof(THING)) * E + sizeof(THING) * E , ...) ) // Drop single-byte sizes and redundant parens. @@ expression COUNT; typedef u8; typedef __u8; @@ ( vzalloc( - sizeof(u8) * (COUNT) + COUNT , ...) | vzalloc( - sizeof(__u8) * (COUNT) + COUNT , ...) | vzalloc( - sizeof(char) * (COUNT) + COUNT , ...) | vzalloc( - sizeof(unsigned char) * (COUNT) + COUNT , ...) | vzalloc( - sizeof(u8) * COUNT + COUNT , ...) | vzalloc( - sizeof(__u8) * COUNT + COUNT , ...) | vzalloc( - sizeof(char) * COUNT + COUNT , ...) | vzalloc( - sizeof(unsigned char) * COUNT + COUNT , ...) ) // 2-factor product with sizeof(type/expression) and identifier or constant. @@ type TYPE; expression THING; identifier COUNT_ID; constant COUNT_CONST; @@ ( vzalloc( - sizeof(TYPE) * (COUNT_ID) + array_size(COUNT_ID, sizeof(TYPE)) , ...) | vzalloc( - sizeof(TYPE) * COUNT_ID + array_size(COUNT_ID, sizeof(TYPE)) , ...) | vzalloc( - sizeof(TYPE) * (COUNT_CONST) + array_size(COUNT_CONST, sizeof(TYPE)) , ...) | vzalloc( - sizeof(TYPE) * COUNT_CONST + array_size(COUNT_CONST, sizeof(TYPE)) , ...) | vzalloc( - sizeof(THING) * (COUNT_ID) + array_size(COUNT_ID, sizeof(THING)) , ...) | vzalloc( - sizeof(THING) * COUNT_ID + array_size(COUNT_ID, sizeof(THING)) , ...) | vzalloc( - sizeof(THING) * (COUNT_CONST) + array_size(COUNT_CONST, sizeof(THING)) , ...) | vzalloc( - sizeof(THING) * COUNT_CONST + array_size(COUNT_CONST, sizeof(THING)) , ...) ) // 2-factor product, only identifiers. @@ identifier SIZE, COUNT; @@ vzalloc( - SIZE * COUNT + array_size(COUNT, SIZE) , ...) // 3-factor product with 1 sizeof(type) or sizeof(expression), with // redundant parens removed. @@ expression THING; identifier STRIDE, COUNT; type TYPE; @@ ( vzalloc( - sizeof(TYPE) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | vzalloc( - sizeof(TYPE) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | vzalloc( - sizeof(TYPE) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | vzalloc( - sizeof(TYPE) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | vzalloc( - sizeof(THING) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | vzalloc( - sizeof(THING) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | vzalloc( - sizeof(THING) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | vzalloc( - sizeof(THING) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) ) // 3-factor product with 2 sizeof(variable), with redundant parens removed. @@ expression THING1, THING2; identifier COUNT; type TYPE1, TYPE2; @@ ( vzalloc( - sizeof(TYPE1) * sizeof(TYPE2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | vzalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | vzalloc( - sizeof(THING1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | vzalloc( - sizeof(THING1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | vzalloc( - sizeof(TYPE1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) | vzalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) ) // 3-factor product, only identifiers, with redundant parens removed. @@ identifier STRIDE, SIZE, COUNT; @@ ( vzalloc( - (COUNT) * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | vzalloc( - COUNT * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | vzalloc( - COUNT * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | vzalloc( - (COUNT) * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | vzalloc( - COUNT * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | vzalloc( - (COUNT) * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | vzalloc( - (COUNT) * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | vzalloc( - COUNT * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) ) // Any remaining multi-factor products, first at least 3-factor products // when they're not all constants... @@ expression E1, E2, E3; constant C1, C2, C3; @@ ( vzalloc(C1 * C2 * C3, ...) | vzalloc( - E1 * E2 * E3 + array3_size(E1, E2, E3) , ...) ) // And then all remaining 2 factors products when they're not all constants. @@ expression E1, E2; constant C1, C2; @@ ( vzalloc(C1 * C2, ...) | vzalloc( - E1 * E2 + array_size(E1, E2) , ...) ) Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-12treewide: Use array_size() in vmalloc()Kees Cook
The vmalloc() function has no 2-factor argument form, so multiplication factors need to be wrapped in array_size(). This patch replaces cases of: vmalloc(a * b) with: vmalloc(array_size(a, b)) as well as handling cases of: vmalloc(a * b * c) with: vmalloc(array3_size(a, b, c)) This does, however, attempt to ignore constant size factors like: vmalloc(4 * 1024) though any constants defined via macros get caught up in the conversion. Any factors with a sizeof() of "unsigned char", "char", and "u8" were dropped, since they're redundant. The Coccinelle script used for this was: // Fix redundant parens around sizeof(). @@ type TYPE; expression THING, E; @@ ( vmalloc( - (sizeof(TYPE)) * E + sizeof(TYPE) * E , ...) | vmalloc( - (sizeof(THING)) * E + sizeof(THING) * E , ...) ) // Drop single-byte sizes and redundant parens. @@ expression COUNT; typedef u8; typedef __u8; @@ ( vmalloc( - sizeof(u8) * (COUNT) + COUNT , ...) | vmalloc( - sizeof(__u8) * (COUNT) + COUNT , ...) | vmalloc( - sizeof(char) * (COUNT) + COUNT , ...) | vmalloc( - sizeof(unsigned char) * (COUNT) + COUNT , ...) | vmalloc( - sizeof(u8) * COUNT + COUNT , ...) | vmalloc( - sizeof(__u8) * COUNT + COUNT , ...) | vmalloc( - sizeof(char) * COUNT + COUNT , ...) | vmalloc( - sizeof(unsigned char) * COUNT + COUNT , ...) ) // 2-factor product with sizeof(type/expression) and identifier or constant. @@ type TYPE; expression THING; identifier COUNT_ID; constant COUNT_CONST; @@ ( vmalloc( - sizeof(TYPE) * (COUNT_ID) + array_size(COUNT_ID, sizeof(TYPE)) , ...) | vmalloc( - sizeof(TYPE) * COUNT_ID + array_size(COUNT_ID, sizeof(TYPE)) , ...) | vmalloc( - sizeof(TYPE) * (COUNT_CONST) + array_size(COUNT_CONST, sizeof(TYPE)) , ...) | vmalloc( - sizeof(TYPE) * COUNT_CONST + array_size(COUNT_CONST, sizeof(TYPE)) , ...) | vmalloc( - sizeof(THING) * (COUNT_ID) + array_size(COUNT_ID, sizeof(THING)) , ...) | vmalloc( - sizeof(THING) * COUNT_ID + array_size(COUNT_ID, sizeof(THING)) , ...) | vmalloc( - sizeof(THING) * (COUNT_CONST) + array_size(COUNT_CONST, sizeof(THING)) , ...) | vmalloc( - sizeof(THING) * COUNT_CONST + array_size(COUNT_CONST, sizeof(THING)) , ...) ) // 2-factor product, only identifiers. @@ identifier SIZE, COUNT; @@ vmalloc( - SIZE * COUNT + array_size(COUNT, SIZE) , ...) // 3-factor product with 1 sizeof(type) or sizeof(expression), with // redundant parens removed. @@ expression THING; identifier STRIDE, COUNT; type TYPE; @@ ( vmalloc( - sizeof(TYPE) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | vmalloc( - sizeof(TYPE) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | vmalloc( - sizeof(TYPE) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | vmalloc( - sizeof(TYPE) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | vmalloc( - sizeof(THING) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | vmalloc( - sizeof(THING) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | vmalloc( - sizeof(THING) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | vmalloc( - sizeof(THING) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) ) // 3-factor product with 2 sizeof(variable), with redundant parens removed. @@ expression THING1, THING2; identifier COUNT; type TYPE1, TYPE2; @@ ( vmalloc( - sizeof(TYPE1) * sizeof(TYPE2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | vmalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | vmalloc( - sizeof(THING1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | vmalloc( - sizeof(THING1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | vmalloc( - sizeof(TYPE1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) | vmalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) ) // 3-factor product, only identifiers, with redundant parens removed. @@ identifier STRIDE, SIZE, COUNT; @@ ( vmalloc( - (COUNT) * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | vmalloc( - COUNT * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | vmalloc( - COUNT * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | vmalloc( - (COUNT) * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | vmalloc( - COUNT * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | vmalloc( - (COUNT) * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | vmalloc( - (COUNT) * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | vmalloc( - COUNT * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) ) // Any remaining multi-factor products, first at least 3-factor products // when they're not all constants... @@ expression E1, E2, E3; constant C1, C2, C3; @@ ( vmalloc(C1 * C2 * C3, ...) | vmalloc( - E1 * E2 * E3 + array3_size(E1, E2, E3) , ...) ) // And then all remaining 2 factors products when they're not all constants. @@ expression E1, E2; constant C1, C2; @@ ( vmalloc(C1 * C2, ...) | vmalloc( - E1 * E2 + array_size(E1, E2) , ...) ) Signed-off-by: Kees Cook <keescook@chromium.org>
2017-11-10dm cache policy smq: allocate cache blocks in orderJoe Thornber
Previously, cache blocks were being allocated in reverse order. Fix this by pulling the block off the head of the free list. Shouldn't have any impact on performance or latency but it is more correct to have the cache blocks allocated/mapped in ascending order. This fix will slightly increase the chances of two adjacent oblocks being in adjacent cblocks. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2017-11-10dm cache policy smq: change max background work from 10240 to 4096 blocksJoe Thornber
10240 blocks was too much, lowering this reduces the latency of copying and consumes less memory. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2017-11-10dm cache policy smq: take origin idle status into account when queuing ↵Joe Thornber
writebacks If the origin device is idle try and writeback more data. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2017-11-10dm cache policy smq: handle races with queuing background_workJoe Thornber
The background_tracker holds a set of promotions/demotions that the cache policy wishes the core target to implement. When adding a new operation to the tracker it's possible that an operation on the same block is already present (but in practise this doesn't appear to be happening). Catch these situations and do the appropriate cleanup. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2017-05-14dm cache policy smq: don't do any writebacks unless IDLEJoe Thornber
If there are no clean blocks to be demoted the writeback will be triggered at that point. Preemptively writing back can hurt high IO load scenarios. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2017-05-14dm cache policy smq: stop preemptively demoting blocksJoe Thornber
It causes a lot of churn if the working set's size is close to the fast device's size. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2017-05-14dm cache policy smq: put newly promoted entries at the top of the multiqueueJoe Thornber
This stops entries bouncing in and out of the cache quickly. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2017-05-14dm cache policy smq: be more aggressive about triggering a writebackJoe Thornber
If there are no clean entries to demote we really want to writeback immediately. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2017-05-14dm cache policy smq: only demote entries in bottom half of the clean multiqueueJoe Thornber
Heavy IO load may mean there are very few clean blocks in the cache, and we risk demoting entries that get hit a lot. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2017-05-04dm cache policy smq: cleanup free_target_met() and clean_target_met()Mike Snitzer
Depending on the passed @idle arg, there may be no need to calculate 'nr_free' or 'nr_clean' respectively in free_target_met() and clean_target_met(). Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2017-05-04dm cache policy smq: allow demotions to happen even during continuous IOJoe Thornber
dm-cache's smq policy tries hard to do it's work during the idle periods when there is no IO. But if there are no idle periods (eg, a long fio run) we still need to allow some demotions and promotions to occur. To achieve this, pass @idle=true to queue_promotion()'s free_target_met() call so that free_target_met() doesn't short-circuit the possibility of demotion simply because it isn't an idle period. Fixes: b29d4986d0 ("dm cache: significant rework to leverage dm-bio-prison-v2") Reported-by: John Harrigan <jharriga@redhat.com> Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2017-03-31dm cache policy smq: make the cleaner policy write-back more aggressivelyJoe Thornber
By ignoring the sentinels the cleaner policy is able to write-back dirty cache data much faster. There is no reason to respect the sentinels, which denote that a block was changed recently, when using the cleaner policy given that the cleaner is tasked with writing back all dirty data. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2017-03-07dm cache: significant rework to leverage dm-bio-prison-v2Joe Thornber
The cache policy interfaces have been updated to work well with the new bio-prison v2 interface's ability to queue work immediately (promotion, demotion, etc) -- overriding benefit being reduced latency on processing IO through the cache. Previously such work would be left for the DM cache core to queue on various lists and then process in batches later -- this caused a serious delay in latency for IO driven by the cache. The background tracker code was factored out so that all cache policies can make use of it. Also, the "cleaner" policy has been removed and is now a variant of the smq policy that simply disallows migrations. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2016-12-08dm cache policy smq: use hash_32() instead of hash_32_generic()Mike Snitzer
Switch to using hash_32() because hash_32_generic() should only be used by the kernel's selftests. Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2016-09-22dm cache policy smq: distribute entries to random levels when switching to smqJoe Thornber
For smq the 32 bit 'hint' stores the multiqueue level that the entry should be stored in. If a different policy has been used previously, and then switched to smq, the hints will be invalid. In which case we used to put all entries in the bottom level of the multiqueue, and then redistribute. Redistribution is faster if we put entries with invalid hints in random levels initially. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2016-09-22dm cache: speed up writing of the hint arrayJoe Thornber
It's far quicker to always delete the hint array and recreate with dm_array_new() because we avoid the copying caused by mutation. Also simplifies the policy interface, replacing the walk_hints() with the simpler get_hint(). Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2016-03-10dm cache policy smq: clarify that mq registration failure was for 'mq'Mike Snitzer
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2016-03-10dm cache: make the 'mq' policy an alias for 'smq'Joe Thornber
smq seems to be performing better than the old mq policy in all situations, as well as using a quarter of the memory. Make 'mq' an alias for 'smq' when choosing a cache policy. The tunables that were present for the old mq are faked, and have no effect. mq should be considered deprecated now. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2015-10-31dm: convert ffs to __ffsMikulas Patocka
ffs counts bit starting with 1 (for the least significant bit), __ffs counts bits starting with 0. This patch changes various occurrences of ffs to __ffs and removes subtraction of 1 from the result. Note that __ffs (unlike ffs) is not defined when called with zero argument, but it is not called with zero argument in any of these cases. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2015-08-12dm cache policy smq: change the mutex to a spinlockJoe Thornber
We no longer sleep in any of the smq functions, so this can become a spinlock. Switching from mutex to spinlock improves performance when the fast cache device is a very low latency device (e.g. NVMe SSD). The switch to spinlock also allows for removal of the extra tick_lock; which is no longer needed since the main lock being a spinlock now fulfills the locking requirements needed by interrupt context. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2015-08-12dm cache policy smq: move 'dm-cache-default' module alias to SMQYi Zhang
When creating dm-cache with the default policy, it will call request_module("dm-cache-default") to register the default policy. But the "dm-cache-default" alias was left referring to the MQ policy. Fix this by moving the module alias to SMQ. Fixes: bccab6a0 (dm cache: switch the "default" cache replacement policy from mq to smq) Signed-off-by: Yi Zhang <yizhan@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2015-07-27dm cache policy smq: fix alloc_bitset check that always evaluates as falseColin Ian King
static analysis by cppcheck has found a check on alloc_bitset that always evaluates as false and hence never finds an allocation failure: [drivers/md/dm-cache-policy-smq.c:1689]: (warning) Logical conjunction always evaluates to false: !EXPR && EXPR. Fix this by removing the incorrect mq->cache_hit_bits check Signed-off-by: Colin Ian King <colin.king@canonical.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2015-06-26dm cache policy smq: fix "default" version to be 1.4.0Mike Snitzer
Commit bccab6a0 ("dm cache: switch the "default" cache replacement policy from mq to smq") should've incremented the "default" policy's version number to 1.4.0 rather than reverting to version 1.0.0. Reported-by: Alasdair G Kergon <agk@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2015-06-17dm cache: switch the "default" cache replacement policy from mq to smqMike Snitzer
The Stochastic multiqueue (SMQ) policy (vs MQ) offers the promise of less memory utilization, improved performance and increased adaptability in the face of changing workloads. SMQ also does not have any cumbersome tuning knobs. Users may switch from "mq" to "smq" simply by appropriately reloading a DM table that is using the cache target. Doing so will cause all of the mq policy's hints to be dropped. Also, performance of the cache may degrade slightly until smq recalculates the origin device's hotspots that should be cached. In the future the "mq" policy will just silently make use of "smq" and the mq code will be removed. Signed-off-by: Mike Snitzer <snitzer@redhat.com> Acked-by: Joe Thornber <ejt@redhat.com>
2015-06-11dm cache: age and write back cache entries even without active IOJoe Thornber
The policy tick() method is normally called from interrupt context. Both the mq and smq policies do some bottom half work for the tick method in their map functions. However if no IO is going through the cache, then that bottom half work doesn't occur. With these policies this means recently hit entries do not age and do not get written back as early as we'd like. Fix this by introducing a new 'can_block' parameter to the tick() method. When this is set the bottom half work occurs immediately. 'can_block' is set when the tick method is called every second by the core target (not in interrupt context). Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2015-06-11dm cache: add stochastic-multi-queue (smq) policyJoe Thornber
The stochastic-multi-queue (smq) policy addresses some of the problems with the current multiqueue (mq) policy. Memory usage ------------ The mq policy uses a lot of memory; 88 bytes per cache block on a 64 bit machine. SMQ uses 28bit indexes to implement it's data structures rather than pointers. It avoids storing an explicit hit count for each block. It has a 'hotspot' queue rather than a pre cache which uses a quarter of the entries (each hotspot block covers a larger area than a single cache block). All these mean smq uses ~25bytes per cache block. Still a lot of memory, but a substantial improvement nontheless. Level balancing --------------- MQ places entries in different levels of the multiqueue structures based on their hit count (~ln(hit count)). This means the bottom levels generally have the most entries, and the top ones have very few. Having unbalanced levels like this reduces the efficacy of the multiqueue. SMQ does not maintain a hit count, instead it swaps hit entries with the least recently used entry from the level above. The over all ordering being a side effect of this stochastic process. With this scheme we can decide how many entries occupy each multiqueue level, resulting in better promotion/demotion decisions. Adaptability ------------ The MQ policy maintains a hit count for each cache block. For a different block to get promoted to the cache it's hit count has to exceed the lowest currently in the cache. This means it can take a long time for the cache to adapt between varying IO patterns. Periodically degrading the hit counts could help with this, but I haven't found a nice general solution. SMQ doesn't maintain hit counts, so a lot of this problem just goes away. In addition it tracks performance of the hotspot queue, which is used to decide which blocks to promote. If the hotspot queue is performing badly then it starts moving entries more quickly between levels. This lets it adapt to new IO patterns very quickly. Performance ----------- In my tests SMQ shows substantially better performance than MQ. Once this matures a bit more I'm sure it'll become the default policy. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>