diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-03 10:31:20 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-03 10:31:20 -0700 |
commit | d35a878ae1c50977b55e352fd46e36e35add72a0 (patch) | |
tree | 7cd4e0ec418c6f3be365e56ee3c49bab218cd608 /drivers/md/dm-bio-prison-v2.c | |
parent | e5021876c91dc3894b2174cca8fa797f8e29e7b9 (diff) | |
parent | 390020ad2af9ca04844c4f3b1f299ad8746d84c8 (diff) |
Merge tag 'for-4.12/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:
- A major update for DM cache that reduces the latency for deciding
whether blocks should migrate to/from the cache. The bio-prison-v2
interface supports this improvement by enabling direct dispatch of
work to workqueues rather than having to delay the actual work
dispatch to the DM cache core. So the dm-cache policies are much more
nimble by being able to drive IO as they see fit. One immediate
benefit from the improved latency is a cache that should be much more
adaptive to changing workloads.
- Add a new DM integrity target that emulates a block device that has
additional per-sector tags that can be used for storing integrity
information.
- Add a new authenticated encryption feature to the DM crypt target
that builds on the capabilities provided by the DM integrity target.
- Add MD interface for switching the raid4/5/6 journal mode and update
the DM raid target to use it to enable aid4/5/6 journal write-back
support.
- Switch the DM verity target over to using the asynchronous hash
crypto API (this helps work better with architectures that have
access to off-CPU algorithm providers, which should reduce CPU
utilization).
- Various request-based DM and DM multipath fixes and improvements from
Bart and Christoph.
- A DM thinp target fix for a bio structure leak that occurs for each
discard IFF discard passdown is enabled.
- A fix for a possible deadlock in DM bufio and a fix to re-check the
new buffer allocation watermark in the face of competing admin
changes to the 'max_cache_size_bytes' tunable.
- A couple DM core cleanups.
* tag 'for-4.12/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (50 commits)
dm bufio: check new buffer allocation watermark every 30 seconds
dm bufio: avoid a possible ABBA deadlock
dm mpath: make it easier to detect unintended I/O request flushes
dm mpath: cleanup QUEUE_IF_NO_PATH bit manipulation by introducing assign_bit()
dm mpath: micro-optimize the hot path relative to MPATHF_QUEUE_IF_NO_PATH
dm: introduce enum dm_queue_mode to cleanup related code
dm mpath: verify __pg_init_all_paths locking assumptions at runtime
dm: verify suspend_locking assumptions at runtime
dm block manager: remove an unused argument from dm_block_manager_create()
dm rq: check blk_mq_register_dev() return value in dm_mq_init_request_queue()
dm mpath: delay requeuing while path initialization is in progress
dm mpath: avoid that path removal can trigger an infinite loop
dm mpath: split and rename activate_path() to prepare for its expanded use
dm ioctl: prevent stack leak in dm ioctl call
dm integrity: use previously calculated log2 of sectors_per_block
dm integrity: use hex2bin instead of open-coded variant
dm crypt: replace custom implementation of hex2bin()
dm crypt: remove obsolete references to per-CPU state
dm verity: switch to using asynchronous hash crypto API
dm crypt: use WQ_HIGHPRI for the IO and crypt workqueues
...
Diffstat (limited to 'drivers/md/dm-bio-prison-v2.c')
-rw-r--r-- | drivers/md/dm-bio-prison-v2.c | 369 |
1 files changed, 369 insertions, 0 deletions
diff --git a/drivers/md/dm-bio-prison-v2.c b/drivers/md/dm-bio-prison-v2.c new file mode 100644 index 000000000000..c9b11f799cd8 --- /dev/null +++ b/drivers/md/dm-bio-prison-v2.c @@ -0,0 +1,369 @@ +/* + * Copyright (C) 2012-2017 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#include "dm.h" +#include "dm-bio-prison-v2.h" + +#include <linux/spinlock.h> +#include <linux/mempool.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/rwsem.h> + +/*----------------------------------------------------------------*/ + +#define MIN_CELLS 1024 + +struct dm_bio_prison_v2 { + struct workqueue_struct *wq; + + spinlock_t lock; + mempool_t *cell_pool; + struct rb_root cells; +}; + +static struct kmem_cache *_cell_cache; + +/*----------------------------------------------------------------*/ + +/* + * @nr_cells should be the number of cells you want in use _concurrently_. + * Don't confuse it with the number of distinct keys. + */ +struct dm_bio_prison_v2 *dm_bio_prison_create_v2(struct workqueue_struct *wq) +{ + struct dm_bio_prison_v2 *prison = kmalloc(sizeof(*prison), GFP_KERNEL); + + if (!prison) + return NULL; + + prison->wq = wq; + spin_lock_init(&prison->lock); + + prison->cell_pool = mempool_create_slab_pool(MIN_CELLS, _cell_cache); + if (!prison->cell_pool) { + kfree(prison); + return NULL; + } + + prison->cells = RB_ROOT; + + return prison; +} +EXPORT_SYMBOL_GPL(dm_bio_prison_create_v2); + +void dm_bio_prison_destroy_v2(struct dm_bio_prison_v2 *prison) +{ + mempool_destroy(prison->cell_pool); + kfree(prison); +} +EXPORT_SYMBOL_GPL(dm_bio_prison_destroy_v2); + +struct dm_bio_prison_cell_v2 *dm_bio_prison_alloc_cell_v2(struct dm_bio_prison_v2 *prison, gfp_t gfp) +{ + return mempool_alloc(prison->cell_pool, gfp); +} +EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell_v2); + +void dm_bio_prison_free_cell_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell) +{ + mempool_free(cell, prison->cell_pool); +} +EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell_v2); + +static void __setup_new_cell(struct dm_cell_key_v2 *key, + struct dm_bio_prison_cell_v2 *cell) +{ + memset(cell, 0, sizeof(*cell)); + memcpy(&cell->key, key, sizeof(cell->key)); + bio_list_init(&cell->bios); +} + +static int cmp_keys(struct dm_cell_key_v2 *lhs, + struct dm_cell_key_v2 *rhs) +{ + if (lhs->virtual < rhs->virtual) + return -1; + + if (lhs->virtual > rhs->virtual) + return 1; + + if (lhs->dev < rhs->dev) + return -1; + + if (lhs->dev > rhs->dev) + return 1; + + if (lhs->block_end <= rhs->block_begin) + return -1; + + if (lhs->block_begin >= rhs->block_end) + return 1; + + return 0; +} + +/* + * Returns true if node found, otherwise it inserts a new one. + */ +static bool __find_or_insert(struct dm_bio_prison_v2 *prison, + struct dm_cell_key_v2 *key, + struct dm_bio_prison_cell_v2 *cell_prealloc, + struct dm_bio_prison_cell_v2 **result) +{ + int r; + struct rb_node **new = &prison->cells.rb_node, *parent = NULL; + + while (*new) { + struct dm_bio_prison_cell_v2 *cell = + container_of(*new, struct dm_bio_prison_cell_v2, node); + + r = cmp_keys(key, &cell->key); + + parent = *new; + if (r < 0) + new = &((*new)->rb_left); + + else if (r > 0) + new = &((*new)->rb_right); + + else { + *result = cell; + return true; + } + } + + __setup_new_cell(key, cell_prealloc); + *result = cell_prealloc; + rb_link_node(&cell_prealloc->node, parent, new); + rb_insert_color(&cell_prealloc->node, &prison->cells); + + return false; +} + +static bool __get(struct dm_bio_prison_v2 *prison, + struct dm_cell_key_v2 *key, + unsigned lock_level, + struct bio *inmate, + struct dm_bio_prison_cell_v2 *cell_prealloc, + struct dm_bio_prison_cell_v2 **cell) +{ + if (__find_or_insert(prison, key, cell_prealloc, cell)) { + if ((*cell)->exclusive_lock) { + if (lock_level <= (*cell)->exclusive_level) { + bio_list_add(&(*cell)->bios, inmate); + return false; + } + } + + (*cell)->shared_count++; + + } else + (*cell)->shared_count = 1; + + return true; +} + +bool dm_cell_get_v2(struct dm_bio_prison_v2 *prison, + struct dm_cell_key_v2 *key, + unsigned lock_level, + struct bio *inmate, + struct dm_bio_prison_cell_v2 *cell_prealloc, + struct dm_bio_prison_cell_v2 **cell_result) +{ + int r; + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + r = __get(prison, key, lock_level, inmate, cell_prealloc, cell_result); + spin_unlock_irqrestore(&prison->lock, flags); + + return r; +} +EXPORT_SYMBOL_GPL(dm_cell_get_v2); + +static bool __put(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell) +{ + BUG_ON(!cell->shared_count); + cell->shared_count--; + + // FIXME: shared locks granted above the lock level could starve this + if (!cell->shared_count) { + if (cell->exclusive_lock){ + if (cell->quiesce_continuation) { + queue_work(prison->wq, cell->quiesce_continuation); + cell->quiesce_continuation = NULL; + } + } else { + rb_erase(&cell->node, &prison->cells); + return true; + } + } + + return false; +} + +bool dm_cell_put_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell) +{ + bool r; + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + r = __put(prison, cell); + spin_unlock_irqrestore(&prison->lock, flags); + + return r; +} +EXPORT_SYMBOL_GPL(dm_cell_put_v2); + +static int __lock(struct dm_bio_prison_v2 *prison, + struct dm_cell_key_v2 *key, + unsigned lock_level, + struct dm_bio_prison_cell_v2 *cell_prealloc, + struct dm_bio_prison_cell_v2 **cell_result) +{ + struct dm_bio_prison_cell_v2 *cell; + + if (__find_or_insert(prison, key, cell_prealloc, &cell)) { + if (cell->exclusive_lock) + return -EBUSY; + + cell->exclusive_lock = true; + cell->exclusive_level = lock_level; + *cell_result = cell; + + // FIXME: we don't yet know what level these shared locks + // were taken at, so have to quiesce them all. + return cell->shared_count > 0; + + } else { + cell = cell_prealloc; + cell->shared_count = 0; + cell->exclusive_lock = true; + cell->exclusive_level = lock_level; + *cell_result = cell; + } + + return 0; +} + +int dm_cell_lock_v2(struct dm_bio_prison_v2 *prison, + struct dm_cell_key_v2 *key, + unsigned lock_level, + struct dm_bio_prison_cell_v2 *cell_prealloc, + struct dm_bio_prison_cell_v2 **cell_result) +{ + int r; + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + r = __lock(prison, key, lock_level, cell_prealloc, cell_result); + spin_unlock_irqrestore(&prison->lock, flags); + + return r; +} +EXPORT_SYMBOL_GPL(dm_cell_lock_v2); + +static void __quiesce(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + struct work_struct *continuation) +{ + if (!cell->shared_count) + queue_work(prison->wq, continuation); + else + cell->quiesce_continuation = continuation; +} + +void dm_cell_quiesce_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + struct work_struct *continuation) +{ + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + __quiesce(prison, cell, continuation); + spin_unlock_irqrestore(&prison->lock, flags); +} +EXPORT_SYMBOL_GPL(dm_cell_quiesce_v2); + +static int __promote(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + unsigned new_lock_level) +{ + if (!cell->exclusive_lock) + return -EINVAL; + + cell->exclusive_level = new_lock_level; + return cell->shared_count > 0; +} + +int dm_cell_lock_promote_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + unsigned new_lock_level) +{ + int r; + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + r = __promote(prison, cell, new_lock_level); + spin_unlock_irqrestore(&prison->lock, flags); + + return r; +} +EXPORT_SYMBOL_GPL(dm_cell_lock_promote_v2); + +static bool __unlock(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + struct bio_list *bios) +{ + BUG_ON(!cell->exclusive_lock); + + bio_list_merge(bios, &cell->bios); + bio_list_init(&cell->bios); + + if (cell->shared_count) { + cell->exclusive_lock = 0; + return false; + } + + rb_erase(&cell->node, &prison->cells); + return true; +} + +bool dm_cell_unlock_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + struct bio_list *bios) +{ + bool r; + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + r = __unlock(prison, cell, bios); + spin_unlock_irqrestore(&prison->lock, flags); + + return r; +} +EXPORT_SYMBOL_GPL(dm_cell_unlock_v2); + +/*----------------------------------------------------------------*/ + +int __init dm_bio_prison_init_v2(void) +{ + _cell_cache = KMEM_CACHE(dm_bio_prison_cell_v2, 0); + if (!_cell_cache) + return -ENOMEM; + + return 0; +} + +void dm_bio_prison_exit_v2(void) +{ + kmem_cache_destroy(_cell_cache); + _cell_cache = NULL; +} |