diff options
Diffstat (limited to 'drivers/md/bcache/bcache.h')
| -rw-r--r-- | drivers/md/bcache/bcache.h | 135 |
1 files changed, 78 insertions, 57 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index fdf75352e16a..8ccacba85547 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -107,7 +107,7 @@ * * BTREE NODES: * - * Our unit of allocation is a bucket, and we we can't arbitrarily allocate and + * Our unit of allocation is a bucket, and we can't arbitrarily allocate and * free smaller than a bucket - so, that's how big our btree nodes are. * * (If buckets are really big we'll only use part of the bucket for a btree node @@ -176,10 +176,10 @@ * - updates to non leaf nodes just happen synchronously (see btree_split()). */ -#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__ +#define pr_fmt(fmt) "bcache: %s() " fmt, __func__ -#include <linux/bcache.h> #include <linux/bio.h> +#include <linux/closure.h> #include <linux/kobject.h> #include <linux/list.h> #include <linux/mutex.h> @@ -190,9 +190,9 @@ #include <linux/workqueue.h> #include <linux/kthread.h> +#include "bcache_ondisk.h" #include "bset.h" #include "util.h" -#include "closure.h" struct bucket { atomic_t pin; @@ -200,6 +200,7 @@ struct bucket { uint8_t gen; uint8_t last_gc; /* Most out of date gen in the btree */ uint16_t gc_mark; /* Bitfield used by GC. See below for field */ + uint16_t reclaimable_in_gc:1; }; /* @@ -264,7 +265,8 @@ struct bcache_device { #define BCACHE_DEV_UNLINK_DONE 2 #define BCACHE_DEV_WB_RUNNING 3 #define BCACHE_DEV_RATE_DW_RUNNING 4 - unsigned int nr_stripes; + int nr_stripes; +#define BCH_MIN_STRIPE_SZ ((4 << 20) >> SECTOR_SHIFT) unsigned int stripe_size; atomic_t *stripe_sectors_dirty; unsigned long *full_dirty_stripes; @@ -275,7 +277,7 @@ struct bcache_device { int (*cache_miss)(struct btree *b, struct search *s, struct bio *bio, unsigned int sectors); - int (*ioctl)(struct bcache_device *d, fmode_t mode, + int (*ioctl)(struct bcache_device *d, blk_mode_t mode, unsigned int cmd, unsigned long arg); }; @@ -299,8 +301,10 @@ struct cached_dev { struct list_head list; struct bcache_device disk; struct block_device *bdev; + struct file *bdev_file; struct cache_sb sb; + struct cache_sb_disk *sb_disk; struct bio sb_bio; struct bio_vec sb_bv[1]; struct closure sb_write; @@ -329,6 +333,9 @@ struct cached_dev { */ atomic_t has_dirty; +#define BCH_CACHE_READA_ALL 0 +#define BCH_CACHE_READA_META_ONLY 1 + unsigned int cache_readahead_policy; struct bch_ratelimit writeback_rate; struct delayed_work writeback_rate_update; @@ -360,7 +367,6 @@ struct cached_dev { /* The rest of this all shows up in sysfs */ unsigned int sequential_cutoff; - unsigned int readahead; unsigned int io_disable:1; unsigned int verify:1; @@ -369,6 +375,7 @@ struct cached_dev { unsigned int partial_stripes_expensive:1; unsigned int writeback_metadata:1; unsigned int writeback_running:1; + unsigned int writeback_consider_fragment:1; unsigned char writeback_percent; unsigned int writeback_delay; @@ -381,6 +388,9 @@ struct cached_dev { unsigned int writeback_rate_update_seconds; unsigned int writeback_rate_i_term_inverse; unsigned int writeback_rate_p_term_inverse; + unsigned int writeback_rate_fp_term_low; + unsigned int writeback_rate_fp_term_mid; + unsigned int writeback_rate_fp_term_high; unsigned int writeback_rate_minimum; enum stop_on_failure stop_when_cache_set_failed; @@ -389,7 +399,12 @@ struct cached_dev { unsigned int error_limit; unsigned int offline_seconds; - char backing_dev_name[BDEVNAME_SIZE]; + /* + * Retry to update writeback_rate if contention happens for + * down_read(dc->writeback_lock) in update_writeback_rate() + */ +#define BCH_WBRATE_UPDATE_MAX_SKIPS 15 + unsigned int rate_update_retry; }; enum alloc_reserve { @@ -403,11 +418,13 @@ enum alloc_reserve { struct cache { struct cache_set *set; struct cache_sb sb; + struct cache_sb_disk *sb_disk; struct bio sb_bio; struct bio_vec sb_bv[1]; struct kobject kobj; struct block_device *bdev; + struct file *bdev_file; struct task_struct *alloc_thread; @@ -430,8 +447,7 @@ struct cache { * free_inc: Incoming buckets - these are buckets that currently have * cached data in them, and we can't reuse them until after we write * their new gen to disk. After prio_write() finishes writing the new - * gens/prios, they'll be moved to the free list (and possibly discarded - * in the process) + * gens/prios, they'll be moved to the free list. */ DECLARE_FIFO(long, free)[RESERVE_NR]; DECLARE_FIFO(long, free_inc); @@ -450,8 +466,6 @@ struct cache { */ unsigned int invalidate_needs_gc; - bool discard; /* Get rid of? */ - struct journal_device journal; /* The rest of this all shows up in sysfs */ @@ -462,8 +476,6 @@ struct cache { atomic_long_t meta_sectors_written; atomic_long_t btree_sectors_written; atomic_long_t sectors_written; - - char cache_dev_name[BDEVNAME_SIZE]; }; struct gc_stat { @@ -512,11 +524,7 @@ struct cache_set { atomic_t idle_counter; atomic_t at_max_writeback_rate; - struct cache_sb sb; - - struct cache *cache[MAX_CACHES_PER_SET]; - struct cache *cache_by_alloc[MAX_CACHES_PER_SET]; - int caches_loaded; + struct cache *cache; struct bcache_device **devices; unsigned int devices_max_used; @@ -534,7 +542,7 @@ struct cache_set { struct bio_set bio_split; /* For the btree cache */ - struct shrinker shrink; + struct shrinker *shrink; /* For the btree cache and anything allocation related */ struct mutex bucket_lock; @@ -582,6 +590,7 @@ struct cache_set { */ wait_queue_head_t btree_cache_wait; struct task_struct *btree_cache_alloc_lock; + spinlock_t btree_cannibalize_lock; /* * When we free a btree node, we increment the gen of the bucket the @@ -595,6 +604,7 @@ struct cache_set { */ atomic_t prio_blocked; wait_queue_head_t bucket_wait; + atomic_t bucket_wait_cnt; /* * For any bio we don't skip we subtract the number of sectors from @@ -664,6 +674,7 @@ struct cache_set { struct mutex verify_lock; #endif + uint8_t set_uuid[16]; unsigned int nr_uuids; struct uuid_entry *uuids; BKEY_PADDED(uuid_bucket); @@ -705,8 +716,8 @@ struct cache_set { atomic_long_t writeback_keys_failed; atomic_long_t reclaim; + atomic_long_t reclaimed_journal_buckets; atomic_long_t flush_write; - atomic_long_t retry_flush_write; enum { ON_ERROR_UNREGISTER, @@ -723,11 +734,10 @@ struct cache_set { unsigned int gc_always_rewrite:1; unsigned int shrinker_disabled:1; unsigned int copy_gc_enabled:1; + unsigned int idle_max_writeback_rate_enabled:1; #define BUCKET_HASH_BITS 12 struct hlist_head bucket_hash[1 << BUCKET_HASH_BITS]; - - DECLARE_HEAP(struct btree *, flush_btree); }; struct bbio { @@ -753,15 +763,35 @@ struct bbio { #define btree_default_blocks(c) \ ((unsigned int) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits)) -#define bucket_pages(c) ((c)->sb.bucket_size / PAGE_SECTORS) -#define bucket_bytes(c) ((c)->sb.bucket_size << 9) -#define block_bytes(c) ((c)->sb.block_size << 9) +#define bucket_bytes(ca) ((ca)->sb.bucket_size << 9) +#define block_bytes(ca) ((ca)->sb.block_size << 9) + +static inline unsigned int meta_bucket_pages(struct cache_sb *sb) +{ + unsigned int n, max_pages; + + max_pages = min_t(unsigned int, + __rounddown_pow_of_two(USHRT_MAX) / PAGE_SECTORS, + MAX_ORDER_NR_PAGES); + + n = sb->bucket_size / PAGE_SECTORS; + if (n > max_pages) + n = max_pages; -#define prios_per_bucket(c) \ - ((bucket_bytes(c) - sizeof(struct prio_set)) / \ + return n; +} + +static inline unsigned int meta_bucket_bytes(struct cache_sb *sb) +{ + return meta_bucket_pages(sb) << PAGE_SHIFT; +} + +#define prios_per_bucket(ca) \ + ((meta_bucket_bytes(&(ca)->sb) - sizeof(struct prio_set)) / \ sizeof(struct bucket_disk)) -#define prio_buckets(c) \ - DIV_ROUND_UP((size_t) (c)->sb.nbuckets, prios_per_bucket(c)) + +#define prio_buckets(ca) \ + DIV_ROUND_UP((size_t) (ca)->sb.nbuckets, prios_per_bucket(ca)) static inline size_t sector_to_bucket(struct cache_set *c, sector_t s) { @@ -775,14 +805,7 @@ static inline sector_t bucket_to_sector(struct cache_set *c, size_t b) static inline sector_t bucket_remainder(struct cache_set *c, sector_t s) { - return s & (c->sb.bucket_size - 1); -} - -static inline struct cache *PTR_CACHE(struct cache_set *c, - const struct bkey *k, - unsigned int ptr) -{ - return c->cache[PTR_DEV(k, ptr)]; + return s & (c->cache->sb.bucket_size - 1); } static inline size_t PTR_BUCKET_NR(struct cache_set *c, @@ -796,7 +819,7 @@ static inline struct bucket *PTR_BUCKET(struct cache_set *c, const struct bkey *k, unsigned int ptr) { - return PTR_CACHE(c, k, ptr)->buckets + PTR_BUCKET_NR(c, k, ptr); + return c->cache->buckets + PTR_BUCKET_NR(c, k, ptr); } static inline uint8_t gen_after(uint8_t a, uint8_t b) @@ -815,7 +838,7 @@ static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k, static inline bool ptr_available(struct cache_set *c, const struct bkey *k, unsigned int i) { - return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i); + return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && c->cache; } /* Btree key macros */ @@ -863,9 +886,6 @@ do { \ /* Looping macros */ -#define for_each_cache(ca, cs, iter) \ - for (iter = 0; ca = cs->cache[iter], iter < (cs)->sb.nr_in_set; iter++) - #define for_each_bucket(b, ca) \ for (b = (ca)->buckets + (ca)->sb.first_bucket; \ b < (ca)->buckets + (ca)->sb.nbuckets; b++) @@ -907,11 +927,9 @@ static inline uint8_t bucket_gc_gen(struct bucket *b) static inline void wake_up_allocators(struct cache_set *c) { - struct cache *ca; - unsigned int i; + struct cache *ca = c->cache; - for_each_cache(ca, c, i) - wake_up_process(ca->alloc_thread); + wake_up_process(ca->alloc_thread); } static inline void closure_bio_submit(struct cache_set *c, @@ -924,7 +942,7 @@ static inline void closure_bio_submit(struct cache_set *c, bio_endio(bio); return; } - generic_make_request(bio); + submit_bio_noacct(bio); } /* @@ -968,9 +986,9 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k); long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait); int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, - struct bkey *k, int n, bool wait); + struct bkey *k, bool wait); int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, - struct bkey *k, int n, bool wait); + struct bkey *k, bool wait); bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned int sectors, unsigned int write_point, unsigned int write_prio, bool wait); @@ -979,19 +997,20 @@ bool bch_cached_dev_error(struct cached_dev *dc); __printf(2, 3) bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...); -void bch_prio_write(struct cache *ca); +int bch_prio_write(struct cache *ca, bool wait); void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent); extern struct workqueue_struct *bcache_wq; extern struct workqueue_struct *bch_journal_wq; +extern struct workqueue_struct *bch_flush_wq; extern struct mutex bch_register_lock; extern struct list_head bch_cache_sets; -extern struct kobj_type bch_cached_dev_ktype; -extern struct kobj_type bch_flash_dev_ktype; -extern struct kobj_type bch_cache_set_ktype; -extern struct kobj_type bch_cache_set_internal_ktype; -extern struct kobj_type bch_cache_ktype; +extern const struct kobj_type bch_cached_dev_ktype; +extern const struct kobj_type bch_flash_dev_ktype; +extern const struct kobj_type bch_cache_set_ktype; +extern const struct kobj_type bch_cache_set_internal_ktype; +extern const struct kobj_type bch_cache_ktype; void bch_cached_dev_release(struct kobject *kobj); void bch_flash_dev_release(struct kobject *kobj); @@ -1006,7 +1025,7 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size); int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, uint8_t *set_uuid); void bch_cached_dev_detach(struct cached_dev *dc); -void bch_cached_dev_run(struct cached_dev *dc); +int bch_cached_dev_run(struct cached_dev *dc); void bcache_device_stop(struct bcache_device *d); void bch_cache_set_unregister(struct cache_set *c); @@ -1025,5 +1044,7 @@ void bch_debug_exit(void); void bch_debug_init(void); void bch_request_exit(void); int bch_request_init(void); +void bch_btree_exit(void); +int bch_btree_init(void); #endif /* _BCACHE_H */ |
