Diffstat (limited to 'drivers/md/bcache/bcache.h')
-rw-r--r--  drivers/md/bcache/bcache.h  135
1 file changed, 78 insertions(+), 57 deletions(-)
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index fdf75352e16a..8ccacba85547 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -107,7 +107,7 @@
*
* BTREE NODES:
*
- * Our unit of allocation is a bucket, and we we can't arbitrarily allocate and
+ * Our unit of allocation is a bucket, and we can't arbitrarily allocate and
* free smaller than a bucket - so, that's how big our btree nodes are.
*
* (If buckets are really big we'll only use part of the bucket for a btree node
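To make the sizing concrete, here is a minimal sketch of the relationship the comment describes (an illustrative helper, not part of the patch; bucket_size is in 512-byte sectors, as elsewhere in this header):

/* Illustrative only: a btree node occupies (at most) one bucket. */
static inline size_t btree_node_max_bytes(unsigned int bucket_size_sectors)
{
	return (size_t)bucket_size_sectors << 9;	/* sectors -> bytes */
}
/* e.g. bucket_size = 1024 sectors gives 512 KiB btree nodes */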
@@ -176,10 +176,10 @@
* - updates to non leaf nodes just happen synchronously (see btree_split()).
*/
-#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
+#define pr_fmt(fmt) "bcache: %s() " fmt, __func__
-#include <linux/bcache.h>
#include <linux/bio.h>
+#include <linux/closure.h>
#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/mutex.h>
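The pr_fmt() change above drops the implicit trailing "\n", so call sites now carry their own newline. A sketch of how a hypothetical message expands under the new macro:

/* Hypothetical call site: */
pr_err("invalid superblock\n");
/* expands (via pr_fmt) to roughly: */
printk(KERN_ERR "bcache: %s() invalid superblock\n", __func__);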
@@ -190,9 +190,9 @@
#include <linux/workqueue.h>
#include <linux/kthread.h>
+#include "bcache_ondisk.h"
#include "bset.h"
#include "util.h"
-#include "closure.h"
struct bucket {
atomic_t pin;
@@ -200,6 +200,7 @@ struct bucket {
uint8_t gen;
uint8_t last_gc; /* Most out of date gen in the btree */
uint16_t gc_mark; /* Bitfield used by GC. See below for field */
+ uint16_t reclaimable_in_gc:1;
};
/*
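One layout note on the new flag: gc_mark is a plain uint16_t, not a bitfield, so reclaimable_in_gc:1 cannot share its storage and starts a fresh 16-bit allocation unit. A quick illustration (illustrative struct, not from the patch):

struct layout_example {
	uint16_t plain;		/* like gc_mark: a full 16-bit member */
	uint16_t flag:1;	/* like reclaimable_in_gc: begins a new
				 * 16-bit unit, so sizeof() grows by 2,
				 * modulo ABI-dependent padding */
};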
@@ -264,7 +265,8 @@ struct bcache_device {
#define BCACHE_DEV_UNLINK_DONE 2
#define BCACHE_DEV_WB_RUNNING 3
#define BCACHE_DEV_RATE_DW_RUNNING 4
- unsigned int nr_stripes;
+ int nr_stripes;
+#define BCH_MIN_STRIPE_SZ ((4 << 20) >> SECTOR_SHIFT)
unsigned int stripe_size;
atomic_t *stripe_sectors_dirty;
unsigned long *full_dirty_stripes;
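The new BCH_MIN_STRIPE_SZ works out as follows (worked arithmetic, assuming the usual SECTOR_SHIFT of 9):

/* (4 << 20) >> SECTOR_SHIFT
 *   = 4 MiB in bytes / 512 bytes per sector
 *   = 4194304 >> 9
 *   = 8192 sectors
 * i.e. stripes are sized to at least 4 MiB.
 */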
@@ -275,7 +277,7 @@ struct bcache_device {
int (*cache_miss)(struct btree *b, struct search *s,
struct bio *bio, unsigned int sectors);
- int (*ioctl)(struct bcache_device *d, fmode_t mode,
+ int (*ioctl)(struct bcache_device *d, blk_mode_t mode,
unsigned int cmd, unsigned long arg);
};
@@ -299,8 +301,10 @@ struct cached_dev {
struct list_head list;
struct bcache_device disk;
struct block_device *bdev;
+ struct file *bdev_file;
struct cache_sb sb;
+ struct cache_sb_disk *sb_disk;
struct bio sb_bio;
struct bio_vec sb_bv[1];
struct closure sb_write;
@@ -329,6 +333,9 @@ struct cached_dev {
*/
atomic_t has_dirty;
+#define BCH_CACHE_READA_ALL 0
+#define BCH_CACHE_READA_META_ONLY 1
+ unsigned int cache_readahead_policy;
struct bch_ratelimit writeback_rate;
struct delayed_work writeback_rate_update;
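The two policy values gate whether readahead I/O gets cached. A hedged sketch of the kind of check the request path performs (the real test lives in the request code; only the names above come from this header):

/* Sketch, assuming bio and dc are in scope in the bypass decision: */
if ((bio->bi_opf & REQ_RAHEAD) &&
    !(bio->bi_opf & (REQ_META | REQ_PRIO)) &&
    dc->cache_readahead_policy != BCH_CACHE_READA_ALL)
	goto skip;	/* only metadata readahead is cached */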
@@ -360,7 +367,6 @@ struct cached_dev {
/* The rest of this all shows up in sysfs */
unsigned int sequential_cutoff;
- unsigned int readahead;
unsigned int io_disable:1;
unsigned int verify:1;
@@ -369,6 +375,7 @@ struct cached_dev {
unsigned int partial_stripes_expensive:1;
unsigned int writeback_metadata:1;
unsigned int writeback_running:1;
+ unsigned int writeback_consider_fragment:1;
unsigned char writeback_percent;
unsigned int writeback_delay;
@@ -381,6 +388,9 @@ struct cached_dev {
unsigned int writeback_rate_update_seconds;
unsigned int writeback_rate_i_term_inverse;
unsigned int writeback_rate_p_term_inverse;
+ unsigned int writeback_rate_fp_term_low;
+ unsigned int writeback_rate_fp_term_mid;
+ unsigned int writeback_rate_fp_term_high;
unsigned int writeback_rate_minimum;
enum stop_on_failure stop_when_cache_set_failed;
@@ -389,7 +399,12 @@ struct cached_dev {
unsigned int error_limit;
unsigned int offline_seconds;
- char backing_dev_name[BDEVNAME_SIZE];
+ /*
+ * Retry to update writeback_rate if contention happens for
+ * down_read(dc->writeback_lock) in update_writeback_rate()
+ */
+#define BCH_WBRATE_UPDATE_MAX_SKIPS 15
+ unsigned int rate_update_retry;
};
enum alloc_reserve {
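A hedged sketch of the retry pattern described in the cached_dev comment above, as it might look in update_writeback_rate() (illustrative; only the field and macro names come from this header):

if (!down_read_trylock(&dc->writeback_lock)) {
	dc->rate_update_retry++;
	if (dc->rate_update_retry <= BCH_WBRATE_UPDATE_MAX_SKIPS)
		goto reschedule;	/* skip this round, retry later */
	/* too many skips: block on the lock so the rate still updates */
	down_read(&dc->writeback_lock);
	dc->rate_update_retry = 0;
}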
@@ -403,11 +418,13 @@ enum alloc_reserve {
struct cache {
struct cache_set *set;
struct cache_sb sb;
+ struct cache_sb_disk *sb_disk;
struct bio sb_bio;
struct bio_vec sb_bv[1];
struct kobject kobj;
struct block_device *bdev;
+ struct file *bdev_file;
struct task_struct *alloc_thread;
@@ -430,8 +447,7 @@ struct cache {
* free_inc: Incoming buckets - these are buckets that currently have
* cached data in them, and we can't reuse them until after we write
* their new gen to disk. After prio_write() finishes writing the new
- * gens/prios, they'll be moved to the free list (and possibly discarded
- * in the process)
+ * gens/prios, they'll be moved to the free list.
*/
DECLARE_FIFO(long, free)[RESERVE_NR];
DECLARE_FIFO(long, free_inc);
@@ -450,8 +466,6 @@ struct cache {
*/
unsigned int invalidate_needs_gc;
- bool discard; /* Get rid of? */
-
struct journal_device journal;
/* The rest of this all shows up in sysfs */
@@ -462,8 +476,6 @@ struct cache {
atomic_long_t meta_sectors_written;
atomic_long_t btree_sectors_written;
atomic_long_t sectors_written;
-
- char cache_dev_name[BDEVNAME_SIZE];
};
struct gc_stat {
@@ -512,11 +524,7 @@ struct cache_set {
atomic_t idle_counter;
atomic_t at_max_writeback_rate;
- struct cache_sb sb;
-
- struct cache *cache[MAX_CACHES_PER_SET];
- struct cache *cache_by_alloc[MAX_CACHES_PER_SET];
- int caches_loaded;
+ struct cache *cache;
struct bcache_device **devices;
unsigned int devices_max_used;
@@ -534,7 +542,7 @@ struct cache_set {
struct bio_set bio_split;
/* For the btree cache */
- struct shrinker shrink;
+ struct shrinker *shrink;
/* For the btree cache and anything allocation related */
struct mutex bucket_lock;
@@ -582,6 +590,7 @@ struct cache_set {
*/
wait_queue_head_t btree_cache_wait;
struct task_struct *btree_cache_alloc_lock;
+ spinlock_t btree_cannibalize_lock;
/*
* When we free a btree node, we increment the gen of the bucket the
@@ -595,6 +604,7 @@ struct cache_set {
*/
atomic_t prio_blocked;
wait_queue_head_t bucket_wait;
+ atomic_t bucket_wait_cnt;
/*
* For any bio we don't skip we subtract the number of sectors from
@@ -664,6 +674,7 @@ struct cache_set {
struct mutex verify_lock;
#endif
+ uint8_t set_uuid[16];
unsigned int nr_uuids;
struct uuid_entry *uuids;
BKEY_PADDED(uuid_bucket);
@@ -705,8 +716,8 @@ struct cache_set {
atomic_long_t writeback_keys_failed;
atomic_long_t reclaim;
+ atomic_long_t reclaimed_journal_buckets;
atomic_long_t flush_write;
- atomic_long_t retry_flush_write;
enum {
ON_ERROR_UNREGISTER,
@@ -723,11 +734,10 @@ struct cache_set {
unsigned int gc_always_rewrite:1;
unsigned int shrinker_disabled:1;
unsigned int copy_gc_enabled:1;
+ unsigned int idle_max_writeback_rate_enabled:1;
#define BUCKET_HASH_BITS 12
struct hlist_head bucket_hash[1 << BUCKET_HASH_BITS];
-
- DECLARE_HEAP(struct btree *, flush_btree);
};
struct bbio {
@@ -753,15 +763,35 @@ struct bbio {
#define btree_default_blocks(c) \
((unsigned int) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits))
-#define bucket_pages(c) ((c)->sb.bucket_size / PAGE_SECTORS)
-#define bucket_bytes(c) ((c)->sb.bucket_size << 9)
-#define block_bytes(c) ((c)->sb.block_size << 9)
+#define bucket_bytes(ca) ((ca)->sb.bucket_size << 9)
+#define block_bytes(ca) ((ca)->sb.block_size << 9)
+
+static inline unsigned int meta_bucket_pages(struct cache_sb *sb)
+{
+ unsigned int n, max_pages;
+
+ max_pages = min_t(unsigned int,
+ __rounddown_pow_of_two(USHRT_MAX) / PAGE_SECTORS,
+ MAX_ORDER_NR_PAGES);
+
+ n = sb->bucket_size / PAGE_SECTORS;
+ if (n > max_pages)
+ n = max_pages;
+
+ return n;
+}
+
+static inline unsigned int meta_bucket_bytes(struct cache_sb *sb)
+{
+ return meta_bucket_pages(sb) << PAGE_SHIFT;
+}
+
-#define prios_per_bucket(c) \
- ((bucket_bytes(c) - sizeof(struct prio_set)) / \
+#define prios_per_bucket(ca) \
+ ((meta_bucket_bytes(&(ca)->sb) - sizeof(struct prio_set)) / \
sizeof(struct bucket_disk))
-#define prio_buckets(c) \
- DIV_ROUND_UP((size_t) (c)->sb.nbuckets, prios_per_bucket(c))
+
+#define prio_buckets(ca) \
+ DIV_ROUND_UP((size_t) (ca)->sb.nbuckets, prios_per_bucket(ca))
static inline size_t sector_to_bucket(struct cache_set *c, sector_t s)
{
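A worked example of the new helpers (numbers illustrative; PAGE_SECTORS is 8 with 4 KiB pages, and MAX_ORDER_NR_PAGES is architecture dependent):

/* sb->bucket_size = 2048 sectors (a 1 MiB bucket):
 *
 *   max_pages = min(__rounddown_pow_of_two(USHRT_MAX) / PAGE_SECTORS,
 *                   MAX_ORDER_NR_PAGES)
 *             = min(32768 / 8, MAX_ORDER_NR_PAGES)
 *
 *   n = 2048 / 8 = 256 pages, below the cap, so
 *   meta_bucket_pages()  -> 256
 *   meta_bucket_bytes()  -> 256 << PAGE_SHIFT = 1 MiB
 *
 * For very large buckets n is clamped, keeping metadata allocations
 * within what the page allocator can hand out in a single order.
 */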
@@ -775,14 +805,7 @@ static inline sector_t bucket_to_sector(struct cache_set *c, size_t b)
static inline sector_t bucket_remainder(struct cache_set *c, sector_t s)
{
- return s & (c->sb.bucket_size - 1);
-}
-
-static inline struct cache *PTR_CACHE(struct cache_set *c,
- const struct bkey *k,
- unsigned int ptr)
-{
- return c->cache[PTR_DEV(k, ptr)];
+ return s & (c->cache->sb.bucket_size - 1);
}
static inline size_t PTR_BUCKET_NR(struct cache_set *c,
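bucket_remainder() relies on bucket_size being a power of two, so the mask is equivalent to a modulo; for example:

/* bucket_size = 1024 sectors:
 *   s = 5000  ->  5000 & (1024 - 1) = 5000 & 1023 = 904 = 5000 % 1024
 * The & trick only holds for power-of-two bucket sizes.
 */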
@@ -796,7 +819,7 @@ static inline struct bucket *PTR_BUCKET(struct cache_set *c,
const struct bkey *k,
unsigned int ptr)
{
- return PTR_CACHE(c, k, ptr)->buckets + PTR_BUCKET_NR(c, k, ptr);
+ return c->cache->buckets + PTR_BUCKET_NR(c, k, ptr);
}
static inline uint8_t gen_after(uint8_t a, uint8_t b)
@@ -815,7 +838,7 @@ static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
unsigned int i)
{
- return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
+ return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && c->cache;
}
/* Btree key macros */
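gen_after() (whose body this diff elides) compares 8-bit generation numbers modulo 256 so that wraparound does not produce false results; a sketch of the usual idiom, not the verbatim implementation:

static inline uint8_t gen_after_sketch(uint8_t a, uint8_t b)
{
	uint8_t r = a - b;	/* unsigned difference mod 256 */

	return r > 128U ? 0 : r;	/* huge gaps read as "not after" */
}
/* e.g. a = 2, b = 250: r = 8, so a is 8 generations ahead despite wrap */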
@@ -863,9 +886,6 @@ do { \
/* Looping macros */
-#define for_each_cache(ca, cs, iter) \
- for (iter = 0; ca = cs->cache[iter], iter < (cs)->sb.nr_in_set; iter++)
-
#define for_each_bucket(b, ca) \
for (b = (ca)->buckets + (ca)->sb.first_bucket; \
b < (ca)->buckets + (ca)->sb.nbuckets; b++)
@@ -907,11 +927,9 @@ static inline uint8_t bucket_gc_gen(struct bucket *b)
static inline void wake_up_allocators(struct cache_set *c)
{
- struct cache *ca;
- unsigned int i;
+ struct cache *ca = c->cache;
- for_each_cache(ca, c, i)
- wake_up_process(ca->alloc_thread);
+ wake_up_process(ca->alloc_thread);
}
static inline void closure_bio_submit(struct cache_set *c,
@@ -924,7 +942,7 @@ static inline void closure_bio_submit(struct cache_set *c,
bio_endio(bio);
return;
}
- generic_make_request(bio);
+ submit_bio_noacct(bio);
}
/*
@@ -968,9 +986,9 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k);
long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait);
int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
- struct bkey *k, int n, bool wait);
+ struct bkey *k, bool wait);
int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
- struct bkey *k, int n, bool wait);
+ struct bkey *k, bool wait);
bool bch_alloc_sectors(struct cache_set *c, struct bkey *k,
unsigned int sectors, unsigned int write_point,
unsigned int write_prio, bool wait);
@@ -979,19 +997,20 @@ bool bch_cached_dev_error(struct cached_dev *dc);
__printf(2, 3)
bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...);
-void bch_prio_write(struct cache *ca);
+int bch_prio_write(struct cache *ca, bool wait);
void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent);
extern struct workqueue_struct *bcache_wq;
extern struct workqueue_struct *bch_journal_wq;
+extern struct workqueue_struct *bch_flush_wq;
extern struct mutex bch_register_lock;
extern struct list_head bch_cache_sets;
-extern struct kobj_type bch_cached_dev_ktype;
-extern struct kobj_type bch_flash_dev_ktype;
-extern struct kobj_type bch_cache_set_ktype;
-extern struct kobj_type bch_cache_set_internal_ktype;
-extern struct kobj_type bch_cache_ktype;
+extern const struct kobj_type bch_cached_dev_ktype;
+extern const struct kobj_type bch_flash_dev_ktype;
+extern const struct kobj_type bch_cache_set_ktype;
+extern const struct kobj_type bch_cache_set_internal_ktype;
+extern const struct kobj_type bch_cache_ktype;
void bch_cached_dev_release(struct kobject *kobj);
void bch_flash_dev_release(struct kobject *kobj);
@@ -1006,7 +1025,7 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size);
int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
uint8_t *set_uuid);
void bch_cached_dev_detach(struct cached_dev *dc);
-void bch_cached_dev_run(struct cached_dev *dc);
+int bch_cached_dev_run(struct cached_dev *dc);
void bcache_device_stop(struct bcache_device *d);
void bch_cache_set_unregister(struct cache_set *c);
@@ -1025,5 +1044,7 @@ void bch_debug_exit(void);
void bch_debug_init(void);
void bch_request_exit(void);
int bch_request_init(void);
+void bch_btree_exit(void);
+int bch_btree_init(void);
#endif /* _BCACHE_H */