Diffstat (limited to 'drivers/md/bcache/journal.c')
| -rw-r--r-- | drivers/md/bcache/journal.c | 382 |
1 file changed, 149 insertions, 233 deletions
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 90aac4e2333f..144693b7c46a 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -53,14 +53,12 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list,
 reread:		left = ca->sb.bucket_size - offset;
 		len = min_t(unsigned int, left, PAGE_SECTORS << JSET_BITS);
 
-		bio_reset(bio);
+		bio_reset(bio, ca->bdev, REQ_OP_READ);
 		bio->bi_iter.bi_sector	= bucket + offset;
-		bio_set_dev(bio, ca->bdev);
 		bio->bi_iter.bi_size	= len << 9;
 
 		bio->bi_end_io	= journal_read_endio;
 		bio->bi_private = &cl;
-		bio_set_op_attrs(bio, REQ_OP_READ, 0);
 		bch_bio_map(bio, data);
 
 		closure_bio_submit(ca->set, bio, &cl);
@@ -98,7 +96,7 @@ reread:		left = ca->sb.bucket_size - offset;
 				return ret;
 			}
 
-			blocks = set_blocks(j, block_bytes(ca->set));
+			blocks = set_blocks(j, block_bytes(ca));
 
 			/*
 			 * Nodes in 'list' are in linear increasing order of
@@ -111,7 +109,7 @@ reread:		left = ca->sb.bucket_size - offset;
 			 * Check from the oldest jset for last_seq. If
 			 * i->j.seq < j->last_seq, it means the oldest jset
 			 * in list is expired and useless, remove it from
-			 * this list. Otherwise, j is a condidate jset for
+			 * this list. Otherwise, j is a candidate jset for
 			 * further following checks.
 			 */
 			while (!list_empty(list)) {
@@ -151,7 +149,8 @@ add:
 				    bytes, GFP_KERNEL);
 			if (!i)
 				return -ENOMEM;
-			memcpy(&i->j, j, bytes);
+			unsafe_memcpy(&i->j, j, bytes,
+				/* "bytes" was calculated by set_bytes() above */);
 			/* Add to the location after 'where' points to */
 			list_add(&i->list, where);
 			ret = 1;
@@ -179,115 +178,108 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
 		ret;							\
 	})
 
-	struct cache *ca;
-	unsigned int iter;
+	struct cache *ca = c->cache;
 	int ret = 0;
+	struct journal_device *ja = &ca->journal;
+	DECLARE_BITMAP(bitmap, SB_JOURNAL_BUCKETS);
+	unsigned int i, l, r, m;
+	uint64_t seq;
 
-	for_each_cache(ca, c, iter) {
-		struct journal_device *ja = &ca->journal;
-		DECLARE_BITMAP(bitmap, SB_JOURNAL_BUCKETS);
-		unsigned int i, l, r, m;
-		uint64_t seq;
-
-		bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
-		pr_debug("%u journal buckets\n", ca->sb.njournal_buckets);
+	bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
+	pr_debug("%u journal buckets\n", ca->sb.njournal_buckets);
 
+	/*
+	 * Read journal buckets ordered by golden ratio hash to quickly
+	 * find a sequence of buckets with valid journal entries
+	 */
+	for (i = 0; i < ca->sb.njournal_buckets; i++) {
 		/*
-		 * Read journal buckets ordered by golden ratio hash to quickly
-		 * find a sequence of buckets with valid journal entries
+		 * We must try the index l with ZERO first for
+		 * correctness due to the scenario that the journal
+		 * bucket is circular buffer which might have wrapped
 		 */
-		for (i = 0; i < ca->sb.njournal_buckets; i++) {
-			/*
-			 * We must try the index l with ZERO first for
-			 * correctness due to the scenario that the journal
-			 * bucket is circular buffer which might have wrapped
-			 */
-			l = (i * 2654435769U) % ca->sb.njournal_buckets;
+		l = (i * 2654435769U) % ca->sb.njournal_buckets;
 
-			if (test_bit(l, bitmap))
-				break;
+		if (test_bit(l, bitmap))
+			break;
 
-			if (read_bucket(l))
-				goto bsearch;
-		}
+		if (read_bucket(l))
+			goto bsearch;
+	}
 
-		/*
-		 * If that fails, check all the buckets we haven't checked
-		 * already
-		 */
-		pr_debug("falling back to linear search\n");
+	/*
+	 * If that fails, check all the buckets we haven't checked
+	 * already
+	 */
+	pr_debug("falling back to linear search\n");
 
-		for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets);
-		     l < ca->sb.njournal_buckets;
-		     l = find_next_zero_bit(bitmap, ca->sb.njournal_buckets,
-					    l + 1))
-			if (read_bucket(l))
-				goto bsearch;
+	for_each_clear_bit(l, bitmap, ca->sb.njournal_buckets)
+		if (read_bucket(l))
+			goto bsearch;
 
-		/* no journal entries on this device? */
-		if (l == ca->sb.njournal_buckets)
-			continue;
+	/* no journal entries on this device? */
+	if (l == ca->sb.njournal_buckets)
+		goto out;
 bsearch:
-		BUG_ON(list_empty(list));
+	BUG_ON(list_empty(list));
 
-		/* Binary search */
-		m = l;
-		r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
-		pr_debug("starting binary search, l %u r %u\n", l, r);
+	/* Binary search */
+	m = l;
+	r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
+	pr_debug("starting binary search, l %u r %u\n", l, r);
 
-		while (l + 1 < r) {
-			seq = list_entry(list->prev, struct journal_replay,
-					 list)->j.seq;
+	while (l + 1 < r) {
+		seq = list_entry(list->prev, struct journal_replay,
+				 list)->j.seq;
 
-			m = (l + r) >> 1;
-			read_bucket(m);
+		m = (l + r) >> 1;
+		read_bucket(m);
 
-			if (seq != list_entry(list->prev, struct journal_replay,
-					      list)->j.seq)
-				l = m;
-			else
-				r = m;
-		}
+		if (seq != list_entry(list->prev, struct journal_replay,
+				      list)->j.seq)
+			l = m;
+		else
+			r = m;
+	}
 
-		/*
-		 * Read buckets in reverse order until we stop finding more
-		 * journal entries
-		 */
-		pr_debug("finishing up: m %u njournal_buckets %u\n",
-			 m, ca->sb.njournal_buckets);
-		l = m;
+	/*
+	 * Read buckets in reverse order until we stop finding more
+	 * journal entries
+	 */
+	pr_debug("finishing up: m %u njournal_buckets %u\n",
+		 m, ca->sb.njournal_buckets);
+	l = m;
 
-		while (1) {
-			if (!l--)
-				l = ca->sb.njournal_buckets - 1;
+	while (1) {
+		if (!l--)
+			l = ca->sb.njournal_buckets - 1;
 
-			if (l == m)
-				break;
+		if (l == m)
+			break;
 
-			if (test_bit(l, bitmap))
-				continue;
+		if (test_bit(l, bitmap))
+			continue;
 
-			if (!read_bucket(l))
-				break;
-		}
+		if (!read_bucket(l))
+			break;
+	}
 
-		seq = 0;
+	seq = 0;
 
-		for (i = 0; i < ca->sb.njournal_buckets; i++)
-			if (ja->seq[i] > seq) {
-				seq = ja->seq[i];
-				/*
-				 * When journal_reclaim() goes to allocate for
-				 * the first time, it'll use the bucket after
-				 * ja->cur_idx
-				 */
-				ja->cur_idx = i;
-				ja->last_idx = ja->discard_idx = (i + 1) %
-					ca->sb.njournal_buckets;
+	for (i = 0; i < ca->sb.njournal_buckets; i++)
+		if (ja->seq[i] > seq) {
+			seq = ja->seq[i];
+			/*
+			 * When journal_reclaim() goes to allocate for
+			 * the first time, it'll use the bucket after
+			 * ja->cur_idx
+			 */
+			ja->cur_idx = i;
+			ja->last_idx = (i + 1) % ca->sb.njournal_buckets;
 
-			}
-	}
+		}
 
+out:
 	if (!list_empty(list))
 		c->journal.seq = list_entry(list->prev,
 					    struct journal_replay,
@@ -343,18 +335,6 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list)
 	}
 }
 
-static bool is_discard_enabled(struct cache_set *s)
-{
-	struct cache *ca;
-	unsigned int i;
-
-	for_each_cache(ca, s, i)
-		if (ca->discard)
-			return true;
-
-	return false;
-}
-
 int bch_journal_replay(struct cache_set *s, struct list_head *list)
 {
 	int ret = 0, keys = 0, entries = 0;
@@ -369,15 +349,10 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
 		BUG_ON(i->pin && atomic_read(i->pin) != 1);
 
 		if (n != i->j.seq) {
-			if (n == start && is_discard_enabled(s))
-				pr_info("journal entries %llu-%llu may be discarded! (replaying %llu-%llu)\n",
-					n, i->j.seq - 1, start, end);
-			else {
-				pr_err("journal entries %llu-%llu missing! (replaying %llu-%llu)\n",
-					n, i->j.seq - 1, start, end);
-				ret = -EIO;
-				goto err;
-			}
+			pr_err("journal entries %llu-%llu missing! (replaying %llu-%llu)\n",
+			       n, i->j.seq - 1, start, end);
+			ret = -EIO;
+			goto err;
 		}
 
 		for (k = i->j.start;
@@ -415,6 +390,11 @@ err:
 	return ret;
 }
 
+void bch_journal_space_reserve(struct journal *j)
+{
+	j->do_reserve = true;
+}
+
 /* Journalling */
 
 static void btree_flush_write(struct cache_set *c)
@@ -506,7 +486,7 @@ static void btree_flush_write(struct cache_set *c)
 		 * - If there are matched nodes recorded in btree_nodes[],
 		 *   they are clean now (this is why and how the oldest
 		 *   journal entry can be reclaimed). These selected nodes
-		 *   will be ignored and skipped in the folowing for-loop.
+		 *   will be ignored and skipped in the following for-loop.
 		 */
 		if (((btree_current_write(b)->journal - fifo_front_p) &
 		     mask) != 0) {
@@ -572,73 +552,31 @@ out:
 
 #define last_seq(j)	((j)->seq - fifo_used(&(j)->pin) + 1)
 
-static void journal_discard_endio(struct bio *bio)
-{
-	struct journal_device *ja =
-		container_of(bio, struct journal_device, discard_bio);
-	struct cache *ca = container_of(ja, struct cache, journal);
-
-	atomic_set(&ja->discard_in_flight, DISCARD_DONE);
-
-	closure_wake_up(&ca->set->journal.wait);
-	closure_put(&ca->set->cl);
-}
-
-static void journal_discard_work(struct work_struct *work)
-{
-	struct journal_device *ja =
-		container_of(work, struct journal_device, discard_work);
-
-	submit_bio(&ja->discard_bio);
-}
-
-static void do_journal_discard(struct cache *ca)
+static unsigned int free_journal_buckets(struct cache_set *c)
 {
-	struct journal_device *ja = &ca->journal;
-	struct bio *bio = &ja->discard_bio;
-
-	if (!ca->discard) {
-		ja->discard_idx = ja->last_idx;
-		return;
-	}
-
-	switch (atomic_read(&ja->discard_in_flight)) {
-	case DISCARD_IN_FLIGHT:
-		return;
-
-	case DISCARD_DONE:
-		ja->discard_idx = (ja->discard_idx + 1) %
-			ca->sb.njournal_buckets;
-
-		atomic_set(&ja->discard_in_flight, DISCARD_READY);
-		/* fallthrough */
-
-	case DISCARD_READY:
-		if (ja->discard_idx == ja->last_idx)
-			return;
+	struct journal *j = &c->journal;
+	struct cache *ca = c->cache;
+	struct journal_device *ja = &c->cache->journal;
+	unsigned int n;
 
-		atomic_set(&ja->discard_in_flight, DISCARD_IN_FLIGHT);
+	/* In case njournal_buckets is not power of 2 */
+	if (ja->cur_idx >= ja->last_idx)
+		n = ca->sb.njournal_buckets + ja->last_idx - ja->cur_idx;
+	else
+		n = ja->last_idx - ja->cur_idx;
 
-		bio_init(bio, bio->bi_inline_vecs, 1);
-		bio_set_op_attrs(bio, REQ_OP_DISCARD, 0);
-		bio->bi_iter.bi_sector	= bucket_to_sector(ca->set,
-						ca->sb.d[ja->discard_idx]);
-		bio_set_dev(bio, ca->bdev);
-		bio->bi_iter.bi_size	= bucket_bytes(ca);
-		bio->bi_end_io		= journal_discard_endio;
+	if (n > (1 + j->do_reserve))
+		return n - (1 + j->do_reserve);
 
-		closure_get(&ca->set->cl);
-		INIT_WORK(&ja->discard_work, journal_discard_work);
-		queue_work(bch_journal_wq, &ja->discard_work);
-	}
+	return 0;
 }
 
 static void journal_reclaim(struct cache_set *c)
 {
 	struct bkey *k = &c->journal.key;
-	struct cache *ca;
+	struct cache *ca = c->cache;
 	uint64_t last_seq;
-	unsigned int iter, n = 0;
+	struct journal_device *ja = &ca->journal;
 	atomic_t p __maybe_unused;
 
 	atomic_long_inc(&c->reclaim);
@@ -650,46 +588,27 @@ static void journal_reclaim(struct cache_set *c)
 
 	/* Update last_idx */
 
-	for_each_cache(ca, c, iter) {
-		struct journal_device *ja = &ca->journal;
-
-		while (ja->last_idx != ja->cur_idx &&
-		       ja->seq[ja->last_idx] < last_seq)
-			ja->last_idx = (ja->last_idx + 1) %
-				ca->sb.njournal_buckets;
-	}
-
-	for_each_cache(ca, c, iter)
-		do_journal_discard(ca);
+	while (ja->last_idx != ja->cur_idx &&
+	       ja->seq[ja->last_idx] < last_seq)
+		ja->last_idx = (ja->last_idx + 1) %
+			ca->sb.njournal_buckets;
 
 	if (c->journal.blocks_free)
 		goto out;
 
-	/*
-	 * Allocate:
-	 * XXX: Sort by free journal space
-	 */
-
-	for_each_cache(ca, c, iter) {
-		struct journal_device *ja = &ca->journal;
-		unsigned int next = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
+	if (!free_journal_buckets(c))
+		goto out;
 
-		/* No space available on this device */
-		if (next == ja->discard_idx)
-			continue;
+	ja->cur_idx = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
+	k->ptr[0] = MAKE_PTR(0,
+			     bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
+			     ca->sb.nr_this_dev);
+	atomic_long_inc(&c->reclaimed_journal_buckets);
 
-		ja->cur_idx = next;
-		k->ptr[n++] = MAKE_PTR(0,
-				  bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
-				  ca->sb.nr_this_dev);
-		atomic_long_inc(&c->reclaimed_journal_buckets);
-	}
+	bkey_init(k);
+	SET_KEY_PTRS(k, 1);
+	c->journal.blocks_free = ca->sb.bucket_size >> c->block_bits;
 
-	if (n) {
-		bkey_init(k);
-		SET_KEY_PTRS(k, n);
-		c->journal.blocks_free = c->sb.bucket_size >> c->block_bits;
-	}
 out:
 	if (!journal_full(&c->journal))
 		__closure_wake_up(&c->journal.wait);
@@ -727,11 +646,11 @@ static void journal_write_endio(struct bio *bio)
 	closure_put(&w->c->journal.io);
 }
 
-static void journal_write(struct closure *cl);
+static CLOSURE_CALLBACK(journal_write);
 
-static void journal_write_done(struct closure *cl)
+static CLOSURE_CALLBACK(journal_write_done)
 {
-	struct journal *j = container_of(cl, struct journal, io);
+	closure_type(j, struct journal, io);
 	struct journal_write *w = (j->cur == j->w)
 		? &j->w[1]
 		: &j->w[0];
@@ -740,24 +659,24 @@ static void journal_write_done(struct closure *cl)
 	continue_at_nobarrier(cl, journal_write, bch_journal_wq);
 }
 
-static void journal_write_unlock(struct closure *cl)
+static CLOSURE_CALLBACK(journal_write_unlock)
 	__releases(&c->journal.lock)
 {
-	struct cache_set *c = container_of(cl, struct cache_set, journal.io);
+	closure_type(c, struct cache_set, journal.io);
 
 	c->journal.io_in_flight = 0;
 	spin_unlock(&c->journal.lock);
 }
 
-static void journal_write_unlocked(struct closure *cl)
+static CLOSURE_CALLBACK(journal_write_unlocked)
 	__releases(c->journal.lock)
 {
-	struct cache_set *c = container_of(cl, struct cache_set, journal.io);
-	struct cache *ca;
+	closure_type(c, struct cache_set, journal.io);
+	struct cache *ca = c->cache;
 	struct journal_write *w = c->journal.cur;
 	struct bkey *k = &c->journal.key;
-	unsigned int i, sectors = set_blocks(w->data, block_bytes(c)) *
-		c->sb.block_size;
+	unsigned int i, sectors = set_blocks(w->data, block_bytes(ca)) *
+		ca->sb.block_size;
 	struct bio *bio;
 	struct bio_list list;
 
@@ -776,36 +695,32 @@ static void journal_write_unlocked(struct closure *cl)
 		return;
 	}
 
-	c->journal.blocks_free -= set_blocks(w->data, block_bytes(c));
+	c->journal.blocks_free -= set_blocks(w->data, block_bytes(ca));
 
 	w->data->btree_level = c->root->level;
 
 	bkey_copy(&w->data->btree_root, &c->root->key);
 	bkey_copy(&w->data->uuid_bucket, &c->uuid_bucket);
 
-	for_each_cache(ca, c, i)
-		w->data->prio_bucket[ca->sb.nr_this_dev] = ca->prio_buckets[0];
-
-	w->data->magic		= jset_magic(&c->sb);
+	w->data->prio_bucket[ca->sb.nr_this_dev] = ca->prio_buckets[0];
+	w->data->magic		= jset_magic(&ca->sb);
 	w->data->version	= BCACHE_JSET_VERSION;
 	w->data->last_seq	= last_seq(&c->journal);
 	w->data->csum		= csum_set(w->data);
 
 	for (i = 0; i < KEY_PTRS(k); i++) {
-		ca = PTR_CACHE(c, k, i);
+		ca = c->cache;
 		bio = &ca->journal.bio;
 
 		atomic_long_add(sectors, &ca->meta_sectors_written);
 
-		bio_reset(bio);
+		bio_reset(bio, ca->bdev, REQ_OP_WRITE |
+			  REQ_SYNC | REQ_META | REQ_PREFLUSH | REQ_FUA);
 		bio->bi_iter.bi_sector	= PTR_OFFSET(k, i);
-		bio_set_dev(bio, ca->bdev);
 		bio->bi_iter.bi_size = sectors << 9;
 
 		bio->bi_end_io	= journal_write_endio;
 		bio->bi_private = w;
-		bio_set_op_attrs(bio, REQ_OP_WRITE,
-				 REQ_SYNC|REQ_META|REQ_PREFLUSH|REQ_FUA);
 		bch_bio_map(bio, w->data);
 
 		trace_bcache_journal_write(bio, w->data->keys);
@@ -831,12 +746,12 @@ static void journal_write_unlocked(struct closure *cl)
 	continue_at(cl, journal_write_done, NULL);
 }
 
-static void journal_write(struct closure *cl)
+static CLOSURE_CALLBACK(journal_write)
 {
-	struct cache_set *c = container_of(cl, struct cache_set, journal.io);
+	closure_type(c, struct cache_set, journal.io);
 
 	spin_lock(&c->journal.lock);
-	journal_write_unlocked(cl);
+	journal_write_unlocked(&cl->work);
 }
 
 static void journal_try_write(struct cache_set *c)
@@ -862,6 +777,7 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c,
 	size_t sectors;
 	struct closure cl;
 	bool wait = false;
+	struct cache *ca = c->cache;
 
 	closure_init_stack(&cl);
 
@@ -871,10 +787,10 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c,
 		struct journal_write *w = c->journal.cur;
 
 		sectors = __set_blocks(w->data, w->data->keys + nkeys,
-				       block_bytes(c)) * c->sb.block_size;
+				       block_bytes(ca)) * ca->sb.block_size;
 
 		if (sectors <= min_t(size_t,
-				     c->journal.blocks_free * c->sb.block_size,
+				     c->journal.blocks_free * ca->sb.block_size,
 				     PAGE_SECTORS << JSET_BITS))
 			return w;
 
@@ -939,7 +855,7 @@ atomic_t *bch_journal(struct cache_set *c,
 	if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags)))
 		return NULL;
 
-	if (!CACHE_SYNC(&c->sb))
+	if (!CACHE_SYNC(&c->cache->sb))
 		return NULL;
 
 	w = journal_wait_for_write(c, bch_keylist_nkeys(keys));
@@ -955,8 +871,8 @@ atomic_t *bch_journal(struct cache_set *c,
 		journal_try_write(c);
 	} else if (!w->dirty) {
 		w->dirty = true;
-		schedule_delayed_work(&c->journal.work,
-				      msecs_to_jiffies(c->journal_delay_ms));
+		queue_delayed_work(bch_flush_wq, &c->journal.work,
+				   msecs_to_jiffies(c->journal_delay_ms));
 		spin_unlock(&c->journal.lock);
 	} else {
 		spin_unlock(&c->journal.lock);
@@ -999,8 +915,8 @@ int bch_journal_alloc(struct cache_set *c)
 	j->w[1].c = c;
 
 	if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
-	    !(j->w[0].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)) ||
-	    !(j->w[1].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)))
+	    !(j->w[0].data = (void *) __get_free_pages(GFP_KERNEL|__GFP_COMP, JSET_BITS)) ||
+	    !(j->w[1].data = (void *) __get_free_pages(GFP_KERNEL|__GFP_COMP, JSET_BITS)))
 		return -ENOMEM;
 
 	return 0;
