summaryrefslogtreecommitdiff
path: root/drivers/md/raid5-cache.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid5-cache.c')
-rw-r--r--drivers/md/raid5-cache.c286
1 files changed, 134 insertions, 152 deletions
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 0b5dcaabbc15..e29e69335c69 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -120,12 +120,12 @@ struct r5l_log {
struct bio_set bs;
mempool_t meta_pool;
- struct md_thread *reclaim_thread;
+ struct md_thread __rcu *reclaim_thread;
unsigned long reclaim_target; /* number of space that need to be
* reclaimed. if it's 0, reclaim spaces
* used by io_units which are in
* IO_UNIT_STRIPE_END state (eg, reclaim
- * dones't wait for specific io_unit
+ * doesn't wait for specific io_unit
* switching to IO_UNIT_STRIPE_END
* state) */
wait_queue_head_t iounit_wait;
@@ -313,10 +313,6 @@ void r5c_handle_cached_data_endio(struct r5conf *conf,
if (sh->dev[i].written) {
set_bit(R5_UPTODATE, &sh->dev[i].flags);
r5c_return_dev_pending_writes(conf, &sh->dev[i]);
- md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
- RAID5_STRIPE_SECTORS(conf),
- !test_bit(STRIPE_DEGRADED, &sh->state),
- 0);
}
}
}
@@ -327,8 +323,9 @@ void r5l_wake_reclaim(struct r5l_log *log, sector_t space);
void r5c_check_stripe_cache_usage(struct r5conf *conf)
{
int total_cached;
+ struct r5l_log *log = READ_ONCE(conf->log);
- if (!r5c_is_writeback(conf->log))
+ if (!r5c_is_writeback(log))
return;
total_cached = atomic_read(&conf->r5c_cached_partial_stripes) +
@@ -344,7 +341,7 @@ void r5c_check_stripe_cache_usage(struct r5conf *conf)
*/
if (total_cached > conf->min_nr_stripes * 1 / 2 ||
atomic_read(&conf->empty_inactive_list_nr) > 0)
- r5l_wake_reclaim(conf->log, 0);
+ r5l_wake_reclaim(log, 0);
}
/*
@@ -353,7 +350,9 @@ void r5c_check_stripe_cache_usage(struct r5conf *conf)
*/
void r5c_check_cached_full_stripe(struct r5conf *conf)
{
- if (!r5c_is_writeback(conf->log))
+ struct r5l_log *log = READ_ONCE(conf->log);
+
+ if (!r5c_is_writeback(log))
return;
/*
@@ -363,7 +362,7 @@ void r5c_check_cached_full_stripe(struct r5conf *conf)
if (atomic_read(&conf->r5c_cached_full_stripes) >=
min(R5C_FULL_STRIPE_FLUSH_BATCH(conf),
conf->chunk_sectors >> RAID5_STRIPE_SHIFT(conf)))
- r5l_wake_reclaim(conf->log, 0);
+ r5l_wake_reclaim(log, 0);
}
/*
@@ -396,7 +395,7 @@ void r5c_check_cached_full_stripe(struct r5conf *conf)
*/
static sector_t r5c_log_required_to_flush_cache(struct r5conf *conf)
{
- struct r5l_log *log = conf->log;
+ struct r5l_log *log = READ_ONCE(conf->log);
if (!r5c_is_writeback(log))
return 0;
@@ -449,7 +448,7 @@ static inline void r5c_update_log_state(struct r5l_log *log)
void r5c_make_stripe_write_out(struct stripe_head *sh)
{
struct r5conf *conf = sh->raid_conf;
- struct r5l_log *log = conf->log;
+ struct r5l_log *log = READ_ONCE(conf->log);
BUG_ON(!r5c_is_writeback(log));
@@ -491,7 +490,7 @@ static void r5c_handle_parity_cached(struct stripe_head *sh)
*/
static void r5c_finish_cache_stripe(struct stripe_head *sh)
{
- struct r5l_log *log = sh->raid_conf->log;
+ struct r5l_log *log = READ_ONCE(sh->raid_conf->log);
if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) {
BUG_ON(test_bit(STRIPE_R5C_CACHING, &sh->state));
@@ -683,7 +682,6 @@ static void r5c_disable_writeback_async(struct work_struct *work)
disable_writeback_work);
struct mddev *mddev = log->rdev->mddev;
struct r5conf *conf = mddev->private;
- int locked = 0;
if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
return;
@@ -692,14 +690,14 @@ static void r5c_disable_writeback_async(struct work_struct *work)
/* wait superblock change before suspend */
wait_event(mddev->sb_wait,
- conf->log == NULL ||
- (!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) &&
- (locked = mddev_trylock(mddev))));
- if (locked) {
- mddev_suspend(mddev);
+ !READ_ONCE(conf->log) ||
+ !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
+
+ log = READ_ONCE(conf->log);
+ if (log) {
+ mddev_suspend(mddev, false);
log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
mddev_resume(mddev);
- mddev_unlock(mddev);
}
}
@@ -716,7 +714,7 @@ static void r5l_submit_current_io(struct r5l_log *log)
block = page_address(io->meta_page);
block->meta_size = cpu_to_le32(io->meta_offset);
- crc = crc32c_le(log->uuid_checksum, block, PAGE_SIZE);
+ crc = crc32c(log->uuid_checksum, block, PAGE_SIZE);
block->checksum = cpu_to_le32(crc);
log->current_io = NULL;
@@ -735,10 +733,9 @@ static void r5l_submit_current_io(struct r5l_log *log)
static struct bio *r5l_bio_alloc(struct r5l_log *log)
{
- struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_VECS, &log->bs);
+ struct bio *bio = bio_alloc_bioset(log->rdev->bdev, BIO_MAX_VECS,
+ REQ_OP_WRITE, GFP_NOIO, &log->bs);
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- bio_set_dev(bio, log->rdev->bdev);
bio->bi_iter.bi_sector = log->rdev->data_offset + log->log_start;
return bio;
@@ -793,7 +790,7 @@ static struct r5l_io_unit *r5l_new_meta(struct r5l_log *log)
io->current_bio = r5l_bio_alloc(log);
io->current_bio->bi_end_io = r5l_log_endio;
io->current_bio->bi_private = io;
- bio_add_page(io->current_bio, io->meta_page, PAGE_SIZE, 0);
+ __bio_add_page(io->current_bio, io->meta_page, PAGE_SIZE, 0);
r5_reserve_log_entry(log, io);
@@ -1022,10 +1019,10 @@ int r5l_write_stripe(struct r5l_log *log, struct stripe_head *sh)
/* checksum is already calculated in last run */
if (test_bit(STRIPE_LOG_TRAPPED, &sh->state))
continue;
- addr = kmap_atomic(sh->dev[i].page);
- sh->dev[i].log_checksum = crc32c_le(log->uuid_checksum,
- addr, PAGE_SIZE);
- kunmap_atomic(addr);
+ addr = kmap_local_page(sh->dev[i].page);
+ sh->dev[i].log_checksum = crc32c(log->uuid_checksum,
+ addr, PAGE_SIZE);
+ kunmap_local(addr);
}
parity_pages = 1 + !!(sh->qd_idx >= 0);
data_pages = write_disks - parity_pages;
@@ -1152,7 +1149,7 @@ static void r5l_run_no_space_stripes(struct r5l_log *log)
static sector_t r5c_calculate_new_cp(struct r5conf *conf)
{
struct stripe_head *sh;
- struct r5l_log *log = conf->log;
+ struct r5l_log *log = READ_ONCE(conf->log);
sector_t new_cp;
unsigned long flags;
@@ -1160,12 +1157,12 @@ static sector_t r5c_calculate_new_cp(struct r5conf *conf)
return log->next_checkpoint;
spin_lock_irqsave(&log->stripe_in_journal_lock, flags);
- if (list_empty(&conf->log->stripe_in_journal_list)) {
+ if (list_empty(&log->stripe_in_journal_list)) {
/* all stripes flushed */
spin_unlock_irqrestore(&log->stripe_in_journal_lock, flags);
return log->next_checkpoint;
}
- sh = list_first_entry(&conf->log->stripe_in_journal_list,
+ sh = list_first_entry(&log->stripe_in_journal_list,
struct stripe_head, r5c);
new_cp = sh->log_start;
spin_unlock_irqrestore(&log->stripe_in_journal_lock, flags);
@@ -1261,6 +1258,7 @@ static void r5l_log_flush_endio(struct bio *bio)
if (bio->bi_status)
md_error(log->rdev->mddev, log->rdev);
+ bio_uninit(bio);
spin_lock_irqsave(&log->io_list_lock, flags);
list_for_each_entry(io, &log->flushing_ios, log_sibling)
@@ -1302,10 +1300,9 @@ void r5l_flush_stripe_to_raid(struct r5l_log *log)
if (!do_flush)
return;
- bio_reset(&log->flush_bio);
- bio_set_dev(&log->flush_bio, log->rdev->bdev);
+ bio_init(&log->flush_bio, log->rdev->bdev, NULL, 0,
+ REQ_OP_WRITE | REQ_PREFLUSH);
log->flush_bio.bi_end_io = r5l_log_flush_endio;
- log->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
submit_bio(&log->flush_bio);
}
@@ -1318,7 +1315,7 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
r5l_write_super(log, end);
- if (!blk_queue_discard(bdev_get_queue(bdev)))
+ if (!bdev_max_discard_sectors(bdev))
return;
mddev = log->rdev->mddev;
@@ -1327,9 +1324,9 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
* superblock is updated to new log tail. Updating superblock (either
* directly call md_update_sb() or depend on md thread) must hold
* reconfig mutex. On the other hand, raid5_quiesce is called with
- * reconfig_mutex hold. The first step of raid5_quiesce() is waitting
- * for all IO finish, hence waitting for reclaim thread, while reclaim
- * thread is calling this function and waitting for reconfig mutex. So
+ * reconfig_mutex hold. The first step of raid5_quiesce() is waiting
+ * for all IO finish, hence waiting for reclaim thread, while reclaim
+ * thread is calling this function and waiting for reconfig mutex. So
* there is a deadlock. We workaround this issue with a trylock.
* FIXME: we could miss discard if we can't take reconfig mutex
*/
@@ -1344,14 +1341,14 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
if (log->last_checkpoint < end) {
blkdev_issue_discard(bdev,
log->last_checkpoint + log->rdev->data_offset,
- end - log->last_checkpoint, GFP_NOIO, 0);
+ end - log->last_checkpoint, GFP_NOIO);
} else {
blkdev_issue_discard(bdev,
log->last_checkpoint + log->rdev->data_offset,
log->device_size - log->last_checkpoint,
- GFP_NOIO, 0);
+ GFP_NOIO);
blkdev_issue_discard(bdev, log->rdev->data_offset, end,
- GFP_NOIO, 0);
+ GFP_NOIO);
}
}
@@ -1400,7 +1397,7 @@ void r5c_flush_cache(struct r5conf *conf, int num)
struct stripe_head *sh, *next;
lockdep_assert_held(&conf->device_lock);
- if (!conf->log)
+ if (!READ_ONCE(conf->log))
return;
count = 0;
@@ -1421,7 +1418,7 @@ void r5c_flush_cache(struct r5conf *conf, int num)
static void r5c_do_reclaim(struct r5conf *conf)
{
- struct r5l_log *log = conf->log;
+ struct r5l_log *log = READ_ONCE(conf->log);
struct stripe_head *sh;
int count = 0;
unsigned long flags;
@@ -1550,7 +1547,7 @@ static void r5l_reclaim_thread(struct md_thread *thread)
{
struct mddev *mddev = thread->mddev;
struct r5conf *conf = mddev->private;
- struct r5l_log *log = conf->log;
+ struct r5l_log *log = READ_ONCE(conf->log);
if (!log)
return;
@@ -1565,43 +1562,40 @@ void r5l_wake_reclaim(struct r5l_log *log, sector_t space)
if (!log)
return;
+
+ target = READ_ONCE(log->reclaim_target);
do {
- target = log->reclaim_target;
if (new < target)
return;
- } while (cmpxchg(&log->reclaim_target, target, new) != target);
+ } while (!try_cmpxchg(&log->reclaim_target, &target, new));
md_wakeup_thread(log->reclaim_thread);
}
void r5l_quiesce(struct r5l_log *log, int quiesce)
{
- struct mddev *mddev;
+ struct mddev *mddev = log->rdev->mddev;
+ struct md_thread *thread = rcu_dereference_protected(
+ log->reclaim_thread, lockdep_is_held(&mddev->reconfig_mutex));
if (quiesce) {
/* make sure r5l_write_super_and_discard_space exits */
- mddev = log->rdev->mddev;
wake_up(&mddev->sb_wait);
- kthread_park(log->reclaim_thread->tsk);
+ kthread_park(thread->tsk);
r5l_wake_reclaim(log, MaxSector);
r5l_do_reclaim(log);
} else
- kthread_unpark(log->reclaim_thread->tsk);
+ kthread_unpark(thread->tsk);
}
bool r5l_log_disk_error(struct r5conf *conf)
{
- struct r5l_log *log;
- bool ret;
- /* don't allow write if journal disk is missing */
- rcu_read_lock();
- log = rcu_dereference(conf->log);
+ struct r5l_log *log = READ_ONCE(conf->log);
+ /* don't allow write if journal disk is missing */
if (!log)
- ret = test_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
+ return test_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
else
- ret = test_bit(Faulty, &log->rdev->flags);
- rcu_read_unlock();
- return ret;
+ return test_bit(Faulty, &log->rdev->flags);
}
#define R5L_RECOVERY_PAGE_POOL_SIZE 256
@@ -1623,10 +1617,10 @@ struct r5l_recovery_ctx {
* just copy data from the pool.
*/
struct page *ra_pool[R5L_RECOVERY_PAGE_POOL_SIZE];
+ struct bio_vec ra_bvec[R5L_RECOVERY_PAGE_POOL_SIZE];
sector_t pool_offset; /* offset of first page in the pool */
int total_pages; /* total allocated pages */
int valid_pages; /* pages with valid data */
- struct bio *ra_bio; /* bio to do the read ahead */
};
static int r5l_recovery_allocate_ra_pool(struct r5l_log *log,
@@ -1634,10 +1628,6 @@ static int r5l_recovery_allocate_ra_pool(struct r5l_log *log,
{
struct page *page;
- ctx->ra_bio = bio_alloc_bioset(GFP_KERNEL, BIO_MAX_VECS, &log->bs);
- if (!ctx->ra_bio)
- return -ENOMEM;
-
ctx->valid_pages = 0;
ctx->total_pages = 0;
while (ctx->total_pages < R5L_RECOVERY_PAGE_POOL_SIZE) {
@@ -1649,10 +1639,8 @@ static int r5l_recovery_allocate_ra_pool(struct r5l_log *log,
ctx->total_pages += 1;
}
- if (ctx->total_pages == 0) {
- bio_put(ctx->ra_bio);
+ if (ctx->total_pages == 0)
return -ENOMEM;
- }
ctx->pool_offset = 0;
return 0;
@@ -1665,7 +1653,6 @@ static void r5l_recovery_free_ra_pool(struct r5l_log *log,
for (i = 0; i < ctx->total_pages; ++i)
put_page(ctx->ra_pool[i]);
- bio_put(ctx->ra_bio);
}
/*
@@ -1678,17 +1665,19 @@ static int r5l_recovery_fetch_ra_pool(struct r5l_log *log,
struct r5l_recovery_ctx *ctx,
sector_t offset)
{
- bio_reset(ctx->ra_bio);
- bio_set_dev(ctx->ra_bio, log->rdev->bdev);
- bio_set_op_attrs(ctx->ra_bio, REQ_OP_READ, 0);
- ctx->ra_bio->bi_iter.bi_sector = log->rdev->data_offset + offset;
+ struct bio bio;
+ int ret;
+
+ bio_init(&bio, log->rdev->bdev, ctx->ra_bvec,
+ R5L_RECOVERY_PAGE_POOL_SIZE, REQ_OP_READ);
+ bio.bi_iter.bi_sector = log->rdev->data_offset + offset;
ctx->valid_pages = 0;
ctx->pool_offset = offset;
while (ctx->valid_pages < ctx->total_pages) {
- bio_add_page(ctx->ra_bio,
- ctx->ra_pool[ctx->valid_pages], PAGE_SIZE, 0);
+ __bio_add_page(&bio, ctx->ra_pool[ctx->valid_pages], PAGE_SIZE,
+ 0);
ctx->valid_pages += 1;
offset = r5l_ring_add(log, offset, BLOCK_SECTORS);
@@ -1697,7 +1686,9 @@ static int r5l_recovery_fetch_ra_pool(struct r5l_log *log,
break;
}
- return submit_bio_wait(ctx->ra_bio);
+ ret = submit_bio_wait(&bio);
+ bio_uninit(&bio);
+ return ret;
}
/*
@@ -1750,7 +1741,7 @@ static int r5l_recovery_read_meta_block(struct r5l_log *log,
le64_to_cpu(mb->position) != ctx->pos)
return -EINVAL;
- crc = crc32c_le(log->uuid_checksum, mb, PAGE_SIZE);
+ crc = crc32c(log->uuid_checksum, mb, PAGE_SIZE);
if (stored_crc != crc)
return -EINVAL;
@@ -1789,9 +1780,8 @@ static int r5l_log_write_empty_meta_block(struct r5l_log *log, sector_t pos,
return -ENOMEM;
r5l_recovery_create_empty_meta_block(log, page, pos, seq);
mb = page_address(page);
- mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum,
- mb, PAGE_SIZE));
- if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE,
+ mb->checksum = cpu_to_le32(crc32c(log->uuid_checksum, mb, PAGE_SIZE));
+ if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE |
REQ_SYNC | REQ_FUA, false)) {
__free_page(page);
return -EIO;
@@ -1895,28 +1885,22 @@ r5l_recovery_replay_one_stripe(struct r5conf *conf,
continue;
/* in case device is broken */
- rcu_read_lock();
- rdev = rcu_dereference(conf->disks[disk_index].rdev);
+ rdev = conf->disks[disk_index].rdev;
if (rdev) {
atomic_inc(&rdev->nr_pending);
- rcu_read_unlock();
sync_page_io(rdev, sh->sector, PAGE_SIZE,
- sh->dev[disk_index].page, REQ_OP_WRITE, 0,
+ sh->dev[disk_index].page, REQ_OP_WRITE,
false);
rdev_dec_pending(rdev, rdev->mddev);
- rcu_read_lock();
}
- rrdev = rcu_dereference(conf->disks[disk_index].replacement);
+ rrdev = conf->disks[disk_index].replacement;
if (rrdev) {
atomic_inc(&rrdev->nr_pending);
- rcu_read_unlock();
sync_page_io(rrdev, sh->sector, PAGE_SIZE,
- sh->dev[disk_index].page, REQ_OP_WRITE, 0,
+ sh->dev[disk_index].page, REQ_OP_WRITE,
false);
rdev_dec_pending(rrdev, rrdev->mddev);
- rcu_read_lock();
}
- rcu_read_unlock();
}
ctx->data_parity_stripes++;
out:
@@ -1931,7 +1915,8 @@ r5c_recovery_alloc_stripe(
{
struct stripe_head *sh;
- sh = raid5_get_active_stripe(conf, stripe_sect, 0, noblock, 0);
+ sh = raid5_get_active_stripe(conf, NULL, stripe_sect,
+ noblock ? R5_GAS_NOBLOCK : 0);
if (!sh)
return NULL; /* no more stripe available */
@@ -1989,9 +1974,9 @@ r5l_recovery_verify_data_checksum(struct r5l_log *log,
u32 checksum;
r5l_recovery_read_page(log, ctx, page, log_offset);
- addr = kmap_atomic(page);
- checksum = crc32c_le(log->uuid_checksum, addr, PAGE_SIZE);
- kunmap_atomic(addr);
+ addr = kmap_local_page(page);
+ checksum = crc32c(log->uuid_checksum, addr, PAGE_SIZE);
+ kunmap_local(addr);
return (le32_to_cpu(log_checksum) == checksum) ? 0 : -EINVAL;
}
@@ -2391,13 +2376,13 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
payload->size = cpu_to_le32(BLOCK_SECTORS);
payload->location = cpu_to_le64(
raid5_compute_blocknr(sh, i, 0));
- addr = kmap_atomic(dev->page);
+ addr = kmap_local_page(dev->page);
payload->checksum[0] = cpu_to_le32(
- crc32c_le(log->uuid_checksum, addr,
- PAGE_SIZE));
- kunmap_atomic(addr);
+ crc32c(log->uuid_checksum, addr,
+ PAGE_SIZE));
+ kunmap_local(addr);
sync_page_io(log->rdev, write_pos, PAGE_SIZE,
- dev->page, REQ_OP_WRITE, 0, false);
+ dev->page, REQ_OP_WRITE, false);
write_pos = r5l_ring_add(log, write_pos,
BLOCK_SECTORS);
offset += sizeof(__le32) +
@@ -2406,10 +2391,10 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
}
}
mb->meta_size = cpu_to_le32(offset);
- mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum,
- mb, PAGE_SIZE));
+ mb->checksum = cpu_to_le32(crc32c(log->uuid_checksum,
+ mb, PAGE_SIZE));
sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page,
- REQ_OP_WRITE, REQ_SYNC | REQ_FUA, false);
+ REQ_OP_WRITE | REQ_SYNC | REQ_FUA, false);
sh->log_start = ctx->pos;
list_add_tail(&sh->r5c, &log->stripe_in_journal_list);
atomic_inc(&log->stripe_in_journal_count);
@@ -2537,12 +2522,13 @@ static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page)
struct r5conf *conf;
int ret;
- spin_lock(&mddev->lock);
+ ret = mddev_lock(mddev);
+ if (ret)
+ return ret;
+
conf = mddev->private;
- if (!conf || !conf->log) {
- spin_unlock(&mddev->lock);
- return 0;
- }
+ if (!conf || !conf->log)
+ goto out_unlock;
switch (conf->log->r5c_journal_mode) {
case R5C_JOURNAL_MODE_WRITE_THROUGH:
@@ -2560,7 +2546,9 @@ static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page)
default:
ret = 0;
}
- spin_unlock(&mddev->lock);
+
+out_unlock:
+ mddev_unlock(mddev);
return ret;
}
@@ -2586,9 +2574,7 @@ int r5c_journal_mode_set(struct mddev *mddev, int mode)
mode == R5C_JOURNAL_MODE_WRITE_BACK)
return -EINVAL;
- mddev_suspend(mddev);
conf->log->r5c_journal_mode = mode;
- mddev_resume(mddev);
pr_debug("md/raid:%s: setting r5c cache mode to %d: %s\n",
mdname(mddev), mode, r5c_journal_mode_str[mode]);
@@ -2613,11 +2599,11 @@ static ssize_t r5c_journal_mode_store(struct mddev *mddev,
if (strlen(r5c_journal_mode_str[mode]) == len &&
!strncmp(page, r5c_journal_mode_str[mode], len))
break;
- ret = mddev_lock(mddev);
+ ret = mddev_suspend_and_lock(mddev);
if (ret)
return ret;
ret = r5c_journal_mode_set(mddev, mode);
- mddev_unlock(mddev);
+ mddev_unlock_and_resume(mddev);
return ret ?: length;
}
@@ -2638,11 +2624,11 @@ int r5c_try_caching_write(struct r5conf *conf,
struct stripe_head_state *s,
int disks)
{
- struct r5l_log *log = conf->log;
+ struct r5l_log *log = READ_ONCE(conf->log);
int i;
struct r5dev *dev;
int to_cache = 0;
- void **pslot;
+ void __rcu **pslot;
sector_t tree_index;
int ret;
uintptr_t refcount;
@@ -2805,11 +2791,10 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
struct stripe_head *sh,
struct stripe_head_state *s)
{
- struct r5l_log *log = conf->log;
+ struct r5l_log *log = READ_ONCE(conf->log);
int i;
- int do_wakeup = 0;
sector_t tree_index;
- void **pslot;
+ void __rcu **pslot;
uintptr_t refcount;
if (!log || !test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags))
@@ -2824,7 +2809,7 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
for (i = sh->disks; i--; ) {
clear_bit(R5_InJournal, &sh->dev[i].flags);
if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
- do_wakeup = 1;
+ wake_up_bit(&sh->dev[i].flags, R5_Overlap);
}
/*
@@ -2837,9 +2822,6 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
if (atomic_dec_and_test(&conf->pending_full_writes))
md_wakeup_thread(conf->mddev->thread);
- if (do_wakeup)
- wake_up(&conf->wait_for_overlap);
-
spin_lock_irq(&log->stripe_in_journal_lock);
list_del_init(&sh->r5c);
spin_unlock_irq(&log->stripe_in_journal_lock);
@@ -2901,10 +2883,10 @@ int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh)
if (!test_bit(R5_Wantwrite, &sh->dev[i].flags))
continue;
- addr = kmap_atomic(sh->dev[i].page);
- sh->dev[i].log_checksum = crc32c_le(log->uuid_checksum,
- addr, PAGE_SIZE);
- kunmap_atomic(addr);
+ addr = kmap_local_page(sh->dev[i].page);
+ sh->dev[i].log_checksum = crc32c(log->uuid_checksum,
+ addr, PAGE_SIZE);
+ kunmap_local(addr);
pages++;
}
WARN_ON(pages == 0);
@@ -2944,14 +2926,13 @@ int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh)
/* check whether this big stripe is in write back cache. */
bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect)
{
- struct r5l_log *log = conf->log;
+ struct r5l_log *log = READ_ONCE(conf->log);
sector_t tree_index;
void *slot;
if (!log)
return false;
- WARN_ON_ONCE(!rcu_read_lock_held());
tree_index = r5c_tree_index(conf, sect);
slot = radix_tree_lookup(&log->big_stripe_tree, tree_index);
return slot != NULL;
@@ -2974,7 +2955,7 @@ static int r5l_load_log(struct r5l_log *log)
if (!page)
return -ENOMEM;
- if (!sync_page_io(rdev, cp, PAGE_SIZE, page, REQ_OP_READ, 0, false)) {
+ if (!sync_page_io(rdev, cp, PAGE_SIZE, page, REQ_OP_READ, false)) {
ret = -EIO;
goto ioerr;
}
@@ -2987,7 +2968,7 @@ static int r5l_load_log(struct r5l_log *log)
}
stored_crc = le32_to_cpu(mb->checksum);
mb->checksum = 0;
- expected_crc = crc32c_le(log->uuid_checksum, mb, PAGE_SIZE);
+ expected_crc = crc32c(log->uuid_checksum, mb, PAGE_SIZE);
if (stored_crc != expected_crc) {
create_super = true;
goto create;
@@ -2998,7 +2979,7 @@ static int r5l_load_log(struct r5l_log *log)
}
create:
if (create_super) {
- log->last_cp_seq = prandom_u32();
+ log->last_cp_seq = get_random_u32();
cp = 0;
r5l_log_write_empty_meta_block(log, cp, log->last_cp_seq);
/*
@@ -3052,26 +3033,25 @@ int r5l_start(struct r5l_log *log)
void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev)
{
struct r5conf *conf = mddev->private;
- struct r5l_log *log = conf->log;
+ struct r5l_log *log = READ_ONCE(conf->log);
if (!log)
return;
if ((raid5_calc_degraded(conf) > 0 ||
test_bit(Journal, &rdev->flags)) &&
- conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK)
+ log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK)
schedule_work(&log->disable_writeback_work);
}
int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
{
- struct request_queue *q = bdev_get_queue(rdev->bdev);
struct r5l_log *log;
- char b[BDEVNAME_SIZE];
+ struct md_thread *thread;
int ret;
- pr_debug("md/raid:%s: using device %s as journal\n",
- mdname(conf->mddev), bdevname(rdev->bdev, b));
+ pr_debug("md/raid:%s: using device %pg as journal\n",
+ mdname(conf->mddev), rdev->bdev);
if (PAGE_SIZE != 4096)
return -EINVAL;
@@ -3095,11 +3075,9 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
if (!log)
return -ENOMEM;
log->rdev = rdev;
-
- log->need_cache_flush = test_bit(QUEUE_FLAG_WC, &q->queue_flags) != 0;
-
- log->uuid_checksum = crc32c_le(~0, rdev->mddev->uuid,
- sizeof(rdev->mddev->uuid));
+ log->need_cache_flush = bdev_write_cache(rdev->bdev);
+ log->uuid_checksum = crc32c(~0, rdev->mddev->uuid,
+ sizeof(rdev->mddev->uuid));
mutex_init(&log->io_mutex);
@@ -3108,7 +3086,6 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
INIT_LIST_HEAD(&log->io_end_ios);
INIT_LIST_HEAD(&log->flushing_ios);
INIT_LIST_HEAD(&log->finished_ios);
- bio_init(&log->flush_bio, NULL, 0);
log->io_kc = KMEM_CACHE(r5l_io_unit, 0);
if (!log->io_kc)
@@ -3127,13 +3104,15 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
goto out_mempool;
spin_lock_init(&log->tree_lock);
- INIT_RADIX_TREE(&log->big_stripe_tree, GFP_NOWAIT | __GFP_NOWARN);
+ INIT_RADIX_TREE(&log->big_stripe_tree, GFP_NOWAIT);
- log->reclaim_thread = md_register_thread(r5l_reclaim_thread,
- log->rdev->mddev, "reclaim");
- if (!log->reclaim_thread)
+ thread = md_register_thread(r5l_reclaim_thread, log->rdev->mddev,
+ "reclaim");
+ if (!thread)
goto reclaim_thread;
- log->reclaim_thread->timeout = R5C_RECLAIM_WAKEUP_INTERVAL;
+
+ thread->timeout = R5C_RECLAIM_WAKEUP_INTERVAL;
+ rcu_assign_pointer(log->reclaim_thread, thread);
init_waitqueue_head(&log->iounit_wait);
@@ -3150,7 +3129,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
spin_lock_init(&log->stripe_in_journal_lock);
atomic_set(&log->stripe_in_journal_count, 0);
- rcu_assign_pointer(conf->log, log);
+ WRITE_ONCE(conf->log, log);
set_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
return 0;
@@ -3172,13 +3151,16 @@ void r5l_exit_log(struct r5conf *conf)
{
struct r5l_log *log = conf->log;
- conf->log = NULL;
- synchronize_rcu();
+ md_unregister_thread(conf->mddev, &log->reclaim_thread);
- /* Ensure disable_writeback_work wakes up and exits */
+ /*
+ * 'reconfig_mutex' is held by caller, set 'confg->log' to NULL to
+ * ensure disable_writeback_work wakes up and exits.
+ */
+ WRITE_ONCE(conf->log, NULL);
wake_up(&conf->mddev->sb_wait);
flush_work(&log->disable_writeback_work);
- md_unregister_thread(&log->reclaim_thread);
+
mempool_exit(&log->meta_pool);
bioset_exit(&log->bs);
mempool_exit(&log->io_pool);