diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-16 13:33:06 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-16 13:33:06 +0200 |
commit | 26bb0d3f38a764b743a3ad5c8b6e5b5044d7ceb4 (patch) | |
tree | a08d01893b603d2f611a617f6055b54a835c03f0 /drivers/md/md-bitmap.c | |
parent | 3a4d319a8fb5a9bbdf5b31ef32841eb286b1dcc2 (diff) | |
parent | d4d7c03f7ee1d7f16b7b6e885b1e00968f72b93c (diff) |
Merge tag 'for-6.12/block-20240913' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- MD changes via Song:
- md-bitmap refactoring (Yu Kuai)
- raid5 performance optimization (Artur Paszkiewicz)
- Other small fixes (Yu Kuai, Chen Ni)
- Add a sysfs entry 'new_level' (Xiao Ni)
- Improve information reported in /proc/mdstat (Mateusz Kusiak)
- NVMe changes via Keith:
- Asynchronous namespace scanning (Stuart)
- TCP TLS updates (Hannes)
- RDMA queue controller validation (Niklas)
- Align field names to the spec (Anuj)
- Metadata support validation (Puranjay)
- A syntax cleanup (Shen)
- Fix a Kconfig linking error (Arnd)
- New queue-depth quirk (Keith)
- Add missing unplug trace event (Keith)
- blk-iocost fixes (Colin, Konstantin)
- t10-pi modular removal and fixes (Alexey)
- Fix for potential BLKSECDISCARD overflow (Alexey)
- bio splitting cleanups and fixes (Christoph)
- Deal with folios rather than pages, speeding up how the
block layer handles bigger IOs (Kundan)
- Use spinlocks rather than bit spinlocks in zram (Sebastian, Mike)
- Reduce zoned device overhead in ublk (Ming)
- Add and use sendpages_ok() for drbd and nvme-tcp (Ofir)
- Fix regression in partition error pointer checking (Riyan)
- Add support for write zeroes and rotational status in nbd (Wouter)
- Add Yu Kuai as new BFQ maintainer. The scheduler has been
unmaintained for quite a while.
- Various sets of fixes for BFQ (Yu Kuai)
- Misc fixes and cleanups (Alvaro, Christophe, Li, Md Haris, Mikhail,
Yang)
* tag 'for-6.12/block-20240913' of git://git.kernel.dk/linux: (120 commits)
nvme-pci: qdepth 1 quirk
block: fix potential invalid pointer dereference in blk_add_partition
blk_iocost: make read-only static array vrate_adj_pct const
block: unpin user pages belonging to a folio at once
mm: release number of pages of a folio
block: introduce folio awareness and add a bigger size from folio
block: Added folio-ized version of bio_add_hw_page()
block, bfq: factor out a helper to split bfqq in bfq_init_rq()
block, bfq: remove local variable 'bfqq_already_existing' in bfq_init_rq()
block, bfq: remove local variable 'split' in bfq_init_rq()
block, bfq: remove bfq_log_bfqg()
block, bfq: merge bfq_release_process_ref() into bfq_put_cooperator()
block, bfq: fix procress reference leakage for bfqq in merge chain
block, bfq: fix uaf for accessing waker_bfqq after splitting
blk-throttle: support prioritized processing of metadata
blk-throttle: remove last_low_overflow_time
drbd: Add NULL check for net_conf to prevent dereference in state validation
nvme-tcp: fix link failure for TCP auth
blk-mq: add missing unplug trace event
mtip32xx: Remove redundant null pointer checks in mtip_hw_debugfs_init()
...
Diffstat (limited to 'drivers/md/md-bitmap.c')
-rw-r--r-- | drivers/md/md-bitmap.c | 568 |
1 files changed, 432 insertions, 136 deletions
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index db5330d97348..29da10e6f703 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -32,11 +32,210 @@ #include "md.h" #include "md-bitmap.h" +#define BITMAP_MAJOR_LO 3 +/* version 4 insists the bitmap is in little-endian order + * with version 3, it is host-endian which is non-portable + * Version 5 is currently set only for clustered devices + */ +#define BITMAP_MAJOR_HI 4 +#define BITMAP_MAJOR_CLUSTERED 5 +#define BITMAP_MAJOR_HOSTENDIAN 3 + +/* + * in-memory bitmap: + * + * Use 16 bit block counters to track pending writes to each "chunk". + * The 2 high order bits are special-purpose, the first is a flag indicating + * whether a resync is needed. The second is a flag indicating whether a + * resync is active. + * This means that the counter is actually 14 bits: + * + * +--------+--------+------------------------------------------------+ + * | resync | resync | counter | + * | needed | active | | + * | (0-1) | (0-1) | (0-16383) | + * +--------+--------+------------------------------------------------+ + * + * The "resync needed" bit is set when: + * a '1' bit is read from storage at startup. + * a write request fails on some drives + * a resync is aborted on a chunk with 'resync active' set + * It is cleared (and resync-active set) when a resync starts across all drives + * of the chunk. + * + * + * The "resync active" bit is set when: + * a resync is started on all drives, and resync_needed is set. + * resync_needed will be cleared (as long as resync_active wasn't already set). + * It is cleared when a resync completes. + * + * The counter counts pending write requests, plus the on-disk bit. + * When the counter is '1' and the resync bits are clear, the on-disk + * bit can be cleared as well, thus setting the counter to 0. + * When we set a bit, or in the counter (to start a write), if the fields is + * 0, we first set the disk bit and set the counter to 1. 
+ * + * If the counter is 0, the on-disk bit is clear and the stripe is clean + * Anything that dirties the stripe pushes the counter to 2 (at least) + * and sets the on-disk bit (lazily). + * If a periodic sweep find the counter at 2, it is decremented to 1. + * If the sweep find the counter at 1, the on-disk bit is cleared and the + * counter goes to zero. + * + * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block + * counters as a fallback when "page" memory cannot be allocated: + * + * Normal case (page memory allocated): + * + * page pointer (32-bit) + * + * [ ] ------+ + * | + * +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters) + * c1 c2 c2048 + * + * Hijacked case (page memory allocation failed): + * + * hijacked page pointer (32-bit) + * + * [ ][ ] (no page memory allocated) + * counter #1 (16-bit) counter #2 (16-bit) + * + */ + +#define PAGE_BITS (PAGE_SIZE << 3) +#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3) + +#define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK) +#define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK) +#define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX) + +/* how many counters per page? 
*/ +#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS) +/* same, except a shift value for more efficient bitops */ +#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT) +/* same, except a mask value for more efficient bitops */ +#define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1) + +#define BITMAP_BLOCK_SHIFT 9 + +/* + * bitmap structures: + */ + +/* the in-memory bitmap is represented by bitmap_pages */ +struct bitmap_page { + /* + * map points to the actual memory page + */ + char *map; + /* + * in emergencies (when map cannot be alloced), hijack the map + * pointer and use it as two counters itself + */ + unsigned int hijacked:1; + /* + * If any counter in this page is '1' or '2' - and so could be + * cleared then that page is marked as 'pending' + */ + unsigned int pending:1; + /* + * count of dirty bits on the page + */ + unsigned int count:30; +}; + +/* the main bitmap structure - one per mddev */ +struct bitmap { + + struct bitmap_counts { + spinlock_t lock; + struct bitmap_page *bp; + /* total number of pages in the bitmap */ + unsigned long pages; + /* number of pages not yet allocated */ + unsigned long missing_pages; + /* chunksize = 2^chunkshift (for bitops) */ + unsigned long chunkshift; + /* total number of data chunks for the array */ + unsigned long chunks; + } counts; + + struct mddev *mddev; /* the md device that the bitmap is for */ + + __u64 events_cleared; + int need_sync; + + struct bitmap_storage { + /* backing disk file */ + struct file *file; + /* cached copy of the bitmap file superblock */ + struct page *sb_page; + unsigned long sb_index; + /* list of cache pages for the file */ + struct page **filemap; + /* attributes associated filemap pages */ + unsigned long *filemap_attr; + /* number of pages in the file */ + unsigned long file_pages; + /* total bytes in the bitmap */ + unsigned long bytes; + } storage; + + unsigned long flags; + + int allclean; + + atomic_t behind_writes; + /* highest actual value at runtime */ + 
unsigned long behind_writes_used; + + /* + * the bitmap daemon - periodically wakes up and sweeps the bitmap + * file, cleaning up bits and flushing out pages to disk as necessary + */ + unsigned long daemon_lastrun; /* jiffies of last run */ + /* + * when we lasted called end_sync to update bitmap with resync + * progress. + */ + unsigned long last_end_sync; + + /* pending writes to the bitmap file */ + atomic_t pending_writes; + wait_queue_head_t write_wait; + wait_queue_head_t overflow_wait; + wait_queue_head_t behind_wait; + + struct kernfs_node *sysfs_can_clear; + /* slot offset for clustered env */ + int cluster_slot; +}; + +static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks, + int chunksize, bool init); + static inline char *bmname(struct bitmap *bitmap) { return bitmap->mddev ? mdname(bitmap->mddev) : "mdX"; } +static bool __bitmap_enabled(struct bitmap *bitmap) +{ + return bitmap->storage.filemap && + !test_bit(BITMAP_STALE, &bitmap->flags); +} + +static bool bitmap_enabled(struct mddev *mddev) +{ + struct bitmap *bitmap = mddev->bitmap; + + if (!bitmap) + return false; + + return __bitmap_enabled(bitmap); +} + /* * check a page and, if necessary, allocate it (or hijack it if the alloc fails) * @@ -472,9 +671,10 @@ static void md_bitmap_wait_writes(struct bitmap *bitmap) /* update the event counter and sync the superblock to disk */ -void md_bitmap_update_sb(struct bitmap *bitmap) +static void bitmap_update_sb(void *data) { bitmap_super_t *sb; + struct bitmap *bitmap = data; if (!bitmap || !bitmap->mddev) /* no bitmap for this array */ return; @@ -510,10 +710,8 @@ void md_bitmap_update_sb(struct bitmap *bitmap) write_sb_page(bitmap, bitmap->storage.sb_index, bitmap->storage.sb_page, 1); } -EXPORT_SYMBOL(md_bitmap_update_sb); -/* print out the bitmap file superblock */ -void md_bitmap_print_sb(struct bitmap *bitmap) +static void bitmap_print_sb(struct bitmap *bitmap) { bitmap_super_t *sb; @@ -760,7 +958,7 @@ out_no_sb: 
bitmap->mddev->bitmap_info.space > sectors_reserved) bitmap->mddev->bitmap_info.space = sectors_reserved; } else { - md_bitmap_print_sb(bitmap); + bitmap_print_sb(bitmap); if (bitmap->cluster_slot < 0) md_cluster_stop(bitmap->mddev); } @@ -893,7 +1091,7 @@ static void md_bitmap_file_unmap(struct bitmap_storage *store) static void md_bitmap_file_kick(struct bitmap *bitmap) { if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) { - md_bitmap_update_sb(bitmap); + bitmap_update_sb(bitmap); if (bitmap->storage.file) { pr_warn("%s: kicking failed bitmap file %pD4 from array!\n", @@ -1028,13 +1226,13 @@ static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block) /* this gets called when the md device is ready to unplug its underlying * (slave) device queues -- before we let any writes go down, we need to * sync the dirty pages of the bitmap file to disk */ -void md_bitmap_unplug(struct bitmap *bitmap) +static void __bitmap_unplug(struct bitmap *bitmap) { unsigned long i; int dirty, need_write; int writing = 0; - if (!md_bitmap_enabled(bitmap)) + if (!__bitmap_enabled(bitmap)) return; /* look at each page to see if there are any set bits that need to be @@ -1060,7 +1258,6 @@ void md_bitmap_unplug(struct bitmap *bitmap) if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) md_bitmap_file_kick(bitmap); } -EXPORT_SYMBOL(md_bitmap_unplug); struct bitmap_unplug_work { struct work_struct work; @@ -1073,11 +1270,11 @@ static void md_bitmap_unplug_fn(struct work_struct *work) struct bitmap_unplug_work *unplug_work = container_of(work, struct bitmap_unplug_work, work); - md_bitmap_unplug(unplug_work->bitmap); + __bitmap_unplug(unplug_work->bitmap); complete(unplug_work->done); } -void md_bitmap_unplug_async(struct bitmap *bitmap) +static void bitmap_unplug_async(struct bitmap *bitmap) { DECLARE_COMPLETION_ONSTACK(done); struct bitmap_unplug_work unplug_work; @@ -1089,7 +1286,19 @@ void md_bitmap_unplug_async(struct bitmap *bitmap) queue_work(md_bitmap_wq, 
&unplug_work.work); wait_for_completion(&done); } -EXPORT_SYMBOL(md_bitmap_unplug_async); + +static void bitmap_unplug(struct mddev *mddev, bool sync) +{ + struct bitmap *bitmap = mddev->bitmap; + + if (!bitmap) + return; + + if (sync) + __bitmap_unplug(bitmap); + else + bitmap_unplug_async(bitmap); +} static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); @@ -1226,22 +1435,21 @@ static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) return ret; } -void md_bitmap_write_all(struct bitmap *bitmap) +/* just flag bitmap pages as needing to be written. */ +static void bitmap_write_all(struct mddev *mddev) { - /* We don't actually write all bitmap blocks here, - * just flag them as needing to be written - */ int i; + struct bitmap *bitmap = mddev->bitmap; if (!bitmap || !bitmap->storage.filemap) return; + + /* Only one copy, so nothing needed */ if (bitmap->storage.file) - /* Only one copy, so nothing needed */ return; for (i = 0; i < bitmap->storage.file_pages; i++) - set_page_attr(bitmap, i, - BITMAP_PAGE_NEEDWRITE); + set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE); bitmap->allclean = 0; } @@ -1290,7 +1498,7 @@ out: * bitmap daemon -- periodically wakes up to clean bits and flush pages * out to disk */ -void md_bitmap_daemon_work(struct mddev *mddev) +static void bitmap_daemon_work(struct mddev *mddev) { struct bitmap *bitmap; unsigned long j; @@ -1461,8 +1669,11 @@ __acquires(bitmap->lock) &(bitmap->bp[page].map[pageoff]); } -int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind) +static int bitmap_startwrite(struct mddev *mddev, sector_t offset, + unsigned long sectors, bool behind) { + struct bitmap *bitmap = mddev->bitmap; + if (!bitmap) return 0; @@ -1523,13 +1734,15 @@ int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long s } return 0; } -EXPORT_SYMBOL(md_bitmap_startwrite); -void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset, 
- unsigned long sectors, int success, int behind) +static void bitmap_endwrite(struct mddev *mddev, sector_t offset, + unsigned long sectors, bool success, bool behind) { + struct bitmap *bitmap = mddev->bitmap; + if (!bitmap) return; + if (behind) { if (atomic_dec_and_test(&bitmap->behind_writes)) wake_up(&bitmap->behind_wait); @@ -1576,26 +1789,27 @@ void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset, sectors = 0; } } -EXPORT_SYMBOL(md_bitmap_endwrite); -static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, - int degraded) +static bool __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, + sector_t *blocks, bool degraded) { bitmap_counter_t *bmc; - int rv; + bool rv; + if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */ *blocks = 1024; - return 1; /* always resync if no bitmap */ + return true; /* always resync if no bitmap */ } spin_lock_irq(&bitmap->counts.lock); + + rv = false; bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0); - rv = 0; if (bmc) { /* locked */ - if (RESYNC(*bmc)) - rv = 1; - else if (NEEDED(*bmc)) { - rv = 1; + if (RESYNC(*bmc)) { + rv = true; + } else if (NEEDED(*bmc)) { + rv = true; if (!degraded) { /* don't set/clear bits if degraded */ *bmc |= RESYNC_MASK; *bmc &= ~NEEDED_MASK; @@ -1603,11 +1817,12 @@ static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t } } spin_unlock_irq(&bitmap->counts.lock); + return rv; } -int md_bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, - int degraded) +static bool bitmap_start_sync(struct mddev *mddev, sector_t offset, + sector_t *blocks, bool degraded) { /* bitmap_start_sync must always report on multiples of whole * pages, otherwise resync (which is very PAGE_SIZE based) will @@ -1616,21 +1831,22 @@ int md_bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *block * At least PAGE_SIZE>>9 blocks are covered. * Return the 'or' of the result. 
*/ - int rv = 0; + bool rv = false; sector_t blocks1; *blocks = 0; while (*blocks < (PAGE_SIZE>>9)) { - rv |= __bitmap_start_sync(bitmap, offset, + rv |= __bitmap_start_sync(mddev->bitmap, offset, &blocks1, degraded); offset += blocks1; *blocks += blocks1; } + return rv; } -EXPORT_SYMBOL(md_bitmap_start_sync); -void md_bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted) +static void __bitmap_end_sync(struct bitmap *bitmap, sector_t offset, + sector_t *blocks, bool aborted) { bitmap_counter_t *bmc; unsigned long flags; @@ -1659,9 +1875,14 @@ void md_bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks unlock: spin_unlock_irqrestore(&bitmap->counts.lock, flags); } -EXPORT_SYMBOL(md_bitmap_end_sync); -void md_bitmap_close_sync(struct bitmap *bitmap) +static void bitmap_end_sync(struct mddev *mddev, sector_t offset, + sector_t *blocks) +{ + __bitmap_end_sync(mddev->bitmap, offset, blocks, true); +} + +static void bitmap_close_sync(struct mddev *mddev) { /* Sync has finished, and any bitmap chunks that weren't synced * properly have been aborted. 
It remains to us to clear the @@ -1669,19 +1890,23 @@ void md_bitmap_close_sync(struct bitmap *bitmap) */ sector_t sector = 0; sector_t blocks; + struct bitmap *bitmap = mddev->bitmap; + if (!bitmap) return; + while (sector < bitmap->mddev->resync_max_sectors) { - md_bitmap_end_sync(bitmap, sector, &blocks, 0); + __bitmap_end_sync(bitmap, sector, &blocks, false); sector += blocks; } } -EXPORT_SYMBOL(md_bitmap_close_sync); -void md_bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force) +static void bitmap_cond_end_sync(struct mddev *mddev, sector_t sector, + bool force) { sector_t s = 0; sector_t blocks; + struct bitmap *bitmap = mddev->bitmap; if (!bitmap) return; @@ -1700,34 +1925,32 @@ void md_bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force) sector &= ~((1ULL << bitmap->counts.chunkshift) - 1); s = 0; while (s < sector && s < bitmap->mddev->resync_max_sectors) { - md_bitmap_end_sync(bitmap, s, &blocks, 0); + __bitmap_end_sync(bitmap, s, &blocks, false); s += blocks; } bitmap->last_end_sync = jiffies; sysfs_notify_dirent_safe(bitmap->mddev->sysfs_completed); } -EXPORT_SYMBOL(md_bitmap_cond_end_sync); -void md_bitmap_sync_with_cluster(struct mddev *mddev, - sector_t old_lo, sector_t old_hi, - sector_t new_lo, sector_t new_hi) +static void bitmap_sync_with_cluster(struct mddev *mddev, + sector_t old_lo, sector_t old_hi, + sector_t new_lo, sector_t new_hi) { struct bitmap *bitmap = mddev->bitmap; sector_t sector, blocks = 0; for (sector = old_lo; sector < new_lo; ) { - md_bitmap_end_sync(bitmap, sector, &blocks, 0); + __bitmap_end_sync(bitmap, sector, &blocks, false); sector += blocks; } WARN((blocks > new_lo) && old_lo, "alignment is not correct for lo\n"); for (sector = old_hi; sector < new_hi; ) { - md_bitmap_start_sync(bitmap, sector, &blocks, 0); + bitmap_start_sync(mddev, sector, &blocks, false); sector += blocks; } WARN((blocks > new_hi) && old_hi, "alignment is not correct for hi\n"); } 
-EXPORT_SYMBOL(md_bitmap_sync_with_cluster); static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) { @@ -1756,12 +1979,18 @@ static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, in } /* dirty the memory and file bits for bitmap chunks "s" to "e" */ -void md_bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e) +static void bitmap_dirty_bits(struct mddev *mddev, unsigned long s, + unsigned long e) { unsigned long chunk; + struct bitmap *bitmap = mddev->bitmap; + + if (!bitmap) + return; for (chunk = s; chunk <= e; chunk++) { sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift; + md_bitmap_set_memory_bits(bitmap, sec, 1); md_bitmap_file_set_bit(bitmap, sec); if (sec < bitmap->mddev->recovery_cp) @@ -1773,10 +2002,7 @@ void md_bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long } } -/* - * flush out any pending updates - */ -void md_bitmap_flush(struct mddev *mddev) +static void bitmap_flush(struct mddev *mddev) { struct bitmap *bitmap = mddev->bitmap; long sleep; @@ -1789,23 +2015,21 @@ void md_bitmap_flush(struct mddev *mddev) */ sleep = mddev->bitmap_info.daemon_sleep * 2; bitmap->daemon_lastrun -= sleep; - md_bitmap_daemon_work(mddev); + bitmap_daemon_work(mddev); bitmap->daemon_lastrun -= sleep; - md_bitmap_daemon_work(mddev); + bitmap_daemon_work(mddev); bitmap->daemon_lastrun -= sleep; - md_bitmap_daemon_work(mddev); + bitmap_daemon_work(mddev); if (mddev->bitmap_info.external) md_super_wait(mddev); - md_bitmap_update_sb(bitmap); + bitmap_update_sb(bitmap); } -/* - * free memory that was allocated - */ -void md_bitmap_free(struct bitmap *bitmap) +static void md_bitmap_free(void *data) { unsigned long k, pages; struct bitmap_page *bp; + struct bitmap *bitmap = data; if (!bitmap) /* there was no bitmap */ return; @@ -1836,9 +2060,8 @@ void md_bitmap_free(struct bitmap *bitmap) kfree(bp); kfree(bitmap); } -EXPORT_SYMBOL(md_bitmap_free); -void 
md_bitmap_wait_behind_writes(struct mddev *mddev) +static void bitmap_wait_behind_writes(struct mddev *mddev) { struct bitmap *bitmap = mddev->bitmap; @@ -1852,14 +2075,14 @@ void md_bitmap_wait_behind_writes(struct mddev *mddev) } } -void md_bitmap_destroy(struct mddev *mddev) +static void bitmap_destroy(struct mddev *mddev) { struct bitmap *bitmap = mddev->bitmap; if (!bitmap) /* there was no bitmap */ return; - md_bitmap_wait_behind_writes(mddev); + bitmap_wait_behind_writes(mddev); if (!mddev->serialize_policy) mddev_destroy_serial_pool(mddev, NULL); @@ -1878,7 +2101,7 @@ void md_bitmap_destroy(struct mddev *mddev) * if this returns an error, bitmap_destroy must be called to do clean up * once mddev->bitmap is set */ -struct bitmap *md_bitmap_create(struct mddev *mddev, int slot) +static struct bitmap *__bitmap_create(struct mddev *mddev, int slot) { struct bitmap *bitmap; sector_t blocks = mddev->resync_max_sectors; @@ -1948,7 +2171,8 @@ struct bitmap *md_bitmap_create(struct mddev *mddev, int slot) goto error; bitmap->daemon_lastrun = jiffies; - err = md_bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 1); + err = __bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, + true); if (err) goto error; @@ -1965,7 +2189,18 @@ struct bitmap *md_bitmap_create(struct mddev *mddev, int slot) return ERR_PTR(err); } -int md_bitmap_load(struct mddev *mddev) +static int bitmap_create(struct mddev *mddev, int slot) +{ + struct bitmap *bitmap = __bitmap_create(mddev, slot); + + if (IS_ERR(bitmap)) + return PTR_ERR(bitmap); + + mddev->bitmap = bitmap; + return 0; +} + +static int bitmap_load(struct mddev *mddev) { int err = 0; sector_t start = 0; @@ -1989,10 +2224,10 @@ int md_bitmap_load(struct mddev *mddev) */ while (sector < mddev->resync_max_sectors) { sector_t blocks; - md_bitmap_start_sync(bitmap, sector, &blocks, 0); + bitmap_start_sync(mddev, sector, &blocks, false); sector += blocks; } - md_bitmap_close_sync(bitmap); + bitmap_close_sync(mddev); if 
(mddev->degraded == 0 || bitmap->events_cleared == mddev->events) @@ -2014,22 +2249,21 @@ int md_bitmap_load(struct mddev *mddev) mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true); md_wakeup_thread(mddev->thread); - md_bitmap_update_sb(bitmap); + bitmap_update_sb(bitmap); if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) err = -EIO; out: return err; } -EXPORT_SYMBOL_GPL(md_bitmap_load); /* caller need to free returned bitmap with md_bitmap_free() */ -struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot) +static void *bitmap_get_from_slot(struct mddev *mddev, int slot) { int rv = 0; struct bitmap *bitmap; - bitmap = md_bitmap_create(mddev, slot); + bitmap = __bitmap_create(mddev, slot); if (IS_ERR(bitmap)) { rv = PTR_ERR(bitmap); return ERR_PTR(rv); @@ -2043,20 +2277,19 @@ struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot) return bitmap; } -EXPORT_SYMBOL(get_bitmap_from_slot); /* Loads the bitmap associated with slot and copies the resync information * to our bitmap */ -int md_bitmap_copy_from_slot(struct mddev *mddev, int slot, - sector_t *low, sector_t *high, bool clear_bits) +static int bitmap_copy_from_slot(struct mddev *mddev, int slot, sector_t *low, + sector_t *high, bool clear_bits) { int rv = 0, i, j; sector_t block, lo = 0, hi = 0; struct bitmap_counts *counts; struct bitmap *bitmap; - bitmap = get_bitmap_from_slot(mddev, slot); + bitmap = bitmap_get_from_slot(mddev, slot); if (IS_ERR(bitmap)) { pr_err("%s can't get bitmap from slot %d\n", __func__, slot); return -1; @@ -2076,53 +2309,59 @@ int md_bitmap_copy_from_slot(struct mddev *mddev, int slot, } if (clear_bits) { - md_bitmap_update_sb(bitmap); + bitmap_update_sb(bitmap); /* BITMAP_PAGE_PENDING is set, but bitmap_unplug needs * BITMAP_PAGE_DIRTY or _NEEDWRITE to write ... 
*/ for (i = 0; i < bitmap->storage.file_pages; i++) if (test_page_attr(bitmap, i, BITMAP_PAGE_PENDING)) set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE); - md_bitmap_unplug(bitmap); + __bitmap_unplug(bitmap); } - md_bitmap_unplug(mddev->bitmap); + __bitmap_unplug(mddev->bitmap); *low = lo; *high = hi; md_bitmap_free(bitmap); return rv; } -EXPORT_SYMBOL_GPL(md_bitmap_copy_from_slot); +static void bitmap_set_pages(void *data, unsigned long pages) +{ + struct bitmap *bitmap = data; + + bitmap->counts.pages = pages; +} -void md_bitmap_status(struct seq_file *seq, struct bitmap *bitmap) +static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats) { - unsigned long chunk_kb; + struct bitmap_storage *storage; struct bitmap_counts *counts; + struct bitmap *bitmap = data; + bitmap_super_t *sb; if (!bitmap) - return; + return -ENOENT; + + sb = kmap_local_page(bitmap->storage.sb_page); + stats->sync_size = le64_to_cpu(sb->sync_size); + kunmap_local(sb); counts = &bitmap->counts; + stats->missing_pages = counts->missing_pages; + stats->pages = counts->pages; - chunk_kb = bitmap->mddev->bitmap_info.chunksize >> 10; - seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], " - "%lu%s chunk", - counts->pages - counts->missing_pages, - counts->pages, - (counts->pages - counts->missing_pages) - << (PAGE_SHIFT - 10), - chunk_kb ? chunk_kb : bitmap->mddev->bitmap_info.chunksize, - chunk_kb ? 
"KB" : "B"); - if (bitmap->storage.file) { - seq_printf(seq, ", file: "); - seq_file_path(seq, bitmap->storage.file, " \t\n"); - } + storage = &bitmap->storage; + stats->file_pages = storage->file_pages; + stats->file = storage->file; - seq_printf(seq, "\n"); + stats->behind_writes = atomic_read(&bitmap->behind_writes); + stats->behind_wait = wq_has_sleeper(&bitmap->behind_wait); + stats->events_cleared = bitmap->events_cleared; + return 0; } -int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, - int chunksize, int init) +static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks, + int chunksize, bool init) { /* If chunk_size is 0, choose an appropriate chunk size. * Then possibly allocate new storage space. @@ -2320,14 +2559,24 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, spin_unlock_irq(&bitmap->counts.lock); if (!init) { - md_bitmap_unplug(bitmap); + __bitmap_unplug(bitmap); bitmap->mddev->pers->quiesce(bitmap->mddev, 0); } ret = 0; err: return ret; } -EXPORT_SYMBOL_GPL(md_bitmap_resize); + +static int bitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize, + bool init) +{ + struct bitmap *bitmap = mddev->bitmap; + + if (!bitmap) + return 0; + + return __bitmap_resize(bitmap, blocks, chunksize, init); +} static ssize_t location_show(struct mddev *mddev, char *page) @@ -2367,7 +2616,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len) goto out; } - md_bitmap_destroy(mddev); + bitmap_destroy(mddev); mddev->bitmap_info.offset = 0; if (mddev->bitmap_info.file) { struct file *f = mddev->bitmap_info.file; @@ -2377,7 +2626,6 @@ location_store(struct mddev *mddev, const char *buf, size_t len) } else { /* No bitmap, OK to set a location */ long long offset; - struct bitmap *bitmap; if (strncmp(buf, "none", 4) == 0) /* nothing to be done */; @@ -2404,17 +2652,14 @@ location_store(struct mddev *mddev, const char *buf, size_t len) } mddev->bitmap_info.offset = offset; - bitmap = md_bitmap_create(mddev, -1); - 
if (IS_ERR(bitmap)) { - rv = PTR_ERR(bitmap); + rv = bitmap_create(mddev, -1); + if (rv) goto out; - } - mddev->bitmap = bitmap; - rv = md_bitmap_load(mddev); + rv = bitmap_load(mddev); if (rv) { mddev->bitmap_info.offset = 0; - md_bitmap_destroy(mddev); + bitmap_destroy(mddev); goto out; } } @@ -2450,6 +2695,7 @@ space_show(struct mddev *mddev, char *page) static ssize_t space_store(struct mddev *mddev, const char *buf, size_t len) { + struct bitmap *bitmap; unsigned long sectors; int rv; @@ -2460,8 +2706,8 @@ space_store(struct mddev *mddev, const char *buf, size_t len) if (sectors == 0) return -EINVAL; - if (mddev->bitmap && - sectors < (mddev->bitmap->storage.bytes + 511) >> 9) + bitmap = mddev->bitmap; + if (bitmap && sectors < (bitmap->storage.bytes + 511) >> 9) return -EFBIG; /* Bitmap is too big for this small space */ /* could make sure it isn't too big, but that isn't really @@ -2569,7 +2815,7 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len) mddev_create_serial_pool(mddev, rdev); } if (old_mwb != backlog) - md_bitmap_update_sb(mddev->bitmap); + bitmap_update_sb(mddev->bitmap); mddev_unlock_and_resume(mddev); return len; @@ -2638,10 +2884,13 @@ __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store); static ssize_t can_clear_show(struct mddev *mddev, char *page) { int len; + struct bitmap *bitmap; + spin_lock(&mddev->lock); - if (mddev->bitmap) - len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ? - "false" : "true")); + bitmap = mddev->bitmap; + if (bitmap) + len = sprintf(page, "%s\n", (bitmap->need_sync ? 
"false" : + "true")); else len = sprintf(page, "\n"); spin_unlock(&mddev->lock); @@ -2650,17 +2899,24 @@ static ssize_t can_clear_show(struct mddev *mddev, char *page) static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len) { - if (mddev->bitmap == NULL) + struct bitmap *bitmap = mddev->bitmap; + + if (!bitmap) return -ENOENT; - if (strncmp(buf, "false", 5) == 0) - mddev->bitmap->need_sync = 1; - else if (strncmp(buf, "true", 4) == 0) { + + if (strncmp(buf, "false", 5) == 0) { + bitmap->need_sync = 1; + return len; + } + + if (strncmp(buf, "true", 4) == 0) { if (mddev->degraded) return -EBUSY; - mddev->bitmap->need_sync = 0; - } else - return -EINVAL; - return len; + bitmap->need_sync = 0; + return len; + } + + return -EINVAL; } static struct md_sysfs_entry bitmap_can_clear = @@ -2670,21 +2926,26 @@ static ssize_t behind_writes_used_show(struct mddev *mddev, char *page) { ssize_t ret; + struct bitmap *bitmap; + spin_lock(&mddev->lock); - if (mddev->bitmap == NULL) + bitmap = mddev->bitmap; + if (!bitmap) ret = sprintf(page, "0\n"); else - ret = sprintf(page, "%lu\n", - mddev->bitmap->behind_writes_used); + ret = sprintf(page, "%lu\n", bitmap->behind_writes_used); spin_unlock(&mddev->lock); + return ret; } static ssize_t behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len) { - if (mddev->bitmap) - mddev->bitmap->behind_writes_used = 0; + struct bitmap *bitmap = mddev->bitmap; + + if (bitmap) + bitmap->behind_writes_used = 0; return len; } @@ -2707,3 +2968,38 @@ const struct attribute_group md_bitmap_group = { .name = "bitmap", .attrs = md_bitmap_attrs, }; + +static struct bitmap_operations bitmap_ops = { + .enabled = bitmap_enabled, + .create = bitmap_create, + .resize = bitmap_resize, + .load = bitmap_load, + .destroy = bitmap_destroy, + .flush = bitmap_flush, + .write_all = bitmap_write_all, + .dirty_bits = bitmap_dirty_bits, + .unplug = bitmap_unplug, + .daemon_work = bitmap_daemon_work, + .wait_behind_writes = 
bitmap_wait_behind_writes, + + .startwrite = bitmap_startwrite, + .endwrite = bitmap_endwrite, + .start_sync = bitmap_start_sync, + .end_sync = bitmap_end_sync, + .cond_end_sync = bitmap_cond_end_sync, + .close_sync = bitmap_close_sync, + + .update_sb = bitmap_update_sb, + .get_stats = bitmap_get_stats, + + .sync_with_cluster = bitmap_sync_with_cluster, + .get_from_slot = bitmap_get_from_slot, + .copy_from_slot = bitmap_copy_from_slot, + .set_pages = bitmap_set_pages, + .free = md_bitmap_free, +}; + +void mddev_set_bitmap_ops(struct mddev *mddev) +{ + mddev->bitmap_ops = &bitmap_ops; +} |