Diffstat (limited to 'drivers/block/zram/zram_drv.c')
| -rw-r--r-- | drivers/block/zram/zram_drv.c | 2234 |
1 file changed, 1401 insertions(+), 833 deletions(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index e290d6d97047..5759823d6314 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -33,6 +33,7 @@ #include <linux/debugfs.h> #include <linux/cpuhotplug.h> #include <linux/part_stat.h> +#include <linux/kernel_read_file.h> #include "zram_drv.h" @@ -43,6 +44,8 @@ static DEFINE_MUTEX(zram_index_mutex); static int zram_major; static const char *default_compressor = CONFIG_ZRAM_DEF_COMP; +#define ZRAM_MAX_ALGO_NAME_SZ 128 + /* Module params (documentation at end) */ static unsigned int num_devices = 1; /* @@ -54,23 +57,59 @@ static size_t huge_class_size; static const struct block_device_operations zram_devops; static void zram_free_page(struct zram *zram, size_t index); -static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, - u32 index, int offset, struct bio *bio); +static int zram_read_from_zspool(struct zram *zram, struct page *page, + u32 index); +#define slot_dep_map(zram, index) (&(zram)->table[(index)].dep_map) -static int zram_slot_trylock(struct zram *zram, u32 index) +static void zram_slot_lock_init(struct zram *zram, u32 index) { - return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags); + static struct lock_class_key __key; + + lockdep_init_map(slot_dep_map(zram, index), "zram->table[index].lock", + &__key, 0); +} + +/* + * entry locking rules: + * + * 1) Lock is exclusive + * + * 2) lock() function can sleep waiting for the lock + * + * 3) Lock owner can sleep + * + * 4) Use TRY lock variant when in atomic context + * - must check return value and handle locking failers + */ +static __must_check bool zram_slot_trylock(struct zram *zram, u32 index) +{ + unsigned long *lock = &zram->table[index].flags; + + if (!test_and_set_bit_lock(ZRAM_ENTRY_LOCK, lock)) { + mutex_acquire(slot_dep_map(zram, index), 0, 1, _RET_IP_); + lock_acquired(slot_dep_map(zram, index), _RET_IP_); + return true; + } + + return false; } static void zram_slot_lock(struct zram *zram, u32 index) { - bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags); + unsigned long *lock = &zram->table[index].flags; + + mutex_acquire(slot_dep_map(zram, index), 0, 0, _RET_IP_); + wait_on_bit_lock(lock, ZRAM_ENTRY_LOCK, TASK_UNINTERRUPTIBLE); + lock_acquired(slot_dep_map(zram, index), _RET_IP_); } static void zram_slot_unlock(struct zram *zram, u32 index) { - bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); + unsigned long *lock = &zram->table[index].flags; + + mutex_release(slot_dep_map(zram, index), _RET_IP_); + clear_and_wake_up_bit(ZRAM_ENTRY_LOCK, lock); } static inline bool init_done(struct zram *zram) @@ -93,7 +132,6 @@ static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) zram->table[index].handle = handle; } -/* flag operations require table entry bit_spin_lock() being held */ static bool zram_test_flag(struct zram *zram, u32 index, enum zram_pageflags flag) { @@ -112,17 +150,6 @@ static void zram_clear_flag(struct zram *zram, u32 index, zram->table[index].flags &= ~BIT(flag); } -static inline void zram_set_element(struct zram *zram, u32 index, - unsigned long element) -{ - zram->table[index].element = element; -} - -static unsigned long zram_get_element(struct zram *zram, u32 index) -{ - return zram->table[index].element; -} - static size_t zram_get_obj_size(struct zram *zram, u32 index) { return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1); @@ -143,11 +170,33 @@ static inline bool zram_allocated(struct zram *zram, u32 index) zram_test_flag(zram, index, 
ZRAM_WB); } +static inline void update_used_max(struct zram *zram, const unsigned long pages) +{ + unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages); + + do { + if (cur_max >= pages) + return; + } while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages, + &cur_max, pages)); +} + +static bool zram_can_store_page(struct zram *zram) +{ + unsigned long alloced_pages; + + alloced_pages = zs_get_total_pages(zram->mem_pool); + update_used_max(zram, alloced_pages); + + return !zram->limit_pages || alloced_pages <= zram->limit_pages; +} + #if PAGE_SIZE != 4096 static inline bool is_partial_io(struct bio_vec *bvec) { return bvec->bv_len != PAGE_SIZE; } +#define ZRAM_PARTIAL_IO 1 #else static inline bool is_partial_io(struct bio_vec *bvec) { @@ -174,47 +223,116 @@ static inline u32 zram_get_priority(struct zram *zram, u32 index) return prio & ZRAM_COMP_PRIORITY_MASK; } +static void zram_accessed(struct zram *zram, u32 index) +{ + zram_clear_flag(zram, index, ZRAM_IDLE); + zram_clear_flag(zram, index, ZRAM_PP_SLOT); +#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME + zram->table[index].ac_time = ktime_get_boottime(); +#endif +} + +#if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP +struct zram_pp_slot { + unsigned long index; + struct list_head entry; +}; + /* - * Check if request is within bounds and aligned on zram logical blocks. + * A post-processing bucket is, essentially, a size class, this defines + * the range (in bytes) of pp-slots sizes in particular bucket. */ -static inline bool valid_io_request(struct zram *zram, - sector_t start, unsigned int size) +#define PP_BUCKET_SIZE_RANGE 64 +#define NUM_PP_BUCKETS ((PAGE_SIZE / PP_BUCKET_SIZE_RANGE) + 1) + +struct zram_pp_ctl { + struct list_head pp_buckets[NUM_PP_BUCKETS]; +}; + +static struct zram_pp_ctl *init_pp_ctl(void) { - u64 end, bound; + struct zram_pp_ctl *ctl; + u32 idx; - /* unaligned request */ - if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) - return false; - if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) - return false; + ctl = kmalloc(sizeof(*ctl), GFP_KERNEL); + if (!ctl) + return NULL; - end = start + (size >> SECTOR_SHIFT); - bound = zram->disksize >> SECTOR_SHIFT; - /* out of range range */ - if (unlikely(start >= bound || end > bound || start > end)) - return false; + for (idx = 0; idx < NUM_PP_BUCKETS; idx++) + INIT_LIST_HEAD(&ctl->pp_buckets[idx]); + return ctl; +} - /* I/O request is valid */ - return true; +static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps) +{ + list_del_init(&pps->entry); + + zram_slot_lock(zram, pps->index); + zram_clear_flag(zram, pps->index, ZRAM_PP_SLOT); + zram_slot_unlock(zram, pps->index); + + kfree(pps); } -static void update_position(u32 *index, int *offset, struct bio_vec *bvec) +static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl) { - *index += (*offset + bvec->bv_len) / PAGE_SIZE; - *offset = (*offset + bvec->bv_len) % PAGE_SIZE; + u32 idx; + + if (!ctl) + return; + + for (idx = 0; idx < NUM_PP_BUCKETS; idx++) { + while (!list_empty(&ctl->pp_buckets[idx])) { + struct zram_pp_slot *pps; + + pps = list_first_entry(&ctl->pp_buckets[idx], + struct zram_pp_slot, + entry); + release_pp_slot(zram, pps); + } + } + + kfree(ctl); } -static inline void update_used_max(struct zram *zram, - const unsigned long pages) +static bool place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl, + u32 index) { - unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages); + struct zram_pp_slot *pps; + u32 bid; - do { - 
if (cur_max >= pages) - return; - } while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages, - &cur_max, pages)); + pps = kmalloc(sizeof(*pps), GFP_NOIO | __GFP_NOWARN); + if (!pps) + return false; + + INIT_LIST_HEAD(&pps->entry); + pps->index = index; + + bid = zram_get_obj_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE; + list_add(&pps->entry, &ctl->pp_buckets[bid]); + + zram_set_flag(zram, pps->index, ZRAM_PP_SLOT); + return true; +} + +static struct zram_pp_slot *select_pp_slot(struct zram_pp_ctl *ctl) +{ + struct zram_pp_slot *pps = NULL; + s32 idx = NUM_PP_BUCKETS - 1; + + /* The higher the bucket id the more optimal slot post-processing is */ + while (idx >= 0) { + pps = list_first_entry_or_null(&ctl->pp_buckets[idx], + struct zram_pp_slot, + entry); + if (pps) + break; + + idx--; + } + return pps; } +#endif static inline void zram_fill_page(void *ptr, unsigned long len, unsigned long value) @@ -255,7 +373,7 @@ static ssize_t initstate_show(struct device *dev, val = init_done(zram); up_read(&zram->init_lock); - return scnprintf(buf, PAGE_SIZE, "%u\n", val); + return sysfs_emit(buf, "%u\n", val); } static ssize_t disksize_show(struct device *dev, @@ -263,7 +381,7 @@ static ssize_t disksize_show(struct device *dev, { struct zram *zram = dev_to_zram(dev); - return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize); + return sysfs_emit(buf, "%llu\n", zram->disksize); } static ssize_t mem_limit_store(struct device *dev, @@ -317,18 +435,28 @@ static void mark_idle(struct zram *zram, ktime_t cutoff) for (index = 0; index < nr_pages; index++) { /* - * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race. - * See the comment in writeback_store. + * Do not mark ZRAM_SAME slots as ZRAM_IDLE, because no + * post-processing (recompress, writeback) happens to the + * ZRAM_SAME slot. + * + * And ZRAM_WB slots simply cannot be ZRAM_IDLE. 
*/ zram_slot_lock(zram, index); - if (zram_allocated(zram, index) && - !zram_test_flag(zram, index, ZRAM_UNDER_WB)) { -#ifdef CONFIG_ZRAM_MEMORY_TRACKING - is_idle = !cutoff || ktime_after(cutoff, zram->table[index].ac_time); -#endif - if (is_idle) - zram_set_flag(zram, index, ZRAM_IDLE); + if (!zram_allocated(zram, index) || + zram_test_flag(zram, index, ZRAM_WB) || + zram_test_flag(zram, index, ZRAM_SAME)) { + zram_slot_unlock(zram, index); + continue; } + +#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME + is_idle = !cutoff || + ktime_after(cutoff, zram->table[index].ac_time); +#endif + if (is_idle) + zram_set_flag(zram, index, ZRAM_IDLE); + else + zram_clear_flag(zram, index, ZRAM_IDLE); zram_slot_unlock(zram, index); } } @@ -347,7 +475,7 @@ static ssize_t idle_store(struct device *dev, */ u64 age_sec; - if (IS_ENABLED(CONFIG_ZRAM_MEMORY_TRACKING) && !kstrtoull(buf, 0, &age_sec)) + if (IS_ENABLED(CONFIG_ZRAM_TRACK_ENTRY_ACTIME) && !kstrtoull(buf, 0, &age_sec)) cutoff_time = ktime_sub(ktime_get_boottime(), ns_to_ktime(age_sec * NSEC_PER_SEC)); else @@ -372,8 +500,31 @@ out: } #ifdef CONFIG_ZRAM_WRITEBACK +#define INVALID_BDEV_BLOCK (~0UL) + +struct zram_wb_ctl { + /* idle list is accessed only by the writeback task, no concurency */ + struct list_head idle_reqs; + /* done list is accessed concurrently, protect by done_lock */ + struct list_head done_reqs; + wait_queue_head_t done_wait; + spinlock_t done_lock; + atomic_t num_inflight; +}; + +struct zram_wb_req { + unsigned long blk_idx; + struct page *page; + struct zram_pp_slot *pps; + struct bio_vec bio_vec; + struct bio bio; + + struct list_head entry; +}; + static ssize_t writeback_limit_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) + struct device_attribute *attr, + const char *buf, size_t len) { struct zram *zram = dev_to_zram(dev); u64 val; @@ -382,33 +533,31 @@ static ssize_t writeback_limit_enable_store(struct device *dev, if (kstrtoull(buf, 10, &val)) return ret; - down_read(&zram->init_lock); - spin_lock(&zram->wb_limit_lock); + down_write(&zram->init_lock); zram->wb_limit_enable = val; - spin_unlock(&zram->wb_limit_lock); - up_read(&zram->init_lock); + up_write(&zram->init_lock); ret = len; return ret; } static ssize_t writeback_limit_enable_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { bool val; struct zram *zram = dev_to_zram(dev); down_read(&zram->init_lock); - spin_lock(&zram->wb_limit_lock); val = zram->wb_limit_enable; - spin_unlock(&zram->wb_limit_lock); up_read(&zram->init_lock); - return scnprintf(buf, PAGE_SIZE, "%d\n", val); + return sysfs_emit(buf, "%d\n", val); } static ssize_t writeback_limit_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) + struct device_attribute *attr, + const char *buf, size_t len) { struct zram *zram = dev_to_zram(dev); u64 val; @@ -417,40 +566,76 @@ static ssize_t writeback_limit_store(struct device *dev, if (kstrtoull(buf, 10, &val)) return ret; - down_read(&zram->init_lock); - spin_lock(&zram->wb_limit_lock); + /* + * When the page size is greater than 4KB, if bd_wb_limit is set to + * a value that is not page - size aligned, it will cause value + * wrapping. For example, when the page size is set to 16KB and + * bd_wb_limit is set to 3, a single write - back operation will + * cause bd_wb_limit to become -1. Even more terrifying is that + * bd_wb_limit is an unsigned number. 
+ */ + val = rounddown(val, PAGE_SIZE / 4096); + + down_write(&zram->init_lock); zram->bd_wb_limit = val; - spin_unlock(&zram->wb_limit_lock); - up_read(&zram->init_lock); + up_write(&zram->init_lock); ret = len; return ret; } static ssize_t writeback_limit_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, char *buf) { u64 val; struct zram *zram = dev_to_zram(dev); down_read(&zram->init_lock); - spin_lock(&zram->wb_limit_lock); val = zram->bd_wb_limit; - spin_unlock(&zram->wb_limit_lock); up_read(&zram->init_lock); - return scnprintf(buf, PAGE_SIZE, "%llu\n", val); + return sysfs_emit(buf, "%llu\n", val); } -static void reset_bdev(struct zram *zram) +static ssize_t writeback_batch_size_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) { - struct block_device *bdev; + struct zram *zram = dev_to_zram(dev); + u32 val; + + if (kstrtouint(buf, 10, &val)) + return -EINVAL; + if (!val) + return -EINVAL; + + down_write(&zram->init_lock); + zram->wb_batch_size = val; + up_write(&zram->init_lock); + + return len; +} + +static ssize_t writeback_batch_size_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u32 val; + struct zram *zram = dev_to_zram(dev); + + down_read(&zram->init_lock); + val = zram->wb_batch_size; + up_read(&zram->init_lock); + + return sysfs_emit(buf, "%u\n", val); +} + +static void reset_bdev(struct zram *zram) +{ if (!zram->backing_dev) return; - bdev = zram->bdev; - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); /* hope filp_close flush all of IO */ filp_close(zram->backing_dev, NULL); zram->backing_dev = NULL; @@ -497,10 +682,8 @@ static ssize_t backing_dev_store(struct device *dev, size_t sz; struct file *backing_dev = NULL; struct inode *inode; - struct address_space *mapping; unsigned int bitmap_sz; unsigned long nr_pages, *bitmap = NULL; - struct block_device *bdev = NULL; int err; struct zram *zram = dev_to_zram(dev); @@ -521,15 +704,14 @@ static ssize_t backing_dev_store(struct device *dev, if (sz > 0 && file_name[sz - 1] == '\n') file_name[sz - 1] = 0x00; - backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0); + backing_dev = filp_open(file_name, O_RDWR | O_LARGEFILE | O_EXCL, 0); if (IS_ERR(backing_dev)) { err = PTR_ERR(backing_dev); backing_dev = NULL; goto out; } - mapping = backing_dev->f_mapping; - inode = mapping->host; + inode = backing_dev->f_mapping->host; /* Support only block device in this moment */ if (!S_ISBLK(inode->i_mode)) { @@ -537,15 +719,13 @@ static ssize_t backing_dev_store(struct device *dev, goto out; } - bdev = blkdev_get_by_dev(inode->i_rdev, - FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram); - if (IS_ERR(bdev)) { - err = PTR_ERR(bdev); - bdev = NULL; + nr_pages = i_size_read(inode) >> PAGE_SHIFT; + /* Refuse to use zero sized device (also prevents self reference) */ + if (!nr_pages) { + err = -EINVAL; goto out; } - nr_pages = i_size_read(inode) >> PAGE_SHIFT; bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long); bitmap = kvzalloc(bitmap_sz, GFP_KERNEL); if (!bitmap) { @@ -555,7 +735,7 @@ static ssize_t backing_dev_store(struct device *dev, reset_bdev(zram); - zram->bdev = bdev; + zram->bdev = I_BDEV(inode); zram->backing_dev = backing_dev; zram->bitmap = bitmap; zram->nr_pages = nr_pages; @@ -568,9 +748,6 @@ static ssize_t backing_dev_store(struct device *dev, out: kvfree(bitmap); - if (bdev) - blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); - if (backing_dev) filp_close(backing_dev, NULL); @@ -581,23 +758,20 @@ 
out: return err; } -static unsigned long alloc_block_bdev(struct zram *zram) +static unsigned long zram_reserve_bdev_block(struct zram *zram) { - unsigned long blk_idx = 1; -retry: - /* skip 0 bit to confuse zram.handle = 0 */ - blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx); - if (blk_idx == zram->nr_pages) - return 0; + unsigned long blk_idx; - if (test_and_set_bit(blk_idx, zram->bitmap)) - goto retry; + blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, 0); + if (blk_idx == zram->nr_pages) + return INVALID_BDEV_BLOCK; + set_bit(blk_idx, zram->bitmap); atomic64_inc(&zram->stats.bd_count); return blk_idx; } -static void free_block_bdev(struct zram *zram, unsigned long blk_idx) +static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx) { int was_set; @@ -606,130 +780,399 @@ static void free_block_bdev(struct zram *zram, unsigned long blk_idx) atomic64_dec(&zram->stats.bd_count); } -static void zram_page_end_io(struct bio *bio) +static void read_from_bdev_async(struct zram *zram, struct page *page, + unsigned long entry, struct bio *parent) { - struct page *page = bio_first_page_all(bio); + struct bio *bio; + + bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO); + bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); + __bio_add_page(bio, page, PAGE_SIZE, 0); + bio_chain(bio, parent); + submit_bio(bio); +} - page_endio(page, op_is_write(bio_op(bio)), - blk_status_to_errno(bio->bi_status)); - bio_put(bio); +static void release_wb_req(struct zram_wb_req *req) +{ + __free_page(req->page); + kfree(req); } -/* - * Returns 1 if the submission is successful. - */ -static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, - unsigned long entry, struct bio *parent) +static void release_wb_ctl(struct zram_wb_ctl *wb_ctl) { - struct bio *bio; + if (!wb_ctl) + return; - bio = bio_alloc(zram->bdev, 1, parent ? parent->bi_opf : REQ_OP_READ, - GFP_NOIO); - if (!bio) - return -ENOMEM; + /* We should never have inflight requests at this point */ + WARN_ON(atomic_read(&wb_ctl->num_inflight)); + WARN_ON(!list_empty(&wb_ctl->done_reqs)); - bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); - if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) { - bio_put(bio); - return -EIO; + while (!list_empty(&wb_ctl->idle_reqs)) { + struct zram_wb_req *req; + + req = list_first_entry(&wb_ctl->idle_reqs, + struct zram_wb_req, entry); + list_del(&req->entry); + release_wb_req(req); } - if (!parent) - bio->bi_end_io = zram_page_end_io; - else - bio_chain(bio, parent); + kfree(wb_ctl); +} - submit_bio(bio); - return 1; +static struct zram_wb_ctl *init_wb_ctl(struct zram *zram) +{ + struct zram_wb_ctl *wb_ctl; + int i; + + wb_ctl = kmalloc(sizeof(*wb_ctl), GFP_KERNEL); + if (!wb_ctl) + return NULL; + + INIT_LIST_HEAD(&wb_ctl->idle_reqs); + INIT_LIST_HEAD(&wb_ctl->done_reqs); + atomic_set(&wb_ctl->num_inflight, 0); + init_waitqueue_head(&wb_ctl->done_wait); + spin_lock_init(&wb_ctl->done_lock); + + for (i = 0; i < zram->wb_batch_size; i++) { + struct zram_wb_req *req; + + /* + * This is fatal condition only if we couldn't allocate + * any requests at all. Otherwise we just work with the + * requests that we have successfully allocated, so that + * writeback can still proceed, even if there is only one + * request on the idle list. 
+ */ + req = kzalloc(sizeof(*req), GFP_KERNEL | __GFP_NOWARN); + if (!req) + break; + + req->page = alloc_page(GFP_KERNEL | __GFP_NOWARN); + if (!req->page) { + kfree(req); + break; + } + + list_add(&req->entry, &wb_ctl->idle_reqs); + } + + /* We couldn't allocate any requests, so writeabck is not possible */ + if (list_empty(&wb_ctl->idle_reqs)) + goto release_wb_ctl; + + return wb_ctl; + +release_wb_ctl: + release_wb_ctl(wb_ctl); + return NULL; } -#define PAGE_WB_SIG "page_index=" +static void zram_account_writeback_rollback(struct zram *zram) +{ + lockdep_assert_held_read(&zram->init_lock); -#define PAGE_WRITEBACK 0 -#define HUGE_WRITEBACK (1<<0) -#define IDLE_WRITEBACK (1<<1) -#define INCOMPRESSIBLE_WRITEBACK (1<<2) + if (zram->wb_limit_enable) + zram->bd_wb_limit += 1UL << (PAGE_SHIFT - 12); +} -static ssize_t writeback_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) +static void zram_account_writeback_submit(struct zram *zram) { - struct zram *zram = dev_to_zram(dev); - unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; - unsigned long index = 0; - struct bio bio; - struct bio_vec bio_vec; - struct page *page; - ssize_t ret = len; - int mode, err; - unsigned long blk_idx = 0; - - if (sysfs_streq(buf, "idle")) - mode = IDLE_WRITEBACK; - else if (sysfs_streq(buf, "huge")) - mode = HUGE_WRITEBACK; - else if (sysfs_streq(buf, "huge_idle")) - mode = IDLE_WRITEBACK | HUGE_WRITEBACK; - else if (sysfs_streq(buf, "incompressible")) - mode = INCOMPRESSIBLE_WRITEBACK; - else { - if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1)) - return -EINVAL; + lockdep_assert_held_read(&zram->init_lock); - if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) || - index >= nr_pages) - return -EINVAL; + if (zram->wb_limit_enable && zram->bd_wb_limit > 0) + zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12); +} - nr_pages = 1; - mode = PAGE_WRITEBACK; - } +static int zram_writeback_complete(struct zram *zram, struct zram_wb_req *req) +{ + u32 index = req->pps->index; + int err; - down_read(&zram->init_lock); - if (!init_done(zram)) { - ret = -EINVAL; - goto release_init_lock; + err = blk_status_to_errno(req->bio.bi_status); + if (err) { + /* + * Failed wb requests should not be accounted in wb_limit + * (if enabled). + */ + zram_account_writeback_rollback(zram); + zram_release_bdev_block(zram, req->blk_idx); + return err; } - if (!zram->backing_dev) { - ret = -ENODEV; - goto release_init_lock; + atomic64_inc(&zram->stats.bd_writes); + zram_slot_lock(zram, index); + /* + * We release slot lock during writeback so slot can change under us: + * slot_free() or slot_free() and zram_write_page(). In both cases + * slot loses ZRAM_PP_SLOT flag. No concurrent post-processing can + * set ZRAM_PP_SLOT on such slots until current post-processing + * finishes. 
+ */ + if (!zram_test_flag(zram, index, ZRAM_PP_SLOT)) { + zram_release_bdev_block(zram, req->blk_idx); + goto out; } - page = alloc_page(GFP_KERNEL); - if (!page) { - ret = -ENOMEM; - goto release_init_lock; + zram_free_page(zram, index); + zram_set_flag(zram, index, ZRAM_WB); + zram_set_handle(zram, index, req->blk_idx); + atomic64_inc(&zram->stats.pages_stored); + +out: + zram_slot_unlock(zram, index); + return 0; +} + +static void zram_writeback_endio(struct bio *bio) +{ + struct zram_wb_req *req = container_of(bio, struct zram_wb_req, bio); + struct zram_wb_ctl *wb_ctl = bio->bi_private; + unsigned long flags; + + spin_lock_irqsave(&wb_ctl->done_lock, flags); + list_add(&req->entry, &wb_ctl->done_reqs); + spin_unlock_irqrestore(&wb_ctl->done_lock, flags); + + wake_up(&wb_ctl->done_wait); +} + +static void zram_submit_wb_request(struct zram *zram, + struct zram_wb_ctl *wb_ctl, + struct zram_wb_req *req) +{ + /* + * wb_limit (if enabled) should be adjusted before submission, + * so that we don't over-submit. + */ + zram_account_writeback_submit(zram); + atomic_inc(&wb_ctl->num_inflight); + req->bio.bi_private = wb_ctl; + submit_bio(&req->bio); +} + +static int zram_complete_done_reqs(struct zram *zram, + struct zram_wb_ctl *wb_ctl) +{ + struct zram_wb_req *req; + unsigned long flags; + int ret = 0, err; + + while (atomic_read(&wb_ctl->num_inflight) > 0) { + spin_lock_irqsave(&wb_ctl->done_lock, flags); + req = list_first_entry_or_null(&wb_ctl->done_reqs, + struct zram_wb_req, entry); + if (req) + list_del(&req->entry); + spin_unlock_irqrestore(&wb_ctl->done_lock, flags); + + /* ->num_inflight > 0 doesn't mean we have done requests */ + if (!req) + break; + + err = zram_writeback_complete(zram, req); + if (err) + ret = err; + + atomic_dec(&wb_ctl->num_inflight); + release_pp_slot(zram, req->pps); + req->pps = NULL; + + list_add(&req->entry, &wb_ctl->idle_reqs); } - for (; nr_pages != 0; index++, nr_pages--) { - struct bio_vec bvec; + return ret; +} + +static struct zram_wb_req *zram_select_idle_req(struct zram_wb_ctl *wb_ctl) +{ + struct zram_wb_req *req; - bvec.bv_page = page; - bvec.bv_len = PAGE_SIZE; - bvec.bv_offset = 0; + req = list_first_entry_or_null(&wb_ctl->idle_reqs, + struct zram_wb_req, entry); + if (req) + list_del(&req->entry); + return req; +} - spin_lock(&zram->wb_limit_lock); +static int zram_writeback_slots(struct zram *zram, + struct zram_pp_ctl *ctl, + struct zram_wb_ctl *wb_ctl) +{ + unsigned long blk_idx = INVALID_BDEV_BLOCK; + struct zram_wb_req *req = NULL; + struct zram_pp_slot *pps; + int ret = 0, err = 0; + u32 index = 0; + + while ((pps = select_pp_slot(ctl))) { if (zram->wb_limit_enable && !zram->bd_wb_limit) { - spin_unlock(&zram->wb_limit_lock); ret = -EIO; break; } - spin_unlock(&zram->wb_limit_lock); - if (!blk_idx) { - blk_idx = alloc_block_bdev(zram); - if (!blk_idx) { + while (!req) { + req = zram_select_idle_req(wb_ctl); + if (req) + break; + + wait_event(wb_ctl->done_wait, + !list_empty(&wb_ctl->done_reqs)); + + err = zram_complete_done_reqs(zram, wb_ctl); + /* + * BIO errors are not fatal, we continue and simply + * attempt to writeback the remaining objects (pages). + * At the same time we need to signal user-space that + * some writes (at least one, but also could be all of + * them) were not successful and we do so by returning + * the most recent BIO error. 
+ */ + if (err) + ret = err; + } + + if (blk_idx == INVALID_BDEV_BLOCK) { + blk_idx = zram_reserve_bdev_block(zram); + if (blk_idx == INVALID_BDEV_BLOCK) { ret = -ENOSPC; break; } } + index = pps->index; + zram_slot_lock(zram, index); + /* + * scan_slots() sets ZRAM_PP_SLOT and releases slot lock, so + * slots can change in the meantime. If slots are accessed or + * freed they lose ZRAM_PP_SLOT flag and hence we don't + * post-process them. + */ + if (!zram_test_flag(zram, index, ZRAM_PP_SLOT)) + goto next; + if (zram_read_from_zspool(zram, req->page, index)) + goto next; + zram_slot_unlock(zram, index); + + /* + * From now on pp-slot is owned by the req, remove it from + * its pp bucket. + */ + list_del_init(&pps->entry); + + req->blk_idx = blk_idx; + req->pps = pps; + bio_init(&req->bio, zram->bdev, &req->bio_vec, 1, REQ_OP_WRITE); + req->bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9); + req->bio.bi_end_io = zram_writeback_endio; + __bio_add_page(&req->bio, req->page, PAGE_SIZE, 0); + + zram_submit_wb_request(zram, wb_ctl, req); + blk_idx = INVALID_BDEV_BLOCK; + req = NULL; + cond_resched(); + continue; + +next: + zram_slot_unlock(zram, index); + release_pp_slot(zram, pps); + } + + /* + * Selected idle req, but never submitted it due to some error or + * wb limit. + */ + if (req) + release_wb_req(req); + + while (atomic_read(&wb_ctl->num_inflight) > 0) { + wait_event(wb_ctl->done_wait, !list_empty(&wb_ctl->done_reqs)); + err = zram_complete_done_reqs(zram, wb_ctl); + if (err) + ret = err; + } + + return ret; +} + +#define PAGE_WRITEBACK 0 +#define HUGE_WRITEBACK (1 << 0) +#define IDLE_WRITEBACK (1 << 1) +#define INCOMPRESSIBLE_WRITEBACK (1 << 2) + +static int parse_page_index(char *val, unsigned long nr_pages, + unsigned long *lo, unsigned long *hi) +{ + int ret; + + ret = kstrtoul(val, 10, lo); + if (ret) + return ret; + if (*lo >= nr_pages) + return -ERANGE; + *hi = *lo + 1; + return 0; +} + +static int parse_page_indexes(char *val, unsigned long nr_pages, + unsigned long *lo, unsigned long *hi) +{ + char *delim; + int ret; + + delim = strchr(val, '-'); + if (!delim) + return -EINVAL; + + *delim = 0x00; + ret = kstrtoul(val, 10, lo); + if (ret) + return ret; + if (*lo >= nr_pages) + return -ERANGE; + + ret = kstrtoul(delim + 1, 10, hi); + if (ret) + return ret; + if (*hi >= nr_pages || *lo > *hi) + return -ERANGE; + *hi += 1; + return 0; +} + +static int parse_mode(char *val, u32 *mode) +{ + *mode = 0; + + if (!strcmp(val, "idle")) + *mode = IDLE_WRITEBACK; + if (!strcmp(val, "huge")) + *mode = HUGE_WRITEBACK; + if (!strcmp(val, "huge_idle")) + *mode = IDLE_WRITEBACK | HUGE_WRITEBACK; + if (!strcmp(val, "incompressible")) + *mode = INCOMPRESSIBLE_WRITEBACK; + + if (*mode == 0) + return -EINVAL; + return 0; +} + +static int scan_slots_for_writeback(struct zram *zram, u32 mode, + unsigned long lo, unsigned long hi, + struct zram_pp_ctl *ctl) +{ + u32 index = lo; + + while (index < hi) { + bool ok = true; + zram_slot_lock(zram, index); if (!zram_allocated(zram, index)) goto next; if (zram_test_flag(zram, index, ZRAM_WB) || - zram_test_flag(zram, index, ZRAM_SAME) || - zram_test_flag(zram, index, ZRAM_UNDER_WB)) + zram_test_flag(zram, index, ZRAM_SAME)) goto next; if (mode & IDLE_WRITEBACK && @@ -742,86 +1185,129 @@ static ssize_t writeback_store(struct device *dev, !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE)) goto next; - /* - * Clearing ZRAM_UNDER_WB is duty of caller. - * IOW, zram_free_page never clear it. 
- */ - zram_set_flag(zram, index, ZRAM_UNDER_WB); - /* Need for hugepage writeback racing */ - zram_set_flag(zram, index, ZRAM_IDLE); + ok = place_pp_slot(zram, ctl, index); +next: zram_slot_unlock(zram, index); - if (zram_bvec_read(zram, &bvec, index, 0, NULL)) { - zram_slot_lock(zram, index); - zram_clear_flag(zram, index, ZRAM_UNDER_WB); - zram_clear_flag(zram, index, ZRAM_IDLE); - zram_slot_unlock(zram, index); - continue; - } + if (!ok) + break; + index++; + } + + return 0; +} + +static ssize_t writeback_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct zram *zram = dev_to_zram(dev); + u64 nr_pages = zram->disksize >> PAGE_SHIFT; + unsigned long lo = 0, hi = nr_pages; + struct zram_pp_ctl *pp_ctl = NULL; + struct zram_wb_ctl *wb_ctl = NULL; + char *args, *param, *val; + ssize_t ret = len; + int err, mode = 0; - bio_init(&bio, zram->bdev, &bio_vec, 1, - REQ_OP_WRITE | REQ_SYNC); - bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9); + down_read(&zram->init_lock); + if (!init_done(zram)) { + up_read(&zram->init_lock); + return -EINVAL; + } + + /* Do not permit concurrent post-processing actions. */ + if (atomic_xchg(&zram->pp_in_progress, 1)) { + up_read(&zram->init_lock); + return -EAGAIN; + } + + if (!zram->backing_dev) { + ret = -ENODEV; + goto release_init_lock; + } + + pp_ctl = init_pp_ctl(); + if (!pp_ctl) { + ret = -ENOMEM; + goto release_init_lock; + } + + wb_ctl = init_wb_ctl(zram); + if (!wb_ctl) { + ret = -ENOMEM; + goto release_init_lock; + } + + args = skip_spaces(buf); + while (*args) { + args = next_arg(args, ¶m, &val); - bio_add_page(&bio, bvec.bv_page, bvec.bv_len, - bvec.bv_offset); /* - * XXX: A single page IO would be inefficient for write - * but it would be not bad as starter. + * Workaround to support the old writeback interface. + * + * The old writeback interface has a minor inconsistency and + * requires key=value only for page_index parameter, while the + * writeback mode is a valueless parameter. + * + * This is not the case anymore and now all parameters are + * required to have values, however, we need to support the + * legacy writeback interface format so we check if we can + * recognize a valueless parameter as the (legacy) writeback + * mode. */ - err = submit_bio_wait(&bio); - if (err) { - zram_slot_lock(zram, index); - zram_clear_flag(zram, index, ZRAM_UNDER_WB); - zram_clear_flag(zram, index, ZRAM_IDLE); - zram_slot_unlock(zram, index); - /* - * BIO errors are not fatal, we continue and simply - * attempt to writeback the remaining objects (pages). - * At the same time we need to signal user-space that - * some writes (at least one, but also could be all of - * them) were not successful and we do so by returning - * the most recent BIO error. - */ - ret = err; - continue; + if (!val || !*val) { + err = parse_mode(param, &mode); + if (err) { + ret = err; + goto release_init_lock; + } + + scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); + break; } - atomic64_inc(&zram->stats.bd_writes); - /* - * We released zram_slot_lock so need to check if the slot was - * changed. If there is freeing for the slot, we can catch it - * easily by zram_allocated. - * A subtle case is the slot is freed/reallocated/marked as - * ZRAM_IDLE again. To close the race, idle_store doesn't - * mark ZRAM_IDLE once it found the slot was ZRAM_UNDER_WB. - * Thus, we could close the race by checking ZRAM_IDLE bit. 
- */ - zram_slot_lock(zram, index); - if (!zram_allocated(zram, index) || - !zram_test_flag(zram, index, ZRAM_IDLE)) { - zram_clear_flag(zram, index, ZRAM_UNDER_WB); - zram_clear_flag(zram, index, ZRAM_IDLE); - goto next; + if (!strcmp(param, "type")) { + err = parse_mode(val, &mode); + if (err) { + ret = err; + goto release_init_lock; + } + + scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); + break; } - zram_free_page(zram, index); - zram_clear_flag(zram, index, ZRAM_UNDER_WB); - zram_set_flag(zram, index, ZRAM_WB); - zram_set_element(zram, index, blk_idx); - blk_idx = 0; - atomic64_inc(&zram->stats.pages_stored); - spin_lock(&zram->wb_limit_lock); - if (zram->wb_limit_enable && zram->bd_wb_limit > 0) - zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12); - spin_unlock(&zram->wb_limit_lock); -next: - zram_slot_unlock(zram, index); + if (!strcmp(param, "page_index")) { + err = parse_page_index(val, nr_pages, &lo, &hi); + if (err) { + ret = err; + goto release_init_lock; + } + + scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); + continue; + } + + if (!strcmp(param, "page_indexes")) { + err = parse_page_indexes(val, nr_pages, &lo, &hi); + if (err) { + ret = err; + goto release_init_lock; + } + + scan_slots_for_writeback(zram, mode, lo, hi, pp_ctl); + continue; + } } - if (blk_idx) - free_block_bdev(zram, blk_idx); - __free_page(page); + err = zram_writeback_slots(zram, pp_ctl, wb_ctl); + if (err) + ret = err; + release_init_lock: + release_pp_ctl(zram, pp_ctl); + release_wb_ctl(wb_ctl); + atomic_set(&zram->pp_in_progress, 0); up_read(&zram->init_lock); return ret; @@ -831,19 +1317,20 @@ struct zram_work { struct work_struct work; struct zram *zram; unsigned long entry; - struct bio *bio; - struct bio_vec bvec; + struct page *page; + int error; }; -#if PAGE_SIZE != 4096 static void zram_sync_read(struct work_struct *work) { struct zram_work *zw = container_of(work, struct zram_work, work); - struct zram *zram = zw->zram; - unsigned long entry = zw->entry; - struct bio *bio = zw->bio; + struct bio_vec bv; + struct bio bio; - read_from_bdev_async(zram, &zw->bvec, entry, bio); + bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ); + bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9); + __bio_add_page(&bio, zw->page, PAGE_SIZE, 0); + zw->error = submit_bio_wait(&bio); } /* @@ -851,50 +1338,46 @@ static void zram_sync_read(struct work_struct *work) * chained IO with parent IO in same context, it's a deadlock. To avoid that, * use a worker thread context. 
*/ -static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, - unsigned long entry, struct bio *bio) +static int read_from_bdev_sync(struct zram *zram, struct page *page, + unsigned long entry) { struct zram_work work; - work.bvec = *bvec; + work.page = page; work.zram = zram; work.entry = entry; - work.bio = bio; INIT_WORK_ONSTACK(&work.work, zram_sync_read); - queue_work(system_unbound_wq, &work.work); + queue_work(system_dfl_wq, &work.work); flush_work(&work.work); destroy_work_on_stack(&work.work); - return 1; -} -#else -static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, - unsigned long entry, struct bio *bio) -{ - WARN_ON(1); - return -EIO; + return work.error; } -#endif -static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, - unsigned long entry, struct bio *parent, bool sync) +static int read_from_bdev(struct zram *zram, struct page *page, + unsigned long entry, struct bio *parent) { atomic64_inc(&zram->stats.bd_reads); - if (sync) - return read_from_bdev_sync(zram, bvec, entry, parent); - else - return read_from_bdev_async(zram, bvec, entry, parent); + if (!parent) { + if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO))) + return -EIO; + return read_from_bdev_sync(zram, page, entry); + } + read_from_bdev_async(zram, page, entry, parent); + return 0; } #else static inline void reset_bdev(struct zram *zram) {}; -static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, - unsigned long entry, struct bio *parent, bool sync) +static int read_from_bdev(struct zram *zram, struct page *page, + unsigned long entry, struct bio *parent) { return -EIO; } -static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {}; +static void zram_release_bdev_block(struct zram *zram, unsigned long blk_idx) +{ +} #endif #ifdef CONFIG_ZRAM_MEMORY_TRACKING @@ -911,12 +1394,6 @@ static void zram_debugfs_destroy(void) debugfs_remove_recursive(zram_debugfs_root); } -static void zram_accessed(struct zram *zram, u32 index) -{ - zram_clear_flag(zram, index, ZRAM_IDLE); - zram->table[index].ac_time = ktime_get_boottime(); -} - static ssize_t read_block_state(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -1000,35 +1477,10 @@ static void zram_debugfs_unregister(struct zram *zram) #else static void zram_debugfs_create(void) {}; static void zram_debugfs_destroy(void) {}; -static void zram_accessed(struct zram *zram, u32 index) -{ - zram_clear_flag(zram, index, ZRAM_IDLE); -}; static void zram_debugfs_register(struct zram *zram) {}; static void zram_debugfs_unregister(struct zram *zram) {}; #endif -/* - * We switched to per-cpu streams and this attr is not needed anymore. - * However, we will keep it around for some time, because: - * a) we may revert per-cpu streams in the future - * b) it's visible to user space and we need to follow our 2 years - * retirement rule; but we already have a number of 'soon to be - * altered' attrs, so max_comp_streams need to wait for the next - * layoff cycle. 
- */ -static ssize_t max_comp_streams_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus()); -} - -static ssize_t max_comp_streams_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - return len; -} - static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg) { /* Do not free statically defined compression algorithms */ @@ -1038,24 +1490,13 @@ static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg) zram->comp_algs[prio] = alg; } -static ssize_t __comp_algorithm_show(struct zram *zram, u32 prio, char *buf) -{ - ssize_t sz; - - down_read(&zram->init_lock); - sz = zcomp_available_show(zram->comp_algs[prio], buf); - up_read(&zram->init_lock); - - return sz; -} - static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf) { char *compressor; size_t sz; sz = strlen(buf); - if (sz >= CRYPTO_MAX_ALG_NAME) + if (sz >= ZRAM_MAX_ALGO_NAME_SZ) return -E2BIG; compressor = kstrdup(buf, GFP_KERNEL); @@ -1084,13 +1525,127 @@ static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf) return 0; } +static void comp_params_reset(struct zram *zram, u32 prio) +{ + struct zcomp_params *params = &zram->params[prio]; + + vfree(params->dict); + params->level = ZCOMP_PARAM_NOT_SET; + params->deflate.winbits = ZCOMP_PARAM_NOT_SET; + params->dict_sz = 0; + params->dict = NULL; +} + +static int comp_params_store(struct zram *zram, u32 prio, s32 level, + const char *dict_path, + struct deflate_params *deflate_params) +{ + ssize_t sz = 0; + + comp_params_reset(zram, prio); + + if (dict_path) { + sz = kernel_read_file_from_path(dict_path, 0, + &zram->params[prio].dict, + INT_MAX, + NULL, + READING_POLICY); + if (sz < 0) + return -EINVAL; + } + + zram->params[prio].dict_sz = sz; + zram->params[prio].level = level; + zram->params[prio].deflate.winbits = deflate_params->winbits; + return 0; +} + +static ssize_t algorithm_params_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t len) +{ + s32 prio = ZRAM_PRIMARY_COMP, level = ZCOMP_PARAM_NOT_SET; + char *args, *param, *val, *algo = NULL, *dict_path = NULL; + struct deflate_params deflate_params; + struct zram *zram = dev_to_zram(dev); + int ret; + + deflate_params.winbits = ZCOMP_PARAM_NOT_SET; + + args = skip_spaces(buf); + while (*args) { + args = next_arg(args, ¶m, &val); + + if (!val || !*val) + return -EINVAL; + + if (!strcmp(param, "priority")) { + ret = kstrtoint(val, 10, &prio); + if (ret) + return ret; + continue; + } + + if (!strcmp(param, "level")) { + ret = kstrtoint(val, 10, &level); + if (ret) + return ret; + continue; + } + + if (!strcmp(param, "algo")) { + algo = val; + continue; + } + + if (!strcmp(param, "dict")) { + dict_path = val; + continue; + } + + if (!strcmp(param, "deflate.winbits")) { + ret = kstrtoint(val, 10, &deflate_params.winbits); + if (ret) + return ret; + continue; + } + } + + /* Lookup priority by algorithm name */ + if (algo) { + s32 p; + + prio = -EINVAL; + for (p = ZRAM_PRIMARY_COMP; p < ZRAM_MAX_COMPS; p++) { + if (!zram->comp_algs[p]) + continue; + + if (!strcmp(zram->comp_algs[p], algo)) { + prio = p; + break; + } + } + } + + if (prio < ZRAM_PRIMARY_COMP || prio >= ZRAM_MAX_COMPS) + return -EINVAL; + + ret = comp_params_store(zram, prio, level, dict_path, &deflate_params); + return ret ? 
ret : len; +} + static ssize_t comp_algorithm_show(struct device *dev, struct device_attribute *attr, char *buf) { struct zram *zram = dev_to_zram(dev); + ssize_t sz; - return __comp_algorithm_show(zram, ZRAM_PRIMARY_COMP, buf); + down_read(&zram->init_lock); + sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_COMP], buf, 0); + up_read(&zram->init_lock); + return sz; } static ssize_t comp_algorithm_store(struct device *dev, @@ -1114,14 +1669,15 @@ static ssize_t recomp_algorithm_show(struct device *dev, ssize_t sz = 0; u32 prio; + down_read(&zram->init_lock); for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { if (!zram->comp_algs[prio]) continue; - sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, "#%d: ", prio); - sz += __comp_algorithm_show(zram, prio, buf + sz); + sz += sysfs_emit_at(buf, sz, "#%d: ", prio); + sz += zcomp_available_show(zram->comp_algs[prio], buf, sz); } - + up_read(&zram->init_lock); return sz; } @@ -1140,7 +1696,7 @@ static ssize_t recomp_algorithm_store(struct device *dev, while (*args) { args = next_arg(args, ¶m, &val); - if (!*val) + if (!val || !*val) return -EINVAL; if (!strcmp(param, "algo")) { @@ -1191,11 +1747,10 @@ static ssize_t io_stat_show(struct device *dev, ssize_t ret; down_read(&zram->init_lock); - ret = scnprintf(buf, PAGE_SIZE, - "%8llu %8llu %8llu %8llu\n", + ret = sysfs_emit(buf, + "%8llu %8llu 0 %8llu\n", (u64)atomic64_read(&zram->stats.failed_reads), (u64)atomic64_read(&zram->stats.failed_writes), - (u64)atomic64_read(&zram->stats.invalid_io), (u64)atomic64_read(&zram->stats.notify_free)); up_read(&zram->init_lock); @@ -1222,7 +1777,7 @@ static ssize_t mm_stat_show(struct device *dev, orig_size = atomic64_read(&zram->stats.pages_stored); max_used = atomic_long_read(&zram->stats.max_used_pages); - ret = scnprintf(buf, PAGE_SIZE, + ret = sysfs_emit(buf, "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n", orig_size << PAGE_SHIFT, (u64)atomic64_read(&zram->stats.compr_data_size), @@ -1247,8 +1802,8 @@ static ssize_t bd_stat_show(struct device *dev, ssize_t ret; down_read(&zram->init_lock); - ret = scnprintf(buf, PAGE_SIZE, - "%8llu %8llu %8llu\n", + ret = sysfs_emit(buf, + "%8llu %8llu %8llu\n", FOUR_K((u64)atomic64_read(&zram->stats.bd_count)), FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)), FOUR_K((u64)atomic64_read(&zram->stats.bd_writes))); @@ -1266,10 +1821,9 @@ static ssize_t debug_stat_show(struct device *dev, ssize_t ret; down_read(&zram->init_lock); - ret = scnprintf(buf, PAGE_SIZE, - "version: %d\n%8llu %8llu\n", + ret = sysfs_emit(buf, + "version: %d\n0 %8llu\n", version, - (u64)atomic64_read(&zram->stats.writestall), (u64)atomic64_read(&zram->stats.miss_free)); up_read(&zram->init_lock); @@ -1288,17 +1842,21 @@ static void zram_meta_free(struct zram *zram, u64 disksize) size_t num_pages = disksize >> PAGE_SHIFT; size_t index; + if (!zram->table) + return; + /* Free all pages that are still in this zram device */ for (index = 0; index < num_pages; index++) zram_free_page(zram, index); zs_destroy_pool(zram->mem_pool); vfree(zram->table); + zram->table = NULL; } static bool zram_meta_alloc(struct zram *zram, u64 disksize) { - size_t num_pages; + size_t num_pages, index; num_pages = disksize >> PAGE_SHIFT; zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table))); @@ -1308,42 +1866,40 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) zram->mem_pool = zs_create_pool(zram->disk->disk_name); if (!zram->mem_pool) { vfree(zram->table); + zram->table = NULL; return false; } if (!huge_class_size) 
huge_class_size = zs_huge_class_size(zram->mem_pool); + + for (index = 0; index < num_pages; index++) + zram_slot_lock_init(zram, index); + return true; } -/* - * To protect concurrent access to the same index entry, - * caller should hold this table index entry's bit_spinlock to - * indicate this index entry is accessing. - */ static void zram_free_page(struct zram *zram, size_t index) { unsigned long handle; -#ifdef CONFIG_ZRAM_MEMORY_TRACKING +#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME zram->table[index].ac_time = 0; #endif - if (zram_test_flag(zram, index, ZRAM_IDLE)) - zram_clear_flag(zram, index, ZRAM_IDLE); + + zram_clear_flag(zram, index, ZRAM_IDLE); + zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE); + zram_clear_flag(zram, index, ZRAM_PP_SLOT); + zram_set_priority(zram, index, 0); if (zram_test_flag(zram, index, ZRAM_HUGE)) { zram_clear_flag(zram, index, ZRAM_HUGE); atomic64_dec(&zram->stats.huge_pages); } - if (zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE)) - zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE); - - zram_set_priority(zram, index, 0); - if (zram_test_flag(zram, index, ZRAM_WB)) { zram_clear_flag(zram, index, ZRAM_WB); - free_block_bdev(zram, zram_get_element(zram, index)); + zram_release_bdev_block(zram, zram_get_handle(zram, index)); goto out; } @@ -1364,83 +1920,82 @@ static void zram_free_page(struct zram *zram, size_t index) zs_free(zram->mem_pool, handle); atomic64_sub(zram_get_obj_size(zram, index), - &zram->stats.compr_data_size); + &zram->stats.compr_data_size); out: atomic64_dec(&zram->stats.pages_stored); zram_set_handle(zram, index, 0); zram_set_obj_size(zram, index, 0); - WARN_ON_ONCE(zram->table[index].flags & - ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB)); } -/* - * Reads a page from the writeback devices. Corresponding ZRAM slot - * should be unlocked. - */ -static int zram_bvec_read_from_bdev(struct zram *zram, struct page *page, - u32 index, struct bio *bio, bool partial_io) +static int read_same_filled_page(struct zram *zram, struct page *page, + u32 index) { - struct bio_vec bvec = { - .bv_page = page, - .bv_len = PAGE_SIZE, - .bv_offset = 0, - }; + void *mem; - return read_from_bdev(zram, &bvec, zram_get_element(zram, index), bio, - partial_io); + mem = kmap_local_page(page); + zram_fill_page(mem, PAGE_SIZE, zram_get_handle(zram, index)); + kunmap_local(mem); + return 0; } -/* - * Reads (decompresses if needed) a page from zspool (zsmalloc). - * Corresponding ZRAM slot should be locked. - */ -static int zram_read_from_zspool(struct zram *zram, struct page *page, - u32 index) +static int read_incompressible_page(struct zram *zram, struct page *page, + u32 index) +{ + unsigned long handle; + void *src, *dst; + + handle = zram_get_handle(zram, index); + src = zs_obj_read_begin(zram->mem_pool, handle, NULL); + dst = kmap_local_page(page); + copy_page(dst, src); + kunmap_local(dst); + zs_obj_read_end(zram->mem_pool, handle, src); + + return 0; +} + +static int read_compressed_page(struct zram *zram, struct page *page, u32 index) { struct zcomp_strm *zstrm; unsigned long handle; unsigned int size; void *src, *dst; - u32 prio; - int ret; + int ret, prio; handle = zram_get_handle(zram, index); - if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) { - unsigned long value; - void *mem; - - value = handle ? 
zram_get_element(zram, index) : 0; - mem = kmap_atomic(page); - zram_fill_page(mem, PAGE_SIZE, value); - kunmap_atomic(mem); - return 0; - } - size = zram_get_obj_size(zram, index); + prio = zram_get_priority(zram, index); - if (size != PAGE_SIZE) { - prio = zram_get_priority(zram, index); - zstrm = zcomp_stream_get(zram->comps[prio]); - } + zstrm = zcomp_stream_get(zram->comps[prio]); + src = zs_obj_read_begin(zram->mem_pool, handle, zstrm->local_copy); + dst = kmap_local_page(page); + ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, dst); + kunmap_local(dst); + zs_obj_read_end(zram->mem_pool, handle, src); + zcomp_stream_put(zstrm); - src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); - if (size == PAGE_SIZE) { - dst = kmap_atomic(page); - memcpy(dst, src, PAGE_SIZE); - kunmap_atomic(dst); - ret = 0; - } else { - dst = kmap_atomic(page); - ret = zcomp_decompress(zstrm, src, size, dst); - kunmap_atomic(dst); - zcomp_stream_put(zram->comps[prio]); - } - zs_unmap_object(zram->mem_pool, handle); return ret; } -static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, - struct bio *bio, bool partial_io) +/* + * Reads (decompresses if needed) a page from zspool (zsmalloc). + * Corresponding ZRAM slot should be locked. + */ +static int zram_read_from_zspool(struct zram *zram, struct page *page, + u32 index) +{ + if (zram_test_flag(zram, index, ZRAM_SAME) || + !zram_get_handle(zram, index)) + return read_same_filled_page(zram, page, index); + + if (!zram_test_flag(zram, index, ZRAM_HUGE)) + return read_compressed_page(zram, page, index); + else + return read_incompressible_page(zram, page, index); +} + +static int zram_read_page(struct zram *zram, struct page *page, u32 index, + struct bio *parent) { int ret; @@ -1450,15 +2005,14 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, ret = zram_read_from_zspool(zram, page, index); zram_slot_unlock(zram, index); } else { - /* Slot should be unlocked before the function call */ - zram_slot_unlock(zram, index); - - /* A null bio means rw_page was used, we must fallback to bio */ - if (!bio) - return -EOPNOTSUPP; + unsigned long blk_idx = zram_get_handle(zram, index); - ret = zram_bvec_read_from_bdev(zram, page, index, bio, - partial_io); + /* + * The slot should be unlocked before reading from the backing + * device. + */ + zram_slot_unlock(zram, index); + ret = read_from_bdev(zram, page, blk_idx, parent); } /* Should NEVER happen. Return bio error if it does. */ @@ -1468,203 +2022,225 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, return ret; } +/* + * Use a temporary buffer to decompress the page, as the decompressor + * always expects a full page for the output. 
+ */ +static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec, + u32 index, int offset) +{ + struct page *page = alloc_page(GFP_NOIO); + int ret; + + if (!page) + return -ENOMEM; + ret = zram_read_page(zram, page, index, NULL); + if (likely(!ret)) + memcpy_to_bvec(bvec, page_address(page) + offset); + __free_page(page); + return ret; +} + static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, struct bio *bio) { - int ret; - struct page *page; + if (is_partial_io(bvec)) + return zram_bvec_read_partial(zram, bvec, index, offset); + return zram_read_page(zram, bvec->bv_page, index, bio); +} - page = bvec->bv_page; - if (is_partial_io(bvec)) { - /* Use a temporary buffer to decompress the page */ - page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); - if (!page) - return -ENOMEM; - } +static int write_same_filled_page(struct zram *zram, unsigned long fill, + u32 index) +{ + zram_slot_lock(zram, index); + zram_free_page(zram, index); + zram_set_flag(zram, index, ZRAM_SAME); + zram_set_handle(zram, index, fill); + zram_slot_unlock(zram, index); - ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec)); - if (unlikely(ret)) - goto out; + atomic64_inc(&zram->stats.same_pages); + atomic64_inc(&zram->stats.pages_stored); + + return 0; +} + +static int write_incompressible_page(struct zram *zram, struct page *page, + u32 index) +{ + unsigned long handle; + void *src; - if (is_partial_io(bvec)) { - void *src = kmap_atomic(page); + /* + * This function is called from preemptible context so we don't need + * to do optimistic and fallback to pessimistic handle allocation, + * like we do for compressible pages. + */ + handle = zs_malloc(zram->mem_pool, PAGE_SIZE, + GFP_NOIO | __GFP_NOWARN | + __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page)); + if (IS_ERR_VALUE(handle)) + return PTR_ERR((void *)handle); - memcpy_to_bvec(bvec, src + offset); - kunmap_atomic(src); + if (!zram_can_store_page(zram)) { + zs_free(zram->mem_pool, handle); + return -ENOMEM; } -out: - if (is_partial_io(bvec)) - __free_page(page); - return ret; + src = kmap_local_page(page); + zs_obj_write(zram->mem_pool, handle, src, PAGE_SIZE); + kunmap_local(src); + + zram_slot_lock(zram, index); + zram_free_page(zram, index); + zram_set_flag(zram, index, ZRAM_HUGE); + zram_set_handle(zram, index, handle); + zram_set_obj_size(zram, index, PAGE_SIZE); + zram_slot_unlock(zram, index); + + atomic64_add(PAGE_SIZE, &zram->stats.compr_data_size); + atomic64_inc(&zram->stats.huge_pages); + atomic64_inc(&zram->stats.huge_pages_since); + atomic64_inc(&zram->stats.pages_stored); + + return 0; } -static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, - u32 index, struct bio *bio) +static int zram_write_page(struct zram *zram, struct page *page, u32 index) { int ret = 0; - unsigned long alloced_pages; - unsigned long handle = -ENOMEM; - unsigned int comp_len = 0; - void *src, *dst, *mem; + unsigned long handle; + unsigned int comp_len; + void *mem; struct zcomp_strm *zstrm; - struct page *page = bvec->bv_page; - unsigned long element = 0; - enum zram_pageflags flags = 0; - - mem = kmap_atomic(page); - if (page_same_filled(mem, &element)) { - kunmap_atomic(mem); - /* Free memory associated with this sector now. 
*/ - flags = ZRAM_SAME; - atomic64_inc(&zram->stats.same_pages); - goto out; - } - kunmap_atomic(mem); + unsigned long element; + bool same_filled; + + mem = kmap_local_page(page); + same_filled = page_same_filled(mem, &element); + kunmap_local(mem); + if (same_filled) + return write_same_filled_page(zram, element, index); -compress_again: zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]); - src = kmap_atomic(page); - ret = zcomp_compress(zstrm, src, &comp_len); - kunmap_atomic(src); + mem = kmap_local_page(page); + ret = zcomp_compress(zram->comps[ZRAM_PRIMARY_COMP], zstrm, + mem, &comp_len); + kunmap_local(mem); if (unlikely(ret)) { - zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); + zcomp_stream_put(zstrm); pr_err("Compression failed! err=%d\n", ret); - zs_free(zram->mem_pool, handle); return ret; } - if (comp_len >= huge_class_size) - comp_len = PAGE_SIZE; - /* - * handle allocation has 2 paths: - * a) fast path is executed with preemption disabled (for - * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear, - * since we can't sleep; - * b) slow path enables preemption and attempts to allocate - * the page with __GFP_DIRECT_RECLAIM bit set. we have to - * put per-cpu compression stream and, thus, to re-do - * the compression once handle is allocated. - * - * if we have a 'non-null' handle here then we are coming - * from the slow path and handle has already been allocated. - */ - if (IS_ERR_VALUE(handle)) - handle = zs_malloc(zram->mem_pool, comp_len, - __GFP_KSWAPD_RECLAIM | - __GFP_NOWARN | - __GFP_HIGHMEM | - __GFP_MOVABLE); - if (IS_ERR_VALUE(handle)) { - zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); - atomic64_inc(&zram->stats.writestall); - handle = zs_malloc(zram->mem_pool, comp_len, - GFP_NOIO | __GFP_HIGHMEM | - __GFP_MOVABLE); - if (IS_ERR_VALUE(handle)) - return PTR_ERR((void *)handle); - - if (comp_len != PAGE_SIZE) - goto compress_again; - /* - * If the page is not compressible, you need to acquire the - * lock and execute the code below. The zcomp_stream_get() - * call is needed to disable the cpu hotplug and grab the - * zstrm buffer back. It is necessary that the dereferencing - * of the zstrm variable below occurs correctly. - */ - zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]); + if (comp_len >= huge_class_size) { + zcomp_stream_put(zstrm); + return write_incompressible_page(zram, page, index); } - alloced_pages = zs_get_total_pages(zram->mem_pool); - update_used_max(zram, alloced_pages); + handle = zs_malloc(zram->mem_pool, comp_len, + GFP_NOIO | __GFP_NOWARN | + __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page)); + if (IS_ERR_VALUE(handle)) { + zcomp_stream_put(zstrm); + return PTR_ERR((void *)handle); + } - if (zram->limit_pages && alloced_pages > zram->limit_pages) { - zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); + if (!zram_can_store_page(zram)) { + zcomp_stream_put(zstrm); zs_free(zram->mem_pool, handle); return -ENOMEM; } - dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO); - - src = zstrm->buffer; - if (comp_len == PAGE_SIZE) - src = kmap_atomic(page); - memcpy(dst, src, comp_len); - if (comp_len == PAGE_SIZE) - kunmap_atomic(src); + zs_obj_write(zram->mem_pool, handle, zstrm->buffer, comp_len); + zcomp_stream_put(zstrm); - zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]); - zs_unmap_object(zram->mem_pool, handle); - atomic64_add(comp_len, &zram->stats.compr_data_size); -out: - /* - * Free memory associated with this sector - * before overwriting unused sectors. 
- */ zram_slot_lock(zram, index); zram_free_page(zram, index); - - if (comp_len == PAGE_SIZE) { - zram_set_flag(zram, index, ZRAM_HUGE); - atomic64_inc(&zram->stats.huge_pages); - atomic64_inc(&zram->stats.huge_pages_since); - } - - if (flags) { - zram_set_flag(zram, index, flags); - zram_set_element(zram, index, element); - } else { - zram_set_handle(zram, index, handle); - zram_set_obj_size(zram, index, comp_len); - } + zram_set_handle(zram, index, handle); + zram_set_obj_size(zram, index, comp_len); zram_slot_unlock(zram, index); /* Update stats */ atomic64_inc(&zram->stats.pages_stored); + atomic64_add(comp_len, &zram->stats.compr_data_size); + return ret; } -static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, - u32 index, int offset, struct bio *bio) +/* + * This is a partial IO. Read the full page before writing the changes. + */ +static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec, + u32 index, int offset, struct bio *bio) { + struct page *page = alloc_page(GFP_NOIO); int ret; - struct page *page = NULL; - struct bio_vec vec; - - vec = *bvec; - if (is_partial_io(bvec)) { - void *dst; - /* - * This is a partial IO. We need to read the full page - * before to write the changes. - */ - page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); - if (!page) - return -ENOMEM; - ret = __zram_bvec_read(zram, page, index, bio, true); - if (ret) - goto out; - - dst = kmap_atomic(page); - memcpy_from_bvec(dst + offset, bvec); - kunmap_atomic(dst); + if (!page) + return -ENOMEM; - vec.bv_page = page; - vec.bv_len = PAGE_SIZE; - vec.bv_offset = 0; + ret = zram_read_page(zram, page, index, bio); + if (!ret) { + memcpy_from_bvec(page_address(page) + offset, bvec); + ret = zram_write_page(zram, page, index); } + __free_page(page); + return ret; +} - ret = __zram_bvec_write(zram, &vec, index, bio); -out: +static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, + u32 index, int offset, struct bio *bio) +{ if (is_partial_io(bvec)) - __free_page(page); - return ret; + return zram_bvec_write_partial(zram, bvec, index, offset, bio); + return zram_write_page(zram, bvec->bv_page, index); } #ifdef CONFIG_ZRAM_MULTI_COMP +#define RECOMPRESS_IDLE (1 << 0) +#define RECOMPRESS_HUGE (1 << 1) + +static int scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio_max, + struct zram_pp_ctl *ctl) +{ + unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; + unsigned long index; + + for (index = 0; index < nr_pages; index++) { + bool ok = true; + + zram_slot_lock(zram, index); + if (!zram_allocated(zram, index)) + goto next; + + if (mode & RECOMPRESS_IDLE && + !zram_test_flag(zram, index, ZRAM_IDLE)) + goto next; + + if (mode & RECOMPRESS_HUGE && + !zram_test_flag(zram, index, ZRAM_HUGE)) + goto next; + + if (zram_test_flag(zram, index, ZRAM_WB) || + zram_test_flag(zram, index, ZRAM_SAME) || + zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE)) + goto next; + + /* Already compressed with same or higher priority */ + if (zram_get_priority(zram, index) + 1 >= prio_max) + goto next; + + ok = place_pp_slot(zram, ctl, index); +next: + zram_slot_unlock(zram, index); + if (!ok) + break; + } + + return 0; +} + /* * This function will decompress (unless it's ZRAM_HUGE) the page and then * attempt to compress it using provided compression algorithm priority @@ -1672,8 +2248,9 @@ out: * * Corresponding ZRAM slot should be locked. 
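scan_slots_for_recompress() only collects candidate slots into a zram_pp_ctl under the slot lock; the expensive recompression happens later, one slot at a time. place_pp_slot() and select_pp_slot() are defined earlier in this patch; a list-based reduction of the producer/consumer idea follows (names hypothetical, and the real implementation also buckets slots by compressed size and marks them ZRAM_PP_SLOT so a concurrently freed slot can be detected):

struct pp_slot_sketch {
	struct list_head entry;
	u32 index;
};

/* remember a candidate slot; called with the slot locked */
static bool place_pp_slot_sketch(struct list_head *ctl, u32 index)
{
	struct pp_slot_sketch *pps = kzalloc(sizeof(*pps), GFP_KERNEL);

	if (!pps)
		return false;
	pps->index = index;
	list_add_tail(&pps->entry, ctl);
	return true;
}

/* hand out candidates one by one to the post-processing loop */
static struct pp_slot_sketch *select_pp_slot_sketch(struct list_head *ctl)
{
	struct pp_slot_sketch *pps;

	pps = list_first_entry_or_null(ctl, struct pp_slot_sketch, entry);
	if (pps)
		list_del_init(&pps->entry);
	return pps;
}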
*/ -static int zram_recompress(struct zram *zram, u32 index, struct page *page, - u32 threshold, u32 prio, u32 prio_max) +static int recompress_slot(struct zram *zram, u32 index, struct page *page, + u64 *num_recomp_pages, u32 threshold, u32 prio, + u32 prio_max) { struct zcomp_strm *zstrm = NULL; unsigned long handle_old; @@ -1682,9 +2259,8 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page, unsigned int comp_len_new; unsigned int class_index_old; unsigned int class_index_new; - u32 num_recomps = 0; - void *src, *dst; - int ret; + void *src; + int ret = 0; handle_old = zram_get_handle(zram, index); if (!handle_old) @@ -1701,7 +2277,24 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page, if (ret) return ret; + /* + * We touched this entry so mark it as non-IDLE. This makes sure that + * we don't preserve the IDLE flag and don't incorrectly pick this + * entry for a different post-processing type (e.g. writeback). + */ + zram_clear_flag(zram, index, ZRAM_IDLE); + class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old); + + prio = max(prio, zram_get_priority(zram, index) + 1); + /* + * The recompression slot scan should not select slots that are + * already compressed with a higher priority algorithm, but + * check just in case. + */ + if (prio >= prio_max) + return 0; + /* * Iterate the secondary comp algorithms list (in order of priority) * and try to recompress the page. @@ -1710,22 +2303,16 @@ if (!zram->comps[prio]) continue; - /* - * Skip if the object is already re-compressed with a higher - * priority algorithm (or same algorithm). - */ - if (prio <= zram_get_priority(zram, index)) - continue; - - num_recomps++; zstrm = zcomp_stream_get(zram->comps[prio]); - src = kmap_atomic(page); - ret = zcomp_compress(zstrm, src, &comp_len_new); - kunmap_atomic(src); + src = kmap_local_page(page); + ret = zcomp_compress(zram->comps[prio], zstrm, + src, &comp_len_new); + kunmap_local(src); if (ret) { - zcomp_stream_put(zram->comps[prio]); - return ret; + zcomp_stream_put(zstrm); + zstrm = NULL; + break; } class_index_new = zs_lookup_class_index(zram->mem_pool, @@ -1734,7 +2321,8 @@ /* Continue until we make progress */ if (class_index_new >= class_index_old || (threshold && comp_len_new >= threshold)) { - zcomp_stream_put(zram->comps[prio]); + zcomp_stream_put(zstrm); + zstrm = NULL; continue; } @@ -1743,55 +2331,52 @@ } /* - * We did not try to recompress, e.g. when we have only one - * secondary algorithm and the page is already recompressed - * using that algorithm + * Decrement the limit (if set) on pages we can recompress, even + * when the current recompression was unsuccessful or did not compress + * the page below the threshold, because we still spent resources + * on it. */ - if (!zstrm) - return 0; + if (*num_recomp_pages) + *num_recomp_pages -= 1; + + /* Compression error */ + if (ret) + return ret; - if (class_index_new >= class_index_old) { + if (!zstrm) { /* * Secondary algorithms failed to re-compress the page - * in a way that would save memory, mark the object as - * incompressible so that we will not try to compress - * it again. + * in a way that would save memory. 
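The "Continue until we make progress" test above compares zsmalloc size classes rather than raw compressed lengths: zsmalloc rounds every object up to its class size, so a marginally smaller buffer that lands in the same class frees nothing. The intent, made explicit with a hypothetical helper (zs_lookup_class_index() is the real API used here):

/*
 * Illustrative only.  E.g. shrinking an object from 3000 to 2900
 * bytes typically lands in the same size class, so re-storing it
 * would save no memory and the loop tries the next algorithm.
 */
static bool recompress_saves_memory(struct zs_pool *pool,
				    unsigned int old_len,
				    unsigned int new_len)
{
	return zs_lookup_class_index(pool, new_len) <
	       zs_lookup_class_index(pool, old_len);
}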
* - * We need to make sure that all secondary algorithms have - * failed, so we test if the number of recompressions matches - * the number of active secondary algorithms. + * Mark the object incompressible if the max-priority + * algorithm couldn't re-compress it. */ - if (num_recomps == zram->num_active_comps - 1) - zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE); + if (prio < zram->num_active_comps) + return 0; + zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE); return 0; } - /* Successful recompression but above threshold */ - if (threshold && comp_len_new >= threshold) - return 0; - /* - * No direct reclaim (slow path) for handle allocation and no - * re-compression attempt (unlike in __zram_bvec_write()) since - * we already have stored that object in zsmalloc. If we cannot - * alloc memory for recompressed object then we bail out and - * simply keep the old (existing) object in zsmalloc. + * We are holding per-CPU stream mutex and entry lock so better + * avoid direct reclaim. Allocation error is not fatal since + * we still have the old object in the mem_pool. + * + * XXX: technically, the node we really want here is the node that holds + * the original compressed data. But that would require us to modify + * zsmalloc API to return this information. For now, we will make do with + * the node of the page allocated for recompression. */ handle_new = zs_malloc(zram->mem_pool, comp_len_new, - __GFP_KSWAPD_RECLAIM | - __GFP_NOWARN | - __GFP_HIGHMEM | - __GFP_MOVABLE); + GFP_NOIO | __GFP_NOWARN | + __GFP_HIGHMEM | __GFP_MOVABLE, page_to_nid(page)); if (IS_ERR_VALUE(handle_new)) { - zcomp_stream_put(zram->comps[prio]); + zcomp_stream_put(zstrm); return PTR_ERR((void *)handle_new); } - dst = zs_map_object(zram->mem_pool, handle_new, ZS_MM_WO); - memcpy(dst, zstrm->buffer, comp_len_new); - zcomp_stream_put(zram->comps[prio]); - - zs_unmap_object(zram->mem_pool, handle_new); + zs_obj_write(zram->mem_pool, handle_new, zstrm->buffer, comp_len_new); + zcomp_stream_put(zstrm); zram_free_page(zram, index); zram_set_handle(zram, index, handle_new); @@ -1804,27 +2389,28 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page, return 0; } -#define RECOMPRESS_IDLE (1 << 0) -#define RECOMPRESS_HUGE (1 << 1) - static ssize_t recompress_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - u32 prio = ZRAM_SECONDARY_COMP, prio_max = ZRAM_MAX_COMPS; struct zram *zram = dev_to_zram(dev); - unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; char *args, *param, *val, *algo = NULL; + u64 num_recomp_pages = ULLONG_MAX; + struct zram_pp_ctl *ctl = NULL; + struct zram_pp_slot *pps; u32 mode = 0, threshold = 0; - unsigned long index; - struct page *page; + u32 prio, prio_max; + struct page *page = NULL; ssize_t ret; + prio = ZRAM_SECONDARY_COMP; + prio_max = zram->num_active_comps; + args = skip_spaces(buf); while (*args) { args = next_arg(args, ¶m, &val); - if (!*val) + if (!val || !*val) return -EINVAL; if (!strcmp(param, "type")) { @@ -1837,6 +2423,17 @@ static ssize_t recompress_store(struct device *dev, continue; } + if (!strcmp(param, "max_pages")) { + /* + * Limit the number of entries (pages) we attempt to + * recompress. 
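The store handler above tokenizes its input with next_arg(), so the attribute accepts a space-separated key=value string, e.g. writing "type=huge threshold=3000 max_pages=1024" to /sys/block/zram<id>/recompress. A standalone sketch of that tokenization (the real loop additionally dispatches on each key, as the hunks here show):

static int parse_recompress_args_sketch(char *args)
{
	char *param, *val;

	args = skip_spaces(args);
	while (*args) {
		args = next_arg(args, &param, &val);
		/* every supported parameter requires a value */
		if (!val || !*val)
			return -EINVAL;
		pr_info("param=%s val=%s\n", param, val);
	}
	return 0;
}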
+ */ + ret = kstrtoull(val, 10, &num_recomp_pages); + if (ret) + return ret; + continue; + } + if (!strcmp(param, "threshold")) { /* * We will re-compress only idle objects equal or @@ -1852,9 +2449,21 @@ static ssize_t recompress_store(struct device *dev, algo = val; continue; } + + if (!strcmp(param, "priority")) { + ret = kstrtouint(val, 10, &prio); + if (ret) + return ret; + + if (prio == ZRAM_PRIMARY_COMP) + prio = ZRAM_SECONDARY_COMP; + + prio_max = prio + 1; + continue; + } } - if (threshold >= PAGE_SIZE) + if (threshold >= huge_class_size) return -EINVAL; down_read(&zram->init_lock); @@ -1863,6 +2472,12 @@ static ssize_t recompress_store(struct device *dev, goto release_init_lock; } + /* Do not permit concurrent post-processing actions. */ + if (atomic_xchg(&zram->pp_in_progress, 1)) { + up_read(&zram->init_lock); + return -EAGAIN; + } + if (algo) { bool found = false; @@ -1871,7 +2486,7 @@ static ssize_t recompress_store(struct device *dev, continue; if (!strcmp(zram->comp_algs[prio], algo)) { - prio_max = min(prio + 1, ZRAM_MAX_COMPS); + prio_max = prio + 1; found = true; break; } @@ -1883,39 +2498,44 @@ static ssize_t recompress_store(struct device *dev, } } + prio_max = min(prio_max, (u32)zram->num_active_comps); + if (prio >= prio_max) { + ret = -EINVAL; + goto release_init_lock; + } + page = alloc_page(GFP_KERNEL); if (!page) { ret = -ENOMEM; goto release_init_lock; } - ret = len; - for (index = 0; index < nr_pages; index++) { - int err = 0; - - zram_slot_lock(zram, index); + ctl = init_pp_ctl(); + if (!ctl) { + ret = -ENOMEM; + goto release_init_lock; + } - if (!zram_allocated(zram, index)) - goto next; + scan_slots_for_recompress(zram, mode, prio_max, ctl); - if (mode & RECOMPRESS_IDLE && - !zram_test_flag(zram, index, ZRAM_IDLE)) - goto next; + ret = len; + while ((pps = select_pp_slot(ctl))) { + int err = 0; - if (mode & RECOMPRESS_HUGE && - !zram_test_flag(zram, index, ZRAM_HUGE)) - goto next; + if (!num_recomp_pages) + break; - if (zram_test_flag(zram, index, ZRAM_WB) || - zram_test_flag(zram, index, ZRAM_UNDER_WB) || - zram_test_flag(zram, index, ZRAM_SAME) || - zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE)) + zram_slot_lock(zram, pps->index); + if (!zram_test_flag(zram, pps->index, ZRAM_PP_SLOT)) goto next; - err = zram_recompress(zram, index, page, threshold, + err = recompress_slot(zram, pps->index, page, + &num_recomp_pages, threshold, prio, prio_max); next: - zram_slot_unlock(zram, index); + zram_slot_unlock(zram, pps->index); + release_pp_slot(zram, pps); + if (err) { ret = err; break; @@ -1924,23 +2544,22 @@ next: cond_resched(); } - __free_page(page); - release_init_lock: + if (page) + __free_page(page); + release_pp_ctl(zram, ctl); + atomic_set(&zram->pp_in_progress, 0); up_read(&zram->init_lock); return ret; } #endif -/* - * zram_bio_discard - handler on discard request - * @index: physical block index in PAGE_SIZE units - * @offset: byte offset within physical block - */ -static void zram_bio_discard(struct zram *zram, u32 index, - int offset, struct bio *bio) +static void zram_bio_discard(struct zram *zram, struct bio *bio) { size_t n = bio->bi_iter.bi_size; + u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; + u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) << + SECTOR_SHIFT; /* * zram manages data in physical block size units. Because logical block @@ -1968,81 +2587,67 @@ static void zram_bio_discard(struct zram *zram, u32 index, index++; n -= PAGE_SIZE; } + + bio_endio(bio); } -/* - * Returns errno if it has some problem. 
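The context elided between the two discard hunks above trims the unaligned head of the request, since only fully covered PAGE_SIZE blocks may be freed. The rule, expressed as a hypothetical helper rather than the verbatim elided lines:

/* returns the byte count that still covers whole pages */
static size_t trim_unaligned_head_sketch(u32 *index, u32 offset, size_t n)
{
	if (!offset)
		return n;
	/* request begins mid-page: that page cannot be discarded */
	if (n <= PAGE_SIZE - offset)
		return 0;
	(*index)++;
	return n - (PAGE_SIZE - offset);
}

With a result of 0 the free loop above simply never runs, and the bio still completes through bio_endio().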
Otherwise return 0 or 1. - * Returns 0 if IO request was done synchronously - * Returns 1 if IO request was successfully submitted. - */ -static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, - int offset, enum req_op op, struct bio *bio) +static void zram_bio_read(struct zram *zram, struct bio *bio) { - int ret; + unsigned long start_time = bio_start_io_acct(bio); + struct bvec_iter iter = bio->bi_iter; - if (!op_is_write(op)) { - ret = zram_bvec_read(zram, bvec, index, offset, bio); - flush_dcache_page(bvec->bv_page); - } else { - ret = zram_bvec_write(zram, bvec, index, offset, bio); - } + do { + u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; + u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << + SECTOR_SHIFT; + struct bio_vec bv = bio_iter_iovec(bio, iter); - zram_slot_lock(zram, index); - zram_accessed(zram, index); - zram_slot_unlock(zram, index); + bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); - if (unlikely(ret < 0)) { - if (!op_is_write(op)) + if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) { atomic64_inc(&zram->stats.failed_reads); - else - atomic64_inc(&zram->stats.failed_writes); - } + bio->bi_status = BLK_STS_IOERR; + break; + } + flush_dcache_page(bv.bv_page); - return ret; + zram_slot_lock(zram, index); + zram_accessed(zram, index); + zram_slot_unlock(zram, index); + + bio_advance_iter_single(bio, &iter, bv.bv_len); + } while (iter.bi_size); + + bio_end_io_acct(bio, start_time); + bio_endio(bio); } -static void __zram_make_request(struct zram *zram, struct bio *bio) +static void zram_bio_write(struct zram *zram, struct bio *bio) { - int offset; - u32 index; - struct bio_vec bvec; - struct bvec_iter iter; - unsigned long start_time; + unsigned long start_time = bio_start_io_acct(bio); + struct bvec_iter iter = bio->bi_iter; - index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; - offset = (bio->bi_iter.bi_sector & - (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; + do { + u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; + u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << + SECTOR_SHIFT; + struct bio_vec bv = bio_iter_iovec(bio, iter); - switch (bio_op(bio)) { - case REQ_OP_DISCARD: - case REQ_OP_WRITE_ZEROES: - zram_bio_discard(zram, index, offset, bio); - bio_endio(bio); - return; - default: - break; - } + bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); - start_time = bio_start_io_acct(bio); - bio_for_each_segment(bvec, bio, iter) { - struct bio_vec bv = bvec; - unsigned int unwritten = bvec.bv_len; + if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) { + atomic64_inc(&zram->stats.failed_writes); + bio->bi_status = BLK_STS_IOERR; + break; + } - do { - bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset, - unwritten); - if (zram_bvec_rw(zram, &bv, index, offset, - bio_op(bio), bio) < 0) { - bio->bi_status = BLK_STS_IOERR; - break; - } + zram_slot_lock(zram, index); + zram_accessed(zram, index); + zram_slot_unlock(zram, index); - bv.bv_offset += bv.bv_len; - unwritten -= bv.bv_len; + bio_advance_iter_single(bio, &iter, bv.bv_len); + } while (iter.bi_size); - update_position(&index, &offset, &bv); - } while (unwritten); - } bio_end_io_acct(bio, start_time); bio_endio(bio); } @@ -2054,14 +2659,21 @@ static void zram_submit_bio(struct bio *bio) { struct zram *zram = bio->bi_bdev->bd_disk->private_data; - if (!valid_io_request(zram, bio->bi_iter.bi_sector, - bio->bi_iter.bi_size)) { - atomic64_inc(&zram->stats.invalid_io); - bio_io_error(bio); - return; + switch (bio_op(bio)) { + case REQ_OP_READ: + 
zram_bio_read(zram, bio); + break; + case REQ_OP_WRITE: + zram_bio_write(zram, bio); + break; + case REQ_OP_DISCARD: + case REQ_OP_WRITE_ZEROES: + zram_bio_discard(zram, bio); + break; + default: + WARN_ON_ONCE(1); + bio_endio(bio); } - - __zram_make_request(zram, bio); } static void zram_slot_free_notify(struct block_device *bdev, @@ -2081,66 +2693,20 @@ static void zram_slot_free_notify(struct block_device *bdev, zram_slot_unlock(zram, index); } -static int zram_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, enum req_op op) +static void zram_comp_params_reset(struct zram *zram) { - int offset, ret; - u32 index; - struct zram *zram; - struct bio_vec bv; - unsigned long start_time; - - if (PageTransHuge(page)) - return -ENOTSUPP; - zram = bdev->bd_disk->private_data; - - if (!valid_io_request(zram, sector, PAGE_SIZE)) { - atomic64_inc(&zram->stats.invalid_io); - ret = -EINVAL; - goto out; - } - - index = sector >> SECTORS_PER_PAGE_SHIFT; - offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; - - bv.bv_page = page; - bv.bv_len = PAGE_SIZE; - bv.bv_offset = 0; - - start_time = bdev_start_io_acct(bdev->bd_disk->part0, - SECTORS_PER_PAGE, op, jiffies); - ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL); - bdev_end_io_acct(bdev->bd_disk->part0, op, start_time); -out: - /* - * If I/O fails, just return error(ie, non-zero) without - * calling page_endio. - * It causes resubmit the I/O with bio request by upper functions - * of rw_page(e.g., swap_readpage, __swap_writepage) and - * bio->bi_end_io does things to handle the error - * (e.g., SetPageError, set_page_dirty and extra works). - */ - if (unlikely(ret < 0)) - return ret; + u32 prio; - switch (ret) { - case 0: - page_endio(page, op_is_write(op), 0); - break; - case 1: - ret = 0; - break; - default: - WARN_ON(1); + for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { + comp_params_reset(zram, prio); } - return ret; } static void zram_destroy_comps(struct zram *zram) { u32 prio; - for (prio = 0; prio < ZRAM_MAX_COMPS; prio++) { + for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { struct zcomp *comp = zram->comps[prio]; zram->comps[prio] = NULL; @@ -2149,6 +2715,15 @@ static void zram_destroy_comps(struct zram *zram) zcomp_destroy(comp); zram->num_active_comps--; } + + for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { + /* Do not free statically defined compression algorithms */ + if (zram->comp_algs[prio] != default_compressor) + kfree(zram->comp_algs[prio]); + zram->comp_algs[prio] = NULL; + } + + zram_comp_params_reset(zram); } static void zram_reset_device(struct zram *zram) @@ -2157,11 +2732,6 @@ static void zram_reset_device(struct zram *zram) zram->limit_pages = 0; - if (!init_done(zram)) { - up_write(&zram->init_lock); - return; - } - set_capacity_and_notify(zram->disk, 0); part_stat_set_all(zram->disk->part0, 0); @@ -2170,6 +2740,7 @@ static void zram_reset_device(struct zram *zram) zram->disksize = 0; zram_destroy_comps(zram); memset(&zram->stats, 0, sizeof(zram->stats)); + atomic_set(&zram->pp_in_progress, 0); reset_bdev(zram); comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor); @@ -2202,11 +2773,12 @@ static ssize_t disksize_store(struct device *dev, goto out_unlock; } - for (prio = 0; prio < ZRAM_MAX_COMPS; prio++) { + for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { if (!zram->comp_algs[prio]) continue; - comp = zcomp_create(zram->comp_algs[prio]); + comp = zcomp_create(zram->comp_algs[prio], + &zram->params[prio]); if 
(IS_ERR(comp)) { pr_err("Cannot initialise %s compressing backend\n", zram->comp_algs[prio]); @@ -2271,26 +2843,22 @@ static ssize_t reset_store(struct device *dev, return len; } -static int zram_open(struct block_device *bdev, fmode_t mode) +static int zram_open(struct gendisk *disk, blk_mode_t mode) { - int ret = 0; - struct zram *zram; + struct zram *zram = disk->private_data; - WARN_ON(!mutex_is_locked(&bdev->bd_disk->open_mutex)); + WARN_ON(!mutex_is_locked(&disk->open_mutex)); - zram = bdev->bd_disk->private_data; /* zram was claimed to reset so open request fails */ if (zram->claim) - ret = -EBUSY; - - return ret; + return -EBUSY; + return 0; } static const struct block_device_operations zram_devops = { .open = zram_open, .submit_bio = zram_submit_bio, .swap_slot_free_notify = zram_slot_free_notify, - .rw_page = zram_rw_page, .owner = THIS_MODULE }; @@ -2301,18 +2869,19 @@ static DEVICE_ATTR_WO(reset); static DEVICE_ATTR_WO(mem_limit); static DEVICE_ATTR_WO(mem_used_max); static DEVICE_ATTR_WO(idle); -static DEVICE_ATTR_RW(max_comp_streams); static DEVICE_ATTR_RW(comp_algorithm); #ifdef CONFIG_ZRAM_WRITEBACK static DEVICE_ATTR_RW(backing_dev); static DEVICE_ATTR_WO(writeback); static DEVICE_ATTR_RW(writeback_limit); static DEVICE_ATTR_RW(writeback_limit_enable); +static DEVICE_ATTR_RW(writeback_batch_size); #endif #ifdef CONFIG_ZRAM_MULTI_COMP static DEVICE_ATTR_RW(recomp_algorithm); static DEVICE_ATTR_WO(recompress); #endif +static DEVICE_ATTR_WO(algorithm_params); static struct attribute *zram_disk_attrs[] = { &dev_attr_disksize.attr, @@ -2322,13 +2891,13 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_mem_limit.attr, &dev_attr_mem_used_max.attr, &dev_attr_idle.attr, - &dev_attr_max_comp_streams.attr, &dev_attr_comp_algorithm.attr, #ifdef CONFIG_ZRAM_WRITEBACK &dev_attr_backing_dev.attr, &dev_attr_writeback.attr, &dev_attr_writeback_limit.attr, &dev_attr_writeback_limit_enable.attr, + &dev_attr_writeback_batch_size.attr, #endif &dev_attr_io_stat.attr, &dev_attr_mm_stat.attr, @@ -2340,6 +2909,7 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_recomp_algorithm.attr, &dev_attr_recompress.attr, #endif + &dev_attr_algorithm_params.attr, NULL, }; @@ -2351,6 +2921,30 @@ ATTRIBUTE_GROUPS(zram_disk); */ static int zram_add(void) { + struct queue_limits lim = { + .logical_block_size = ZRAM_LOGICAL_BLOCK_SIZE, + /* + * To ensure that we always get PAGE_SIZE aligned and + * n*PAGE_SIZED sized I/O requests. + */ + .physical_block_size = PAGE_SIZE, + .io_min = PAGE_SIZE, + .io_opt = PAGE_SIZE, + .max_hw_discard_sectors = UINT_MAX, + /* + * zram_bio_discard() will clear all logical blocks if logical + * block size is identical with physical block size(PAGE_SIZE). + * But if it is different, we will skip discarding some parts of + * logical blocks in the part of the request range which isn't + * aligned to physical block size. So we can't ensure that all + * discarded logical blocks are zeroed. 
+ */ +#if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE + .max_write_zeroes_sectors = UINT_MAX, +#endif + .features = BLK_FEAT_STABLE_WRITES | + BLK_FEAT_SYNCHRONOUS, + }; struct zram *zram; int ret, device_id; @@ -2365,15 +2959,15 @@ static int zram_add(void) init_rwsem(&zram->init_lock); #ifdef CONFIG_ZRAM_WRITEBACK - spin_lock_init(&zram->wb_limit_lock); + zram->wb_batch_size = 32; #endif /* gendisk structure */ - zram->disk = blk_alloc_disk(NUMA_NO_NODE); - if (!zram->disk) { + zram->disk = blk_alloc_disk(&lim, NUMA_NO_NODE); + if (IS_ERR(zram->disk)) { pr_err("Error allocating disk structure for device %d\n", device_id); - ret = -ENOMEM; + ret = PTR_ERR(zram->disk); goto out_free_idr; } @@ -2384,43 +2978,16 @@ static int zram_add(void) zram->disk->fops = &zram_devops; zram->disk->private_data = zram; snprintf(zram->disk->disk_name, 16, "zram%d", device_id); + atomic_set(&zram->pp_in_progress, 0); + zram_comp_params_reset(zram); + comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor); - /* Actual capacity set using syfs (/sys/block/zram<id>/disksize */ + /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */ set_capacity(zram->disk, 0); - /* zram devices sort of resembles non-rotational disks */ - blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue); - blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); - - /* - * To ensure that we always get PAGE_SIZE aligned - * and n*PAGE_SIZED sized I/O requests. - */ - blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE); - blk_queue_logical_block_size(zram->disk->queue, - ZRAM_LOGICAL_BLOCK_SIZE); - blk_queue_io_min(zram->disk->queue, PAGE_SIZE); - blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); - zram->disk->queue->limits.discard_granularity = PAGE_SIZE; - blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); - - /* - * zram_bio_discard() will clear all logical blocks if logical block - * size is identical with physical block size(PAGE_SIZE). But if it is - * different, we will skip discarding some parts of logical blocks in - * the part of the request range which isn't aligned to physical block - * size. So we can't ensure that all discarded logical blocks are - * zeroed. - */ - if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) - blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); - - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue); ret = device_add_disk(NULL, zram->disk, zram_disk_groups); if (ret) goto out_cleanup_disk; - comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor); - zram_debugfs_register(zram); pr_info("Added device: %s\n", zram->disk->disk_name); return device_id; @@ -2490,8 +3057,8 @@ static int zram_remove(struct zram *zram) * creates a new un-initialized zram device and returns back this device's * device_id (or an error code if it fails to create a new device). 
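zram_add() above switches from tweaking the queue with individual blk_queue_*() calls (the deleted block) to declaring everything up front in a queue_limits structure that blk_alloc_disk() validates and applies at allocation time. The same pattern, reduced to a fragment for a hypothetical driver:

static struct gendisk *sketch_alloc_disk(void)
{
	struct queue_limits lim = {
		.logical_block_size	= 4096,
		.physical_block_size	= PAGE_SIZE,
		.io_min			= PAGE_SIZE,
		.io_opt			= PAGE_SIZE,
		.features		= BLK_FEAT_SYNCHRONOUS,
	};

	/* under the new API, failure is ERR_PTR(), not NULL */
	return blk_alloc_disk(&lim, NUMA_NO_NODE);
}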
*/ -static ssize_t hot_add_show(struct class *class, - struct class_attribute *attr, +static ssize_t hot_add_show(const struct class *class, + const struct class_attribute *attr, char *buf) { int ret; @@ -2502,13 +3069,14 @@ static ssize_t hot_add_show(struct class *class, if (ret < 0) return ret; - return scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return sysfs_emit(buf, "%d\n", ret); } +/* This attribute must be set to 0400, so CLASS_ATTR_RO() cannot be used */ static struct class_attribute class_attr_hot_add = __ATTR(hot_add, 0400, hot_add_show, NULL); -static ssize_t hot_remove_store(struct class *class, - struct class_attribute *attr, +static ssize_t hot_remove_store(const struct class *class, + const struct class_attribute *attr, const char *buf, size_t count) { @@ -2547,7 +3115,6 @@ ATTRIBUTE_GROUPS(zram_control_class); static struct class zram_control_class = { .name = "zram-control", - .owner = THIS_MODULE, .class_groups = zram_control_class_groups, }; @@ -2569,9 +3136,10 @@ static void destroy_devices(void) static int __init zram_init(void) { + struct zram_table_entry zram_te; int ret; - BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > BITS_PER_LONG); + BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > sizeof(zram_te.flags) * 8); ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare", zcomp_cpu_up_prepare, zcomp_cpu_dead);
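The reworked BUILD_BUG_ON in zram_init() above checks the flag count against the actual width of zram_table_entry.flags rather than BITS_PER_LONG, which keeps the assertion correct if the field's type ever changes; note that the low ZRAM_FLAG_SHIFT bits of the same word also hold the object size. The pattern in isolation (all names here are hypothetical):

#define __NR_SKETCH_PAGEFLAGS	40

struct sketch_entry {
	unsigned long flags;	/* low bits: object size, high bits: flags */
};

static int __init sketch_init(void)
{
	struct sketch_entry te;

	/* fails the build, not the boot, if the flags outgrow the field */
	BUILD_BUG_ON(__NR_SKETCH_PAGEFLAGS > sizeof(te.flags) * 8);
	return 0;
}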
