Diffstat (limited to 'mm/zsmalloc.c')
-rw-r--r--	mm/zsmalloc.c	792
1 file changed, 375 insertions, 417 deletions
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index c937635e0ad1..6d0e47f7ae33 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
 /*
  * zsmalloc memory allocator
  *
@@ -11,29 +13,13 @@
  * Released under the terms of GNU General Public License Version 2.0
  */
 
-/*
- * Following is how we use various fields and flags of underlying
- * struct page(s) to form a zspage.
- *
- * Usage of struct page fields:
- *	page->private: points to zspage
- *	page->index: links together all component pages of a zspage
- *		For the huge page, this is always 0, so we use this field
- *		to store handle.
- *	page->page_type: first object offset in a subpage of zspage
- *
- * Usage of struct page flags:
- *	PG_private: identifies the first component page
- *	PG_owner_priv_1: identifies the huge component page
- *
- */
-
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 /*
  * lock ordering:
  *	page_lock
- *	pool->lock
+ *	pool->migrate_lock
+ *	class->lock
  *	zspage->lock
  */
 
@@ -52,6 +38,7 @@
 #include <linux/vmalloc.h>
 #include <linux/preempt.h>
 #include <linux/spinlock.h>
+#include <linux/sprintf.h>
 #include <linux/shrinker.h>
 #include <linux/types.h>
 #include <linux/debugfs.h>
@@ -62,6 +49,7 @@
 #include <linux/pagemap.h>
 #include <linux/fs.h>
 #include <linux/local_lock.h>
+#include "zpdesc.h"
 
 #define ZSPAGE_MAGIC	0x58
 
@@ -110,17 +98,14 @@
 #define OBJ_TAG_BITS	1
 #define OBJ_TAG_MASK	OBJ_ALLOCATED_TAG
 
-#define OBJ_INDEX_BITS	(BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS)
+#define OBJ_INDEX_BITS	(BITS_PER_LONG - _PFN_BITS)
 #define OBJ_INDEX_MASK	((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
 
 #define HUGE_BITS	1
 #define FULLNESS_BITS	4
 #define CLASS_BITS	8
-#define ISOLATED_BITS	5
 #define MAGIC_VAL_BITS	8
 
-#define MAX(a, b) ((a) >= (b) ? (a) : (b))
-
 #define ZS_MAX_PAGES_PER_ZSPAGE	(_AC(CONFIG_ZSMALLOC_CHAIN_SIZE, UL))
 
 /* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
@@ -183,6 +168,7 @@ static struct dentry *zs_stat_root;
 static size_t huge_class_size;
 
 struct size_class {
+	spinlock_t lock;
 	struct list_head fullness_list[NR_FULLNESS_GROUPS];
 	/*
 	 * Size of objects stored in this class. Must be multiple
@@ -237,21 +223,50 @@ struct zs_pool {
 #ifdef CONFIG_COMPACTION
 	struct work_struct free_work;
 #endif
-	spinlock_t lock;
+	/* protect page/zspage migration */
+	rwlock_t migrate_lock;
 	atomic_t compaction_in_progress;
 };
 
+static inline void zpdesc_set_first(struct zpdesc *zpdesc)
+{
+	SetPagePrivate(zpdesc_page(zpdesc));
+}
+
+static inline void zpdesc_inc_zone_page_state(struct zpdesc *zpdesc)
+{
+	inc_zone_page_state(zpdesc_page(zpdesc), NR_ZSPAGES);
+}
+
+static inline void zpdesc_dec_zone_page_state(struct zpdesc *zpdesc)
+{
+	dec_zone_page_state(zpdesc_page(zpdesc), NR_ZSPAGES);
+}
+
+static inline struct zpdesc *alloc_zpdesc(gfp_t gfp)
+{
+	struct page *page = alloc_page(gfp);
+
+	return page_zpdesc(page);
+}
+
+static inline void free_zpdesc(struct zpdesc *zpdesc)
+{
+	struct page *page = zpdesc_page(zpdesc);
+
+	__free_page(page);
+}
+
 struct zspage {
 	struct {
 		unsigned int huge:HUGE_BITS;
 		unsigned int fullness:FULLNESS_BITS;
 		unsigned int class:CLASS_BITS + 1;
-		unsigned int isolated:ISOLATED_BITS;
 		unsigned int magic:MAGIC_VAL_BITS;
 	};
 	unsigned int inuse;
 	unsigned int freeobj;
-	struct page *first_page;
+	struct zpdesc *first_zpdesc;
 	struct list_head list; /* fullness list */
 	struct zs_pool *pool;
 	rwlock_t lock;
@@ -260,7 +275,7 @@ struct zspage {
 struct mapping_area {
 	local_lock_t lock;
 	char *vm_buf; /* copy buffer for objects that span pages */
-	char *vm_addr; /* address of kmap_atomic()'ed pages */
+	char *vm_addr; /* address of kmap_local_page()'ed pages */
 	enum zs_mapmode vm_mm; /* mapping mode */
 };
 
@@ -278,18 +293,14 @@ static bool ZsHugePage(struct zspage *zspage)
 static void migrate_lock_init(struct zspage *zspage);
 static void migrate_read_lock(struct zspage *zspage);
 static void migrate_read_unlock(struct zspage *zspage);
-
-#ifdef CONFIG_COMPACTION
 static void migrate_write_lock(struct zspage *zspage);
-static void migrate_write_lock_nested(struct zspage *zspage);
 static void migrate_write_unlock(struct zspage *zspage);
+
+#ifdef CONFIG_COMPACTION
 static void kick_deferred_free(struct zs_pool *pool);
 static void init_deferred_free(struct zs_pool *pool);
 static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage);
 #else
-static void migrate_write_lock(struct zspage *zspage) {}
-static void migrate_write_lock_nested(struct zspage *zspage) {}
-static void migrate_write_unlock(struct zspage *zspage) {}
 static void kick_deferred_free(struct zs_pool *pool) {}
 static void init_deferred_free(struct zs_pool *pool) {}
 static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}
@@ -297,17 +308,27 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}
 
 static int create_cache(struct zs_pool *pool)
 {
-	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
-					0, 0, NULL);
+	char *name;
+
+	name = kasprintf(GFP_KERNEL, "zs_handle-%s", pool->name);
+	if (!name)
+		return -ENOMEM;
+	pool->handle_cachep = kmem_cache_create(name, ZS_HANDLE_SIZE,
+						0, 0, NULL);
+	kfree(name);
 	if (!pool->handle_cachep)
-		return 1;
+		return -EINVAL;
 
-	pool->zspage_cachep = kmem_cache_create("zspage", sizeof(struct zspage),
-					0, 0, NULL);
+	name = kasprintf(GFP_KERNEL, "zspage-%s", pool->name);
+	if (!name)
+		return -ENOMEM;
+	pool->zspage_cachep = kmem_cache_create(name, sizeof(struct zspage),
+						0, 0, NULL);
+	kfree(name);
 	if (!pool->zspage_cachep) {
 		kmem_cache_destroy(pool->handle_cachep);
 		pool->handle_cachep = NULL;
-		return 1;
+		return -EINVAL;
 	}
 
 	return 0;
@@ -341,7 +362,7 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
 	kmem_cache_free(pool->zspage_cachep, zspage);
 }
 
-/* pool->lock(which owns the handle) synchronizes races */
+/* class->lock(which owns the handle) synchronizes races */
 static void record_obj(unsigned long handle, unsigned long obj)
 {
 	*(unsigned long *)handle = obj;
@@ -405,9 +426,9 @@ static void zs_zpool_unmap(void *pool, unsigned long handle)
 	zs_unmap_object(pool, handle);
 }
 
-static u64 zs_zpool_total_size(void *pool)
+static u64 zs_zpool_total_pages(void *pool)
 {
-	return zs_get_total_pages(pool) << PAGE_SHIFT;
+	return zs_get_total_pages(pool);
 }
 
 static struct zpool_driver zs_zpool_driver = {
@@ -420,7 +441,7 @@ static struct zpool_driver zs_zpool_driver = {
 	.free =		zs_zpool_free,
 	.map =		zs_zpool_map,
 	.unmap =	zs_zpool_unmap,
-	.total_size =	zs_zpool_total_size,
+	.total_pages =	zs_zpool_total_pages,
 };
 
 MODULE_ALIAS("zpool-zsmalloc");
@@ -431,39 +452,46 @@ static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = {
 	.lock	= INIT_LOCAL_LOCK(lock),
 };
 
-static __maybe_unused int is_first_page(struct page *page)
+static inline bool __maybe_unused is_first_zpdesc(struct zpdesc *zpdesc)
 {
-	return PagePrivate(page);
+	return PagePrivate(zpdesc_page(zpdesc));
 }
 
-/* Protected by pool->lock */
+/* Protected by class->lock */
 static inline int get_zspage_inuse(struct zspage *zspage)
 {
 	return zspage->inuse;
 }
 
-
 static inline void mod_zspage_inuse(struct zspage *zspage, int val)
 {
 	zspage->inuse += val;
}
 
-static inline struct page *get_first_page(struct zspage *zspage)
+static struct zpdesc *get_first_zpdesc(struct zspage *zspage)
 {
-	struct page *first_page = zspage->first_page;
+	struct zpdesc *first_zpdesc = zspage->first_zpdesc;
 
-	VM_BUG_ON_PAGE(!is_first_page(first_page), first_page);
-	return first_page;
+	VM_BUG_ON_PAGE(!is_first_zpdesc(first_zpdesc), zpdesc_page(first_zpdesc));
+	return first_zpdesc;
 }
 
-static inline unsigned int get_first_obj_offset(struct page *page)
+#define FIRST_OBJ_PAGE_TYPE_MASK	0xffffff
+
+static inline unsigned int get_first_obj_offset(struct zpdesc *zpdesc)
 {
-	return page->page_type;
+	VM_WARN_ON_ONCE(!PageZsmalloc(zpdesc_page(zpdesc)));
+	return zpdesc->first_obj_offset & FIRST_OBJ_PAGE_TYPE_MASK;
 }
 
-static inline void set_first_obj_offset(struct page *page, unsigned int offset)
+static inline void set_first_obj_offset(struct zpdesc *zpdesc, unsigned int offset)
 {
-	page->page_type = offset;
+	/* With 24 bits available, we can support offsets into 16 MiB pages. */
+	BUILD_BUG_ON(PAGE_SIZE > SZ_16M);
+	VM_WARN_ON_ONCE(!PageZsmalloc(zpdesc_page(zpdesc)));
+	VM_WARN_ON_ONCE(offset & ~FIRST_OBJ_PAGE_TYPE_MASK);
+	zpdesc->first_obj_offset &= ~FIRST_OBJ_PAGE_TYPE_MASK;
+	zpdesc->first_obj_offset |= offset & FIRST_OBJ_PAGE_TYPE_MASK;
 }
 
 static inline unsigned int get_freeobj(struct zspage *zspage)
@@ -476,30 +504,12 @@ static inline void set_freeobj(struct zspage *zspage, unsigned int obj)
 	zspage->freeobj = obj;
 }
 
-static void get_zspage_mapping(struct zspage *zspage,
-			       unsigned int *class_idx,
-			       int *fullness)
-{
-	BUG_ON(zspage->magic != ZSPAGE_MAGIC);
-
-	*fullness = zspage->fullness;
-	*class_idx = zspage->class;
-}
-
 static struct size_class *zspage_class(struct zs_pool *pool,
				       struct zspage *zspage)
 {
 	return pool->size_class[zspage->class];
 }
 
-static void set_zspage_mapping(struct zspage *zspage,
-			       unsigned int class_idx,
-			       int fullness)
-{
-	zspage->class = class_idx;
-	zspage->fullness = fullness;
-}
-
 /*
  * zsmalloc divides the pool into various size classes where each
  * class maintains a list of zspages where each zspage is divided
@@ -518,19 +528,19 @@ static int get_size_class_index(int size)
 	return min_t(int, ZS_SIZE_CLASSES - 1, idx);
 }
 
-static inline void class_stat_inc(struct size_class *class,
-				int type, unsigned long cnt)
+static inline void class_stat_add(struct size_class *class, int type,
+				  unsigned long cnt)
 {
 	class->stats.objs[type] += cnt;
 }
 
-static inline void class_stat_dec(struct size_class *class,
-				int type, unsigned long cnt)
+static inline void class_stat_sub(struct size_class *class, int type,
+				  unsigned long cnt)
 {
 	class->stats.objs[type] -= cnt;
 }
 
-static inline unsigned long zs_stat_get(struct size_class *class, int type)
+static inline unsigned long class_stat_read(struct size_class *class, int type)
 {
 	return class->stats.objs[type];
 }
@@ -578,18 +588,18 @@ static int zs_stats_size_show(struct seq_file *s, void *v)
 		if (class->index != i)
 			continue;
 
-		spin_lock(&pool->lock);
+		spin_lock(&class->lock);
 
 		seq_printf(s, " %5u %5u ", i, class->size);
 		for (fg = ZS_INUSE_RATIO_10; fg < NR_FULLNESS_GROUPS; fg++) {
-			inuse_totals[fg] += zs_stat_get(class, fg);
-			seq_printf(s, "%9lu ", zs_stat_get(class, fg));
+			inuse_totals[fg] += class_stat_read(class, fg);
+			seq_printf(s, "%9lu ", class_stat_read(class, fg));
 		}
 
-		obj_allocated = zs_stat_get(class, ZS_OBJS_ALLOCATED);
-		obj_used = zs_stat_get(class, ZS_OBJS_INUSE);
+		obj_allocated = class_stat_read(class, ZS_OBJS_ALLOCATED);
+		obj_used = class_stat_read(class, ZS_OBJS_INUSE);
 		freeable = zs_can_compact(class);
-		spin_unlock(&pool->lock);
+		spin_unlock(&class->lock);
 
 		objs_per_zspage = class->objs_per_zspage;
 		pages_used = obj_allocated / objs_per_zspage *
@@ -692,22 +702,23 @@ static void insert_zspage(struct size_class *class,
				struct zspage *zspage,
				int fullness)
 {
-	class_stat_inc(class, fullness, 1);
+	class_stat_add(class, fullness, 1);
 	list_add(&zspage->list, &class->fullness_list[fullness]);
+	zspage->fullness = fullness;
 }
 
 /*
  * This function removes the given zspage from the freelist identified
  * by <class, fullness_group>.
  */
-static void remove_zspage(struct size_class *class,
-				struct zspage *zspage,
-				int fullness)
+static void remove_zspage(struct size_class *class, struct zspage *zspage)
 {
+	int fullness = zspage->fullness;
+
 	VM_BUG_ON(list_empty(&class->fullness_list[fullness]));
 
 	list_del_init(&zspage->list);
-	class_stat_dec(class, fullness, 1);
+	class_stat_sub(class, fullness, 1);
 }
 
 /*
@@ -721,71 +732,65 @@ static void remove_zspage(struct size_class *class,
  */
 static int fix_fullness_group(struct size_class *class, struct zspage *zspage)
 {
-	int class_idx;
-	int currfg, newfg;
+	int newfg;
 
-	get_zspage_mapping(zspage, &class_idx, &currfg);
 	newfg = get_fullness_group(class, zspage);
-	if (newfg == currfg)
+	if (newfg == zspage->fullness)
 		goto out;
 
-	remove_zspage(class, zspage, currfg);
+	remove_zspage(class, zspage);
 	insert_zspage(class, zspage, newfg);
-	set_zspage_mapping(zspage, class_idx, newfg);
 out:
 	return newfg;
 }
 
-static struct zspage *get_zspage(struct page *page)
+static struct zspage *get_zspage(struct zpdesc *zpdesc)
 {
-	struct zspage *zspage = (struct zspage *)page_private(page);
+	struct zspage *zspage = zpdesc->zspage;
 
 	BUG_ON(zspage->magic != ZSPAGE_MAGIC);
 	return zspage;
 }
 
-static struct page *get_next_page(struct page *page)
+static struct zpdesc *get_next_zpdesc(struct zpdesc *zpdesc)
 {
-	struct zspage *zspage = get_zspage(page);
+	struct zspage *zspage = get_zspage(zpdesc);
 
 	if (unlikely(ZsHugePage(zspage)))
 		return NULL;
 
-	return (struct page *)page->index;
+	return zpdesc->next;
 }
 
 /**
- * obj_to_location - get (<page>, <obj_idx>) from encoded object value
+ * obj_to_location - get (<zpdesc>, <obj_idx>) from encoded object value
  * @obj: the encoded object value
- * @page: page object resides in zspage
+ * @zpdesc: zpdesc object resides in zspage
  * @obj_idx: object index
 */
-static void obj_to_location(unsigned long obj, struct page **page,
+static void obj_to_location(unsigned long obj, struct zpdesc **zpdesc,
			    unsigned int *obj_idx)
 {
-	obj >>= OBJ_TAG_BITS;
-	*page = pfn_to_page(obj >> OBJ_INDEX_BITS);
+	*zpdesc = pfn_zpdesc(obj >> OBJ_INDEX_BITS);
 	*obj_idx = (obj & OBJ_INDEX_MASK);
 }
 
-static void obj_to_page(unsigned long obj, struct page **page)
+static void obj_to_zpdesc(unsigned long obj, struct zpdesc **zpdesc)
 {
-	obj >>= OBJ_TAG_BITS;
-	*page = pfn_to_page(obj >> OBJ_INDEX_BITS);
+	*zpdesc = pfn_zpdesc(obj >> OBJ_INDEX_BITS);
 }
 
 /**
- * location_to_obj - get obj value encoded from (<page>, <obj_idx>)
- * @page: page object resides in zspage
+ * location_to_obj - get obj value encoded from (<zpdesc>, <obj_idx>)
+ * @zpdesc: zpdesc object resides in zspage
  * @obj_idx: object index
 */
-static unsigned long location_to_obj(struct page *page, unsigned int obj_idx)
+static unsigned long location_to_obj(struct zpdesc *zpdesc, unsigned int obj_idx)
 {
	unsigned long obj;
 
-	obj = page_to_pfn(page) << OBJ_INDEX_BITS;
+	obj = zpdesc_pfn(zpdesc) << OBJ_INDEX_BITS;
	obj |= obj_idx & OBJ_INDEX_MASK;
-	obj <<= OBJ_TAG_BITS;
 
	return obj;
 }
@@ -795,15 +800,15 @@ static unsigned long handle_to_obj(unsigned long handle)
 	return *(unsigned long *)handle;
 }
 
-static inline bool obj_allocated(struct page *page, void *obj,
+static inline bool obj_allocated(struct zpdesc *zpdesc, void *obj,
				 unsigned long *phandle)
 {
	unsigned long handle;
-	struct zspage *zspage = get_zspage(page);
+	struct zspage *zspage = get_zspage(zpdesc);
 
	if (unlikely(ZsHugePage(zspage))) {
-		VM_BUG_ON_PAGE(!is_first_page(page), page);
-		handle = page->index;
+		VM_BUG_ON_PAGE(!is_first_zpdesc(zpdesc), zpdesc_page(zpdesc));
+		handle = zpdesc->handle;
	} else
		handle = *(unsigned long *)obj;
 
@@ -815,22 +820,24 @@ static inline bool obj_allocated(struct page *page, void *obj,
 	return true;
 }
 
-static void reset_page(struct page *page)
+static void reset_zpdesc(struct zpdesc *zpdesc)
 {
+	struct page *page = zpdesc_page(zpdesc);
+
	__ClearPageMovable(page);
	ClearPagePrivate(page);
-	set_page_private(page, 0);
-	page_mapcount_reset(page);
-	page->index = 0;
+	zpdesc->zspage = NULL;
+	zpdesc->next = NULL;
+	__ClearPageZsmalloc(page);
 }
 
 static int trylock_zspage(struct zspage *zspage)
 {
-	struct page *cursor, *fail;
+	struct zpdesc *cursor, *fail;
 
-	for (cursor = get_first_page(zspage); cursor != NULL; cursor =
-					get_next_page(cursor)) {
-		if (!trylock_page(cursor)) {
+	for (cursor = get_first_zpdesc(zspage); cursor != NULL; cursor =
+					get_next_zpdesc(cursor)) {
+		if (!zpdesc_trylock(cursor)) {
			fail = cursor;
			goto unlock;
		}
@@ -838,9 +845,9 @@ static int trylock_zspage(struct zspage *zspage)
 	return 1;
 
 unlock:
-	for (cursor = get_first_page(zspage); cursor != fail; cursor =
-					get_next_page(cursor))
-		unlock_page(cursor);
+	for (cursor = get_first_zpdesc(zspage); cursor != fail; cursor =
+					get_next_zpdesc(cursor))
+		zpdesc_unlock(cursor);
 
 	return 0;
 }
@@ -848,31 +855,27 @@ unlock:
 static void __free_zspage(struct zs_pool *pool, struct size_class *class,
			  struct zspage *zspage)
 {
-	struct page *page, *next;
-	int fg;
-	unsigned int class_idx;
-
-	get_zspage_mapping(zspage, &class_idx, &fg);
+	struct zpdesc *zpdesc, *next;
 
-	assert_spin_locked(&pool->lock);
+	assert_spin_locked(&class->lock);
 
	VM_BUG_ON(get_zspage_inuse(zspage));
-	VM_BUG_ON(fg != ZS_INUSE_RATIO_0);
+	VM_BUG_ON(zspage->fullness != ZS_INUSE_RATIO_0);
 
-	next = page = get_first_page(zspage);
+	next = zpdesc = get_first_zpdesc(zspage);
	do {
-		VM_BUG_ON_PAGE(!PageLocked(page), page);
-		next = get_next_page(page);
-		reset_page(page);
-		unlock_page(page);
-		dec_zone_page_state(page, NR_ZSPAGES);
-		put_page(page);
-		page = next;
-	} while (page != NULL);
+		VM_BUG_ON_PAGE(!zpdesc_is_locked(zpdesc), zpdesc_page(zpdesc));
+		next = get_next_zpdesc(zpdesc);
+		reset_zpdesc(zpdesc);
+		zpdesc_unlock(zpdesc);
+		zpdesc_dec_zone_page_state(zpdesc);
+		zpdesc_put(zpdesc);
+		zpdesc = next;
+	} while (zpdesc != NULL);
 
	cache_free_zspage(pool, zspage);
 
-	class_stat_dec(class, ZS_OBJS_ALLOCATED, class->objs_per_zspage);
+	class_stat_sub(class, ZS_OBJS_ALLOCATED, class->objs_per_zspage);
	atomic_long_sub(class->pages_per_zspage, &pool->pages_allocated);
 }
 
@@ -892,7 +895,7 @@ static void free_zspage(struct zs_pool *pool, struct size_class *class,
		return;
	}
 
-	remove_zspage(class, zspage, ZS_INUSE_RATIO_0);
+	remove_zspage(class, zspage);
	__free_zspage(pool, class, zspage);
 }
 
@@ -901,16 +904,16 @@ static void init_zspage(struct size_class *class, struct zspage *zspage)
 {
	unsigned int freeobj = 1;
	unsigned long off = 0;
-	struct page *page = get_first_page(zspage);
+	struct zpdesc *zpdesc = get_first_zpdesc(zspage);
 
-	while (page) {
-		struct page *next_page;
+	while (zpdesc) {
+		struct zpdesc *next_zpdesc;
		struct link_free *link;
		void *vaddr;
 
-		set_first_obj_offset(page, off);
+		set_first_obj_offset(zpdesc, off);
 
-		vaddr = kmap_atomic(page);
+		vaddr = kmap_local_zpdesc(zpdesc);
		link = (struct link_free *)vaddr + off / sizeof(*link);
 
		while ((off += class->size) < PAGE_SIZE) {
@@ -923,8 +926,8 @@ static void init_zspage(struct size_class *class, struct zspage *zspage)
		 * page, which must point to the first object on the next
		 * page (if present)
		 */
-		next_page = get_next_page(page);
-		if (next_page) {
+		next_zpdesc = get_next_zpdesc(zpdesc);
+		if (next_zpdesc) {
			link->next = freeobj++ << OBJ_TAG_BITS;
		} else {
			/*
@@ -933,8 +936,8 @@ static void init_zspage(struct size_class *class, struct zspage *zspage)
			 */
			link->next = -1UL << OBJ_TAG_BITS;
		}
-		kunmap_atomic(vaddr);
-		page = next_page;
+		kunmap_local(vaddr);
+		zpdesc = next_zpdesc;
		off %= PAGE_SIZE;
	}
 
@@ -942,35 +945,35 @@
 }
 
 static void create_page_chain(struct size_class *class, struct zspage *zspage,
-				struct page *pages[])
+				struct zpdesc *zpdescs[])
 {
	int i;
-	struct page *page;
-	struct page *prev_page = NULL;
-	int nr_pages = class->pages_per_zspage;
+	struct zpdesc *zpdesc;
+	struct zpdesc *prev_zpdesc = NULL;
+	int nr_zpdescs = class->pages_per_zspage;
 
	/*
	 * Allocate individual pages and link them together as:
-	 * 1. all pages are linked together using page->index
-	 * 2. each sub-page point to zspage using page->private
+	 * 1. all pages are linked together using zpdesc->next
+	 * 2. each sub-page point to zspage using zpdesc->zspage
	 *
-	 * we set PG_private to identify the first page (i.e. no other sub-page
+	 * we set PG_private to identify the first zpdesc (i.e. no other zpdesc
	 * has this flag set).
	 */
-	for (i = 0; i < nr_pages; i++) {
-		page = pages[i];
-		set_page_private(page, (unsigned long)zspage);
-		page->index = 0;
+	for (i = 0; i < nr_zpdescs; i++) {
+		zpdesc = zpdescs[i];
+		zpdesc->zspage = zspage;
+		zpdesc->next = NULL;
		if (i == 0) {
-			zspage->first_page = page;
-			SetPagePrivate(page);
+			zspage->first_zpdesc = zpdesc;
+			zpdesc_set_first(zpdesc);
			if (unlikely(class->objs_per_zspage == 1 &&
					class->pages_per_zspage == 1))
				SetZsHugePage(zspage);
		} else {
-			prev_page->index = (unsigned long)page;
+			prev_zpdesc->next = zpdesc;
		}
-		prev_page = page;
+		prev_zpdesc = zpdesc;
	}
 }
 
@@ -982,7 +985,7 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,
					gfp_t gfp)
 {
	int i;
-	struct page *pages[ZS_MAX_PAGES_PER_ZSPAGE];
+	struct zpdesc *zpdescs[ZS_MAX_PAGES_PER_ZSPAGE];
	struct zspage *zspage = cache_alloc_zspage(pool, gfp);
 
	if (!zspage)
@@ -992,25 +995,28 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,
	migrate_lock_init(zspage);
 
	for (i = 0; i < class->pages_per_zspage; i++) {
-		struct page *page;
+		struct zpdesc *zpdesc;
 
-		page = alloc_page(gfp);
-		if (!page) {
+		zpdesc = alloc_zpdesc(gfp);
+		if (!zpdesc) {
			while (--i >= 0) {
-				dec_zone_page_state(pages[i], NR_ZSPAGES);
-				__free_page(pages[i]);
+				zpdesc_dec_zone_page_state(zpdescs[i]);
+				__zpdesc_clear_zsmalloc(zpdescs[i]);
+				free_zpdesc(zpdescs[i]);
			}
			cache_free_zspage(pool, zspage);
			return NULL;
		}
+		__zpdesc_set_zsmalloc(zpdesc);
 
-		inc_zone_page_state(page, NR_ZSPAGES);
-		pages[i] = page;
+		zpdesc_inc_zone_page_state(zpdesc);
+		zpdescs[i] = zpdesc;
	}
 
-	create_page_chain(class, zspage, pages);
+	create_page_chain(class, zspage, zpdescs);
	init_zspage(class, zspage);
	zspage->pool = pool;
+	zspage->class = class->index;
 
	return zspage;
 }
@@ -1051,13 +1057,12 @@ static inline void __zs_cpu_down(struct mapping_area *area)
 }
 
 static void *__zs_map_object(struct mapping_area *area,
-			struct page *pages[2], int off, int size)
+			struct zpdesc *zpdescs[2], int off, int size)
 {
-	int sizes[2];
-	void *addr;
+	size_t sizes[2];
	char *buf = area->vm_buf;
 
-	/* disable page faults to match kmap_atomic() return conditions */
+	/* disable page faults to match kmap_local_page() return conditions */
	pagefault_disable();
 
	/* no read fastpath */
@@ -1068,21 +1073,16 @@ static void *__zs_map_object(struct mapping_area *area,
	sizes[1] = size - sizes[0];
 
	/* copy object to per-cpu buffer */
-	addr = kmap_atomic(pages[0]);
-	memcpy(buf, addr + off, sizes[0]);
-	kunmap_atomic(addr);
-	addr = kmap_atomic(pages[1]);
-	memcpy(buf + sizes[0], addr, sizes[1]);
-	kunmap_atomic(addr);
+	memcpy_from_page(buf, zpdesc_page(zpdescs[0]), off, sizes[0]);
+	memcpy_from_page(buf + sizes[0], zpdesc_page(zpdescs[1]), 0, sizes[1]);
 out:
	return area->vm_buf;
 }
 
 static void __zs_unmap_object(struct mapping_area *area,
-			struct page *pages[2], int off, int size)
+			struct zpdesc *zpdescs[2], int off, int size)
 {
-	int sizes[2];
-	void *addr;
+	size_t sizes[2];
	char *buf;
 
	/* no write fastpath */
@@ -1098,15 +1098,11 @@ static void __zs_unmap_object(struct mapping_area *area,
	sizes[1] = size - sizes[0];
 
	/* copy per-cpu buffer to object */
-	addr = kmap_atomic(pages[0]);
-	memcpy(addr + off, buf, sizes[0]);
-	kunmap_atomic(addr);
-	addr = kmap_atomic(pages[1]);
-	memcpy(addr, buf + sizes[0], sizes[1]);
-	kunmap_atomic(addr);
+	memcpy_to_page(zpdesc_page(zpdescs[0]), off, buf, sizes[0]);
+	memcpy_to_page(zpdesc_page(zpdescs[1]), 0, buf + sizes[0], sizes[1]);
 
 out:
-	/* enable page faults to match kunmap_atomic() return conditions */
+	/* enable page faults to match kunmap_local() return conditions */
	pagefault_enable();
 }
 
@@ -1193,13 +1189,13 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
			enum zs_mapmode mm)
 {
	struct zspage *zspage;
-	struct page *page;
+	struct zpdesc *zpdesc;
	unsigned long obj, off;
	unsigned int obj_idx;
	struct size_class *class;
	struct mapping_area *area;
-	struct page *pages[2];
+	struct zpdesc *zpdescs[2];
	void *ret;
 
	/*
@@ -1210,19 +1206,19 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
	BUG_ON(in_interrupt());
 
	/* It guarantees it can get zspage from handle safely */
-	spin_lock(&pool->lock);
+	read_lock(&pool->migrate_lock);
	obj = handle_to_obj(handle);
-	obj_to_location(obj, &page, &obj_idx);
-	zspage = get_zspage(page);
+	obj_to_location(obj, &zpdesc, &obj_idx);
+	zspage = get_zspage(zpdesc);
 
	/*
-	 * migration cannot move any zpages in this zspage. Here, pool->lock
+	 * migration cannot move any zpages in this zspage. Here, class->lock
	 * is too heavy since callers would take some time until they calls
	 * zs_unmap_object API so delegate the locking from class to zspage
	 * which is smaller granularity.
	 */
	migrate_read_lock(zspage);
-	spin_unlock(&pool->lock);
+	read_unlock(&pool->migrate_lock);
 
	class = zspage_class(pool, zspage);
	off = offset_in_page(class->size * obj_idx);
@@ -1232,17 +1228,17 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
	area->vm_mm = mm;
	if (off + class->size <= PAGE_SIZE) {
		/* this object is contained entirely within a page */
-		area->vm_addr = kmap_atomic(page);
+		area->vm_addr = kmap_local_zpdesc(zpdesc);
		ret = area->vm_addr + off;
		goto out;
	}
 
	/* this object spans two pages */
-	pages[0] = page;
-	pages[1] = get_next_page(page);
-	BUG_ON(!pages[1]);
+	zpdescs[0] = zpdesc;
+	zpdescs[1] = get_next_zpdesc(zpdesc);
+	BUG_ON(!zpdescs[1]);
 
-	ret = __zs_map_object(area, pages, off, class->size);
+	ret = __zs_map_object(area, zpdescs, off, class->size);
 out:
	if (likely(!ZsHugePage(zspage)))
		ret += ZS_HANDLE_SIZE;
@@ -1254,7 +1250,7 @@ EXPORT_SYMBOL_GPL(zs_map_object);
 void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
 {
	struct zspage *zspage;
-	struct page *page;
+	struct zpdesc *zpdesc;
	unsigned long obj, off;
	unsigned int obj_idx;
@@ -1262,22 +1258,22 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
	struct mapping_area *area;
 
	obj = handle_to_obj(handle);
-	obj_to_location(obj, &page, &obj_idx);
-	zspage = get_zspage(page);
+	obj_to_location(obj, &zpdesc, &obj_idx);
+	zspage = get_zspage(zpdesc);
	class = zspage_class(pool, zspage);
	off = offset_in_page(class->size * obj_idx);
 
	area = this_cpu_ptr(&zs_map_area);
	if (off + class->size <= PAGE_SIZE)
-		kunmap_atomic(area->vm_addr);
+		kunmap_local(area->vm_addr);
	else {
-		struct page *pages[2];
+		struct zpdesc *zpdescs[2];
 
-		pages[0] = page;
-		pages[1] = get_next_page(page);
-		BUG_ON(!pages[1]);
+		zpdescs[0] = zpdesc;
+		zpdescs[1] = get_next_zpdesc(zpdesc);
+		BUG_ON(!zpdescs[1]);
 
-		__zs_unmap_object(area, pages, off, class->size);
+		__zs_unmap_object(area, zpdescs, off, class->size);
	}
	local_unlock(&zs_map_area.lock);
@@ -1307,41 +1303,40 @@ EXPORT_SYMBOL_GPL(zs_huge_class_size);
 static unsigned long obj_malloc(struct zs_pool *pool,
				struct zspage *zspage, unsigned long handle)
 {
-	int i, nr_page, offset;
+	int i, nr_zpdesc, offset;
	unsigned long obj;
	struct link_free *link;
	struct size_class *class;
 
-	struct page *m_page;
+	struct zpdesc *m_zpdesc;
	unsigned long m_offset;
	void *vaddr;
 
	class = pool->size_class[zspage->class];
-	handle |= OBJ_ALLOCATED_TAG;
	obj = get_freeobj(zspage);
 
	offset = obj * class->size;
-	nr_page = offset >> PAGE_SHIFT;
+	nr_zpdesc = offset >> PAGE_SHIFT;
	m_offset = offset_in_page(offset);
-	m_page = get_first_page(zspage);
+	m_zpdesc = get_first_zpdesc(zspage);
 
-	for (i = 0; i < nr_page; i++)
-		m_page = get_next_page(m_page);
+	for (i = 0; i < nr_zpdesc; i++)
+		m_zpdesc = get_next_zpdesc(m_zpdesc);
 
-	vaddr = kmap_atomic(m_page);
+	vaddr = kmap_local_zpdesc(m_zpdesc);
	link = (struct link_free *)vaddr + m_offset / sizeof(*link);
	set_freeobj(zspage, link->next >> OBJ_TAG_BITS);
	if (likely(!ZsHugePage(zspage)))
		/* record handle in the header of allocated chunk */
-		link->handle = handle;
+		link->handle = handle | OBJ_ALLOCATED_TAG;
	else
-		/* record handle to page->index */
-		zspage->first_page->index = handle;
+		zspage->first_zpdesc->handle = handle | OBJ_ALLOCATED_TAG;
 
-	kunmap_atomic(vaddr);
+	kunmap_local(vaddr);
	mod_zspage_inuse(zspage, 1);
 
-	obj = location_to_obj(m_page, obj);
+	obj = location_to_obj(m_zpdesc, obj);
+	record_obj(handle, obj);
 
	return obj;
 }
@@ -1359,7 +1354,7 @@ static unsigned long obj_malloc(struct zs_pool *pool,
 */
 unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
 {
-	unsigned long handle, obj;
+	unsigned long handle;
	struct size_class *class;
	int newfg;
	struct zspage *zspage;
@@ -1378,20 +1373,19 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
	size += ZS_HANDLE_SIZE;
	class = pool->size_class[get_size_class_index(size)];
 
-	/* pool->lock effectively protects the zpage migration */
-	spin_lock(&pool->lock);
+	/* class->lock effectively protects the zpage migration */
+	spin_lock(&class->lock);
	zspage = find_get_zspage(class);
	if (likely(zspage)) {
-		obj = obj_malloc(pool, zspage, handle);
+		obj_malloc(pool, zspage, handle);
		/* Now move the zspage to another fullness group, if required */
		fix_fullness_group(class, zspage);
-		record_obj(handle, obj);
-		class_stat_inc(class, ZS_OBJS_INUSE, 1);
+		class_stat_add(class, ZS_OBJS_INUSE, 1);
 
		goto out;
	}
 
-	spin_unlock(&pool->lock);
+	spin_unlock(&class->lock);
 
	zspage = alloc_zspage(pool, class, gfp);
	if (!zspage) {
@@ -1399,20 +1393,18 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
		cache_free_handle(pool, handle);
		return (unsigned long)ERR_PTR(-ENOMEM);
	}
 
-	spin_lock(&pool->lock);
-	obj = obj_malloc(pool, zspage, handle);
+	spin_lock(&class->lock);
+	obj_malloc(pool, zspage, handle);
	newfg = get_fullness_group(class, zspage);
	insert_zspage(class, zspage, newfg);
-	set_zspage_mapping(zspage, class->index, newfg);
-	record_obj(handle, obj);
	atomic_long_add(class->pages_per_zspage, &pool->pages_allocated);
-	class_stat_inc(class, ZS_OBJS_ALLOCATED, class->objs_per_zspage);
-	class_stat_inc(class, ZS_OBJS_INUSE, 1);
+	class_stat_add(class, ZS_OBJS_ALLOCATED, class->objs_per_zspage);
+	class_stat_add(class, ZS_OBJS_INUSE, 1);
 
	/* We completely set up zspage so mark them as movable */
	SetZsPageMovable(pool, zspage);
 out:
-	spin_unlock(&pool->lock);
+	spin_unlock(&class->lock);
 
	return handle;
 }
@@ -1422,33 +1414,34 @@ static void obj_free(int class_size, unsigned long obj)
 {
	struct link_free *link;
	struct zspage *zspage;
-	struct page *f_page;
+	struct zpdesc *f_zpdesc;
	unsigned long f_offset;
	unsigned int f_objidx;
	void *vaddr;
 
-	obj_to_location(obj, &f_page, &f_objidx);
+
+	obj_to_location(obj, &f_zpdesc, &f_objidx);
	f_offset = offset_in_page(class_size * f_objidx);
-	zspage = get_zspage(f_page);
+	zspage = get_zspage(f_zpdesc);
 
-	vaddr = kmap_atomic(f_page);
+	vaddr = kmap_local_zpdesc(f_zpdesc);
	link = (struct link_free *)(vaddr + f_offset);
 
	/* Insert this object in containing zspage's freelist */
	if (likely(!ZsHugePage(zspage)))
		link->next = get_freeobj(zspage) << OBJ_TAG_BITS;
	else
-		f_page->index = 0;
+		f_zpdesc->handle = 0;
	set_freeobj(zspage, f_objidx);
 
-	kunmap_atomic(vaddr);
+	kunmap_local(vaddr);
	mod_zspage_inuse(zspage, -1);
 }
 
 void zs_free(struct zs_pool *pool, unsigned long handle)
 {
	struct zspage *zspage;
-	struct page *f_page;
+	struct zpdesc *f_zpdesc;
	unsigned long obj;
	struct size_class *class;
	int fullness;
@@ -1457,23 +1450,25 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
		return;
 
	/*
-	 * The pool->lock protects the race with zpage's migration
+	 * The pool->migrate_lock protects the race with zpage's migration
	 * so it's safe to get the page from handle.
	 */
-	spin_lock(&pool->lock);
+	read_lock(&pool->migrate_lock);
	obj = handle_to_obj(handle);
-	obj_to_page(obj, &f_page);
-	zspage = get_zspage(f_page);
+	obj_to_zpdesc(obj, &f_zpdesc);
+	zspage = get_zspage(f_zpdesc);
	class = zspage_class(pool, zspage);
+	spin_lock(&class->lock);
+	read_unlock(&pool->migrate_lock);
 
-	class_stat_dec(class, ZS_OBJS_INUSE, 1);
+	class_stat_sub(class, ZS_OBJS_INUSE, 1);
	obj_free(class->size, obj);
 
	fullness = fix_fullness_group(class, zspage);
	if (fullness == ZS_INUSE_RATIO_0)
		free_zspage(pool, class, zspage);
 
-	spin_unlock(&pool->lock);
+	spin_unlock(&class->lock);
	cache_free_handle(pool, handle);
 }
 EXPORT_SYMBOL_GPL(zs_free);
@@ -1481,7 +1476,7 @@ EXPORT_SYMBOL_GPL(zs_free);
 static void zs_object_copy(struct size_class *class, unsigned long dst,
				unsigned long src)
 {
-	struct page *s_page, *d_page;
+	struct zpdesc *s_zpdesc, *d_zpdesc;
	unsigned int s_objidx, d_objidx;
	unsigned long s_off, d_off;
	void *s_addr, *d_addr;
@@ -1490,8 +1485,8 @@ static void zs_object_copy(struct size_class *class, unsigned long dst,
 
	s_size = d_size = class->size;
 
-	obj_to_location(src, &s_page, &s_objidx);
-	obj_to_location(dst, &d_page, &d_objidx);
+	obj_to_location(src, &s_zpdesc, &s_objidx);
+	obj_to_location(dst, &d_zpdesc, &d_objidx);
 
	s_off = offset_in_page(class->size * s_objidx);
	d_off = offset_in_page(class->size * d_objidx);
@@ -1502,8 +1497,8 @@ static void zs_object_copy(struct size_class *class, unsigned long dst,
	if (d_off + class->size > PAGE_SIZE)
		d_size = PAGE_SIZE - d_off;
 
-	s_addr = kmap_atomic(s_page);
-	d_addr = kmap_atomic(d_page);
+	s_addr = kmap_local_zpdesc(s_zpdesc);
+	d_addr = kmap_local_zpdesc(d_zpdesc);
 
	while (1) {
		size = min(s_size, d_size);
@@ -1519,33 +1514,33 @@ static void zs_object_copy(struct size_class *class, unsigned long dst,
		d_size -= size;
 
		/*
-		 * Calling kunmap_atomic(d_addr) is necessary. kunmap_atomic()
-		 * calls must occurs in reverse order of calls to kmap_atomic().
-		 * So, to call kunmap_atomic(s_addr) we should first call
-		 * kunmap_atomic(d_addr). For more details see
+		 * Calling kunmap_local(d_addr) is necessary. kunmap_local()
+		 * calls must occurs in reverse order of calls to kmap_local_page().
+		 * So, to call kunmap_local(s_addr) we should first call
+		 * kunmap_local(d_addr). For more details see
		 * Documentation/mm/highmem.rst.
		 */
		if (s_off >= PAGE_SIZE) {
-			kunmap_atomic(d_addr);
-			kunmap_atomic(s_addr);
-			s_page = get_next_page(s_page);
-			s_addr = kmap_atomic(s_page);
-			d_addr = kmap_atomic(d_page);
+			kunmap_local(d_addr);
+			kunmap_local(s_addr);
+			s_zpdesc = get_next_zpdesc(s_zpdesc);
+			s_addr = kmap_local_zpdesc(s_zpdesc);
+			d_addr = kmap_local_zpdesc(d_zpdesc);
			s_size = class->size - written;
			s_off = 0;
		}
 
		if (d_off >= PAGE_SIZE) {
-			kunmap_atomic(d_addr);
-			d_page = get_next_page(d_page);
-			d_addr = kmap_atomic(d_page);
+			kunmap_local(d_addr);
+			d_zpdesc = get_next_zpdesc(d_zpdesc);
+			d_addr = kmap_local_zpdesc(d_zpdesc);
			d_size = class->size - written;
			d_off = 0;
		}
	}
 
-	kunmap_atomic(d_addr);
-	kunmap_atomic(s_addr);
+	kunmap_local(d_addr);
+	kunmap_local(s_addr);
 }
 
 /*
@@ -1553,25 +1548,25 @@ static void zs_object_copy(struct size_class *class, unsigned long dst,
  * return handle.
 */
 static unsigned long find_alloced_obj(struct size_class *class,
-					struct page *page, int *obj_idx)
+					struct zpdesc *zpdesc, int *obj_idx)
 {
	unsigned int offset;
	int index = *obj_idx;
	unsigned long handle = 0;
-	void *addr = kmap_atomic(page);
+	void *addr = kmap_local_zpdesc(zpdesc);
 
-	offset = get_first_obj_offset(page);
+	offset = get_first_obj_offset(zpdesc);
	offset += class->size * index;
 
	while (offset < PAGE_SIZE) {
-		if (obj_allocated(page, addr + offset, &handle))
+		if (obj_allocated(zpdesc, addr + offset, &handle))
			break;
 
		offset += class->size;
		index++;
	}
 
-	kunmap_atomic(addr);
+	kunmap_local(addr);
 
	*obj_idx = index;
 
	return handle;
@@ -1584,14 +1579,14 @@ static void migrate_zspage(struct zs_pool *pool, struct zspage *src_zspage,
	unsigned long used_obj, free_obj;
	unsigned long handle;
	int obj_idx = 0;
-	struct page *s_page = get_first_page(src_zspage);
+	struct zpdesc *s_zpdesc = get_first_zpdesc(src_zspage);
	struct size_class *class = pool->size_class[src_zspage->class];
 
	while (1) {
-		handle = find_alloced_obj(class, s_page, &obj_idx);
+		handle = find_alloced_obj(class, s_zpdesc, &obj_idx);
		if (!handle) {
-			s_page = get_next_page(s_page);
-			if (!s_page)
+			s_zpdesc = get_next_zpdesc(s_zpdesc);
+			if (!s_zpdesc)
				break;
			obj_idx = 0;
			continue;
		}
@@ -1601,7 +1596,6 @@ static void migrate_zspage(struct zs_pool *pool, struct zspage *src_zspage,
		free_obj = obj_malloc(pool, dst_zspage, handle);
		zs_object_copy(class, free_obj, used_obj);
		obj_idx++;
-		record_obj(handle, free_obj);
		obj_free(class->size, used_obj);
 
		/* Stop if there is no more space */
@@ -1623,7 +1617,7 @@ static struct zspage *isolate_src_zspage(struct size_class *class)
		zspage = list_first_entry_or_null(&class->fullness_list[fg],
						  struct zspage, list);
		if (zspage) {
-			remove_zspage(class, zspage, fg);
+			remove_zspage(class, zspage);
			return zspage;
		}
	}
@@ -1640,7 +1634,7 @@ static struct zspage *isolate_dst_zspage(struct size_class *class)
		zspage = list_first_entry_or_null(&class->fullness_list[fg],
						  struct zspage, list);
		if (zspage) {
-			remove_zspage(class, zspage, fg);
+			remove_zspage(class, zspage);
			return zspage;
		}
	}
@@ -1661,7 +1655,6 @@ static int putback_zspage(struct size_class *class, struct zspage *zspage)
 
	fullness = get_fullness_group(class, zspage);
	insert_zspage(class, zspage, fullness);
-	set_zspage_mapping(zspage, class->index, fullness);
 
	return fullness;
 }
@@ -1673,7 +1666,7 @@ static int putback_zspage(struct size_class *class, struct zspage *zspage)
 */
 static void lock_zspage(struct zspage *zspage)
 {
-	struct page *curr_page, *page;
+	struct zpdesc *curr_zpdesc, *zpdesc;
 
	/*
	 * Pages we haven't locked yet can be migrated off the list while we're
@@ -1685,24 +1678,24 @@ static void lock_zspage(struct zspage *zspage)
	 */
	while (1) {
		migrate_read_lock(zspage);
-		page = get_first_page(zspage);
-		if (trylock_page(page))
+		zpdesc = get_first_zpdesc(zspage);
+		if (zpdesc_trylock(zpdesc))
			break;
-		get_page(page);
+		zpdesc_get(zpdesc);
		migrate_read_unlock(zspage);
-		wait_on_page_locked(page);
-		put_page(page);
+		zpdesc_wait_locked(zpdesc);
+		zpdesc_put(zpdesc);
	}
 
-	curr_page = page;
-	while ((page = get_next_page(curr_page))) {
-		if (trylock_page(page)) {
-			curr_page = page;
+	curr_zpdesc = zpdesc;
+	while ((zpdesc = get_next_zpdesc(curr_zpdesc))) {
+		if (zpdesc_trylock(zpdesc)) {
+			curr_zpdesc = zpdesc;
		} else {
-			get_page(page);
+			zpdesc_get(zpdesc);
			migrate_read_unlock(zspage);
-			wait_on_page_locked(page);
-			put_page(page);
+			zpdesc_wait_locked(zpdesc);
+			zpdesc_put(zpdesc);
			migrate_read_lock(zspage);
		}
	}
@@ -1725,76 +1718,53 @@ static void migrate_read_unlock(struct zspage *zspage) __releases(&zspage->lock)
	read_unlock(&zspage->lock);
 }
 
-#ifdef CONFIG_COMPACTION
 static void migrate_write_lock(struct zspage *zspage)
 {
	write_lock(&zspage->lock);
 }
 
-static void migrate_write_lock_nested(struct zspage *zspage)
-{
-	write_lock_nested(&zspage->lock, SINGLE_DEPTH_NESTING);
-}
-
 static void migrate_write_unlock(struct zspage *zspage)
 {
	write_unlock(&zspage->lock);
 }
 
-/* Number of isolated subpage for *page migration* in this zspage */
-static void inc_zspage_isolation(struct zspage *zspage)
-{
-	zspage->isolated++;
-}
-
-static void dec_zspage_isolation(struct zspage *zspage)
-{
-	VM_BUG_ON(zspage->isolated == 0);
-	zspage->isolated--;
-}
+#ifdef CONFIG_COMPACTION
 
 static const struct movable_operations zsmalloc_mops;
 
 static void replace_sub_page(struct size_class *class, struct zspage *zspage,
-				struct page *newpage, struct page *oldpage)
+				struct zpdesc *newzpdesc, struct zpdesc *oldzpdesc)
 {
-	struct page *page;
-	struct page *pages[ZS_MAX_PAGES_PER_ZSPAGE] = {NULL, };
+	struct zpdesc *zpdesc;
+	struct zpdesc *zpdescs[ZS_MAX_PAGES_PER_ZSPAGE] = {NULL, };
+	unsigned int first_obj_offset;
	int idx = 0;
 
-	page = get_first_page(zspage);
+	zpdesc = get_first_zpdesc(zspage);
	do {
-		if (page == oldpage)
-			pages[idx] = newpage;
+		if (zpdesc == oldzpdesc)
+			zpdescs[idx] = newzpdesc;
		else
-			pages[idx] = page;
+			zpdescs[idx] = zpdesc;
		idx++;
-	} while ((page = get_next_page(page)) != NULL);
+	} while ((zpdesc = get_next_zpdesc(zpdesc)) != NULL);
 
-	create_page_chain(class, zspage, pages);
-	set_first_obj_offset(newpage, get_first_obj_offset(oldpage));
+	create_page_chain(class, zspage, zpdescs);
+	first_obj_offset = get_first_obj_offset(oldzpdesc);
+	set_first_obj_offset(newzpdesc, first_obj_offset);
	if (unlikely(ZsHugePage(zspage)))
-		newpage->index = oldpage->index;
-	__SetPageMovable(newpage, &zsmalloc_mops);
+		newzpdesc->handle = oldzpdesc->handle;
+	__zpdesc_set_movable(newzpdesc, &zsmalloc_mops);
 }
 
 static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
 {
-	struct zs_pool *pool;
-	struct zspage *zspage;
-
	/*
	 * Page is locked so zspage couldn't be destroyed. For detail, look at
	 * lock_zspage in free_zspage.
	 */
	VM_BUG_ON_PAGE(PageIsolated(page), page);
 
-	zspage = get_zspage(page);
-	pool = zspage->pool;
-	spin_lock(&pool->lock);
-	inc_zspage_isolation(zspage);
-	spin_unlock(&pool->lock);
-
	return true;
 }
 
@@ -1804,93 +1774,84 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
	struct zs_pool *pool;
	struct size_class *class;
	struct zspage *zspage;
-	struct page *dummy;
+	struct zpdesc *dummy;
+	struct zpdesc *newzpdesc = page_zpdesc(newpage);
+	struct zpdesc *zpdesc = page_zpdesc(page);
	void *s_addr, *d_addr, *addr;
	unsigned int offset;
	unsigned long handle;
	unsigned long old_obj, new_obj;
	unsigned int obj_idx;
 
-	/*
-	 * We cannot support the _NO_COPY case here, because copy needs to
-	 * happen under the zs lock, which does not work with
-	 * MIGRATE_SYNC_NO_COPY workflow.
-	 */
-	if (mode == MIGRATE_SYNC_NO_COPY)
-		return -EINVAL;
+	VM_BUG_ON_PAGE(!zpdesc_is_isolated(zpdesc), zpdesc_page(zpdesc));
 
-	VM_BUG_ON_PAGE(!PageIsolated(page), page);
+	/* We're committed, tell the world that this is a Zsmalloc page. */
+	__zpdesc_set_zsmalloc(newzpdesc);
 
	/* The page is locked, so this pointer must remain valid */
-	zspage = get_zspage(page);
+	zspage = get_zspage(zpdesc);
	pool = zspage->pool;
 
	/*
-	 * The pool's lock protects the race between zpage migration
+	 * The pool migrate_lock protects the race between zpage migration
	 * and zs_free.
	 */
-	spin_lock(&pool->lock);
+	write_lock(&pool->migrate_lock);
	class = zspage_class(pool, zspage);
+	/*
+	 * the class lock protects zpage alloc/free in the zspage.
+	 */
+	spin_lock(&class->lock);
	/* the migrate_write_lock protects zpage access via zs_map_object */
	migrate_write_lock(zspage);
 
-	offset = get_first_obj_offset(page);
-	s_addr = kmap_atomic(page);
+	offset = get_first_obj_offset(zpdesc);
+	s_addr = kmap_local_zpdesc(zpdesc);
 
	/*
	 * Here, any user cannot access all objects in the zspage so let's move.
	 */
-	d_addr = kmap_atomic(newpage);
+	d_addr = kmap_local_zpdesc(newzpdesc);
	copy_page(d_addr, s_addr);
-	kunmap_atomic(d_addr);
+	kunmap_local(d_addr);
 
	for (addr = s_addr + offset; addr < s_addr + PAGE_SIZE;
					addr += class->size) {
-		if (obj_allocated(page, addr, &handle)) {
+		if (obj_allocated(zpdesc, addr, &handle)) {
 
			old_obj = handle_to_obj(handle);
			obj_to_location(old_obj, &dummy, &obj_idx);
-			new_obj = (unsigned long)location_to_obj(newpage,
-								obj_idx);
+			new_obj = (unsigned long)location_to_obj(newzpdesc, obj_idx);
			record_obj(handle, new_obj);
		}
	}
-	kunmap_atomic(s_addr);
+	kunmap_local(s_addr);
 
-	replace_sub_page(class, zspage, newpage, page);
-	dec_zspage_isolation(zspage);
+	replace_sub_page(class, zspage, newzpdesc, zpdesc);
	/*
	 * Since we complete the data copy and set up new zspage structure,
-	 * it's okay to release the pool's lock.
+	 * it's okay to release migration_lock.
	 */
-	spin_unlock(&pool->lock);
+	write_unlock(&pool->migrate_lock);
+	spin_unlock(&class->lock);
	migrate_write_unlock(zspage);
 
-	get_page(newpage);
-	if (page_zone(newpage) != page_zone(page)) {
-		dec_zone_page_state(page, NR_ZSPAGES);
-		inc_zone_page_state(newpage, NR_ZSPAGES);
+	zpdesc_get(newzpdesc);
+	if (zpdesc_zone(newzpdesc) != zpdesc_zone(zpdesc)) {
+		zpdesc_dec_zone_page_state(zpdesc);
+		zpdesc_inc_zone_page_state(newzpdesc);
	}
 
-	reset_page(page);
-	put_page(page);
+	reset_zpdesc(zpdesc);
+	zpdesc_put(zpdesc);
 
	return MIGRATEPAGE_SUCCESS;
 }
 
 static void zs_page_putback(struct page *page)
 {
-	struct zs_pool *pool;
-	struct zspage *zspage;
-
	VM_BUG_ON_PAGE(!PageIsolated(page), page);
-
-	zspage = get_zspage(page);
-	pool = zspage->pool;
-	spin_lock(&pool->lock);
-	dec_zspage_isolation(zspage);
-	spin_unlock(&pool->lock);
 }
 
 static const struct movable_operations zsmalloc_mops = {
@@ -1907,8 +1868,6 @@ static void async_free_zspage(struct work_struct *work)
 {
	int i;
	struct size_class *class;
-	unsigned int class_idx;
-	int fullness;
	struct zspage *zspage, *tmp;
	LIST_HEAD(free_pages);
	struct zs_pool *pool = container_of(work, struct zs_pool,
@@ -1919,22 +1878,21 @@ static void async_free_zspage(struct work_struct *work)
		if (class->index != i)
			continue;
 
-		spin_lock(&pool->lock);
+		spin_lock(&class->lock);
		list_splice_init(&class->fullness_list[ZS_INUSE_RATIO_0],
				 &free_pages);
-		spin_unlock(&pool->lock);
+		spin_unlock(&class->lock);
	}
 
	list_for_each_entry_safe(zspage, tmp, &free_pages, list) {
		list_del(&zspage->list);
		lock_zspage(zspage);
 
-		get_zspage_mapping(zspage, &class_idx, &fullness);
-		VM_BUG_ON(fullness != ZS_INUSE_RATIO_0);
-		class = pool->size_class[class_idx];
-		spin_lock(&pool->lock);
+		class = zspage_class(pool, zspage);
+		spin_lock(&class->lock);
+		class_stat_sub(class, ZS_INUSE_RATIO_0, 1);
		__free_zspage(pool, class, zspage);
-		spin_unlock(&pool->lock);
+		spin_unlock(&class->lock);
	}
 };
@@ -1955,13 +1913,13 @@ static void init_deferred_free(struct zs_pool *pool)
 
 static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage)
 {
-	struct page *page = get_first_page(zspage);
+	struct zpdesc *zpdesc = get_first_zpdesc(zspage);
 
	do {
-		WARN_ON(!trylock_page(page));
-		__SetPageMovable(page, &zsmalloc_mops);
-		unlock_page(page);
-	} while ((page = get_next_page(page)) != NULL);
+		WARN_ON(!zpdesc_trylock(zpdesc));
+		__zpdesc_set_movable(zpdesc, &zsmalloc_mops);
+		zpdesc_unlock(zpdesc);
+	} while ((zpdesc = get_next_zpdesc(zpdesc)) != NULL);
 }
 #else
 static inline void zs_flush_migration(struct zs_pool *pool) { }
@@ -1975,8 +1933,8 @@ static inline void zs_flush_migration(struct zs_pool *pool) { }
 static unsigned long zs_can_compact(struct size_class *class)
 {
	unsigned long obj_wasted;
-	unsigned long obj_allocated = zs_stat_get(class, ZS_OBJS_ALLOCATED);
-	unsigned long obj_used = zs_stat_get(class, ZS_OBJS_INUSE);
+	unsigned long obj_allocated = class_stat_read(class, ZS_OBJS_ALLOCATED);
+	unsigned long obj_used = class_stat_read(class, ZS_OBJS_INUSE);
 
	if (obj_allocated <= obj_used)
		return 0;
@@ -1998,7 +1956,8 @@ static unsigned long __zs_compact(struct zs_pool *pool,
	 * protect the race between zpage migration and zs_free
	 * as well as zpage allocation/free
	 */
-	spin_lock(&pool->lock);
+	write_lock(&pool->migrate_lock);
+	spin_lock(&class->lock);
	while (zs_can_compact(class)) {
		int fg;
 
@@ -2006,19 +1965,17 @@ static unsigned long __zs_compact(struct zs_pool *pool,
			dst_zspage = isolate_dst_zspage(class);
			if (!dst_zspage)
				break;
-			migrate_write_lock(dst_zspage);
		}
 
		src_zspage = isolate_src_zspage(class);
		if (!src_zspage)
			break;
 
-		migrate_write_lock_nested(src_zspage);
-
+		migrate_write_lock(src_zspage);
		migrate_zspage(pool, src_zspage, dst_zspage);
-		fg = putback_zspage(class, src_zspage);
		migrate_write_unlock(src_zspage);
+		fg = putback_zspage(class, src_zspage);
 
		if (fg == ZS_INUSE_RATIO_0) {
			free_zspage(pool, class, src_zspage);
			pages_freed += class->pages_per_zspage;
@@ -2026,27 +1983,26 @@ static unsigned long __zs_compact(struct zs_pool *pool,
		src_zspage = NULL;
 
		if (get_fullness_group(class, dst_zspage) == ZS_INUSE_RATIO_100
-		    || spin_is_contended(&pool->lock)) {
+		    || rwlock_is_contended(&pool->migrate_lock)) {
			putback_zspage(class, dst_zspage);
-			migrate_write_unlock(dst_zspage);
			dst_zspage = NULL;
 
-			spin_unlock(&pool->lock);
+			spin_unlock(&class->lock);
+			write_unlock(&pool->migrate_lock);
			cond_resched();
-			spin_lock(&pool->lock);
+			write_lock(&pool->migrate_lock);
+			spin_lock(&class->lock);
		}
	}
 
-	if (src_zspage) {
+	if (src_zspage)
		putback_zspage(class, src_zspage);
-		migrate_write_unlock(src_zspage);
-	}
-	if (dst_zspage) {
+	if (dst_zspage)
		putback_zspage(class, dst_zspage);
-		migrate_write_unlock(dst_zspage);
-	}
-	spin_unlock(&pool->lock);
+
+	spin_unlock(&class->lock);
+	write_unlock(&pool->migrate_lock);
 
	return pages_freed;
 }
@@ -2058,10 +2014,10 @@ unsigned long zs_compact(struct zs_pool *pool)
	unsigned long pages_freed = 0;
 
	/*
-	 * Pool compaction is performed under pool->lock so it is basically
+	 * Pool compaction is performed under pool->migrate_lock so it is basically
	 * single-threaded. Having more than one thread in __zs_compact()
-	 * will increase pool->lock contention, which will impact other
-	 * zsmalloc operations that need pool->lock.
+	 * will increase pool->migrate_lock contention, which will impact other
+	 * zsmalloc operations that need pool->migrate_lock.
	 */
	if (atomic_xchg(&pool->compaction_in_progress, 1))
		return 0;
@@ -2183,7 +2139,7 @@ struct zs_pool *zs_create_pool(const char *name)
		return NULL;
 
	init_deferred_free(pool);
-	spin_lock_init(&pool->lock);
+	rwlock_init(&pool->migrate_lock);
	atomic_set(&pool->compaction_in_progress, 0);
 
	pool->name = kstrdup(name, GFP_KERNEL);
@@ -2255,6 +2211,7 @@ struct zs_pool *zs_create_pool(const char *name)
		class->index = i;
		class->pages_per_zspage = pages_per_zspage;
		class->objs_per_zspage = objs_per_zspage;
+		spin_lock_init(&class->lock);
		pool->size_class[i] = class;
 
		fullness = ZS_INUSE_RATIO_0;
@@ -2355,3 +2312,4 @@ module_exit(zs_exit);
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
+MODULE_DESCRIPTION("zsmalloc memory allocator");