diff options
Diffstat (limited to 'tools/mm')
-rw-r--r-- | tools/mm/Makefile | 4 | ||||
-rw-r--r-- | tools/mm/page-types.c | 26 | ||||
-rw-r--r-- | tools/mm/page_owner_sort.c | 1 | ||||
-rw-r--r-- | tools/mm/slabinfo.c | 14 | ||||
-rw-r--r-- | tools/mm/thp_swap_allocator_test.c | 234 |
5 files changed, 258 insertions, 21 deletions
diff --git a/tools/mm/Makefile b/tools/mm/Makefile index 7bb03606b9ea..f5725b5c23aa 100644 --- a/tools/mm/Makefile +++ b/tools/mm/Makefile @@ -3,7 +3,7 @@ # include ../scripts/Makefile.include -BUILD_TARGETS=page-types slabinfo page_owner_sort +BUILD_TARGETS=page-types slabinfo page_owner_sort thp_swap_allocator_test INSTALL_TARGETS = $(BUILD_TARGETS) thpmaps LIB_DIR = ../lib/api @@ -23,7 +23,7 @@ $(LIBS): $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) clean: - $(RM) page-types slabinfo page_owner_sort + $(RM) page-types slabinfo page_owner_sort thp_swap_allocator_test make -C $(LIB_DIR) clean sbindir ?= /usr/sbin diff --git a/tools/mm/page-types.c b/tools/mm/page-types.c index 8d5595b6c59f..d7e5e8902af8 100644 --- a/tools/mm/page-types.c +++ b/tools/mm/page-types.c @@ -22,9 +22,10 @@ #include <time.h> #include <setjmp.h> #include <signal.h> +#include <inttypes.h> #include <sys/types.h> -#include <sys/errno.h> -#include <sys/fcntl.h> +#include <errno.h> +#include <fcntl.h> #include <sys/mount.h> #include <sys/statfs.h> #include <sys/mman.h> @@ -71,12 +72,12 @@ /* [32-] kernel hacking assistances */ #define KPF_RESERVED 32 #define KPF_MLOCKED 33 -#define KPF_MAPPEDTODISK 34 +#define KPF_OWNER_2 34 #define KPF_PRIVATE 35 #define KPF_PRIVATE_2 36 #define KPF_OWNER_PRIVATE 37 #define KPF_ARCH 38 -#define KPF_UNCACHED 39 +#define KPF_UNCACHED 39 /* unused */ #define KPF_SOFTDIRTY 40 #define KPF_ARCH_2 41 @@ -129,12 +130,11 @@ static const char * const page_flag_names[] = { [KPF_RESERVED] = "r:reserved", [KPF_MLOCKED] = "m:mlocked", - [KPF_MAPPEDTODISK] = "d:mappedtodisk", + [KPF_OWNER_2] = "d:owner_2", [KPF_PRIVATE] = "P:private", [KPF_PRIVATE_2] = "p:private_2", [KPF_OWNER_PRIVATE] = "O:owner_private", [KPF_ARCH] = "h:arch", - [KPF_UNCACHED] = "c:uncached", [KPF_SOFTDIRTY] = "f:softdirty", [KPF_ARCH_2] = "H:arch_2", @@ -392,9 +392,9 @@ static void show_page_range(unsigned long voffset, unsigned long offset, if (opt_file) printf("%lx\t", voff); if (opt_list_cgroup) - printf("@%llu\t", (unsigned long long)cgroup0); + printf("@%" PRIu64 "\t", cgroup0); if (opt_list_mapcnt) - printf("%lu\t", mapcnt0); + printf("%" PRIu64 "\t", mapcnt0); printf("%lx\t%lx\t%s\n", index, count, page_flag_name(flags0)); } @@ -420,9 +420,9 @@ static void show_page(unsigned long voffset, unsigned long offset, if (opt_file) printf("%lx\t", voffset); if (opt_list_cgroup) - printf("@%llu\t", (unsigned long long)cgroup); + printf("@%" PRIu64 "\t", cgroup); if (opt_list_mapcnt) - printf("%lu\t", mapcnt); + printf("%" PRIu64 "\t", mapcnt); printf("%lx\t%s\n", offset, page_flag_name(flags)); } @@ -472,9 +472,9 @@ static int bit_mask_ok(uint64_t flags) static uint64_t expand_overloaded_flags(uint64_t flags, uint64_t pme) { - /* Anonymous pages overload PG_mappedtodisk */ - if ((flags & BIT(ANON)) && (flags & BIT(MAPPEDTODISK))) - flags ^= BIT(MAPPEDTODISK) | BIT(ANON_EXCLUSIVE); + /* Anonymous pages use PG_owner_2 for anon_exclusive */ + if ((flags & BIT(ANON)) && (flags & BIT(OWNER_2))) + flags ^= BIT(OWNER_2) | BIT(ANON_EXCLUSIVE); /* SLUB overloads several page flags */ if (flags & BIT(SLAB)) { diff --git a/tools/mm/page_owner_sort.c b/tools/mm/page_owner_sort.c index e1f264444342..880e36df0c11 100644 --- a/tools/mm/page_owner_sort.c +++ b/tools/mm/page_owner_sort.c @@ -377,6 +377,7 @@ static char *get_comm(char *buf) if (errno != 0) { if (debug_on) fprintf(stderr, "wrong comm in follow buf:\n%s\n", buf); + free(comm_str); return NULL; } diff --git a/tools/mm/slabinfo.c b/tools/mm/slabinfo.c index cfaeaea71042..1433eff99feb 100644 --- a/tools/mm/slabinfo.c +++ b/tools/mm/slabinfo.c @@ -21,7 +21,7 @@ #include <regex.h> #include <errno.h> -#define MAX_SLABS 500 +#define MAX_SLABS 2000 #define MAX_ALIASES 500 #define MAX_NODES 1024 @@ -1228,6 +1228,8 @@ static void read_slab_dir(void) continue; switch (de->d_type) { case DT_LNK: + if (alias - aliasinfo == MAX_ALIASES) + fatal("Too many aliases\n"); alias->name = strdup(de->d_name); count = readlink(de->d_name, buffer, sizeof(buffer)-1); @@ -1242,6 +1244,8 @@ static void read_slab_dir(void) alias++; break; case DT_DIR: + if (slab - slabinfo == MAX_SLABS) + fatal("Too many slabs\n"); if (chdir(de->d_name)) fatal("Unable to access slab %s\n", slab->name); slab->name = strdup(de->d_name); @@ -1297,7 +1301,9 @@ static void read_slab_dir(void) slab->cpu_partial_free = get_obj("cpu_partial_free"); slab->alloc_node_mismatch = get_obj("alloc_node_mismatch"); slab->deactivate_bypass = get_obj("deactivate_bypass"); - chdir(".."); + if (chdir("..")) + fatal("Unable to chdir from slab ../%s\n", + slab->name); if (slab->name[0] == ':') alias_targets++; slab++; @@ -1310,10 +1316,6 @@ static void read_slab_dir(void) slabs = slab - slabinfo; actual_slabs = slabs; aliases = alias - aliasinfo; - if (slabs > MAX_SLABS) - fatal("Too many slabs\n"); - if (aliases > MAX_ALIASES) - fatal("Too many aliases\n"); } static void output_slabs(void) diff --git a/tools/mm/thp_swap_allocator_test.c b/tools/mm/thp_swap_allocator_test.c new file mode 100644 index 000000000000..83afc52275a5 --- /dev/null +++ b/tools/mm/thp_swap_allocator_test.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * thp_swap_allocator_test + * + * The purpose of this test program is helping check if THP swpout + * can correctly get swap slots to swap out as a whole instead of + * being split. It randomly releases swap entries through madvise + * DONTNEED and swapin/out on two memory areas: a memory area for + * 64KB THP and the other area for small folios. The second memory + * can be enabled by "-s". + * Before running the program, we need to setup a zRAM or similar + * swap device by: + * echo lzo > /sys/block/zram0/comp_algorithm + * echo 64M > /sys/block/zram0/disksize + * echo never > /sys/kernel/mm/transparent_hugepage/hugepages-2048kB/enabled + * echo always > /sys/kernel/mm/transparent_hugepage/hugepages-64kB/enabled + * mkswap /dev/zram0 + * swapon /dev/zram0 + * The expected result should be 0% anon swpout fallback ratio w/ or + * w/o "-s". + * + * Author(s): Barry Song <v-songbaohua@oppo.com> + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <linux/mman.h> +#include <sys/mman.h> +#include <errno.h> +#include <time.h> + +#define MEMSIZE_MTHP (60 * 1024 * 1024) +#define MEMSIZE_SMALLFOLIO (4 * 1024 * 1024) +#define ALIGNMENT_MTHP (64 * 1024) +#define ALIGNMENT_SMALLFOLIO (4 * 1024) +#define TOTAL_DONTNEED_MTHP (16 * 1024 * 1024) +#define TOTAL_DONTNEED_SMALLFOLIO (1 * 1024 * 1024) +#define MTHP_FOLIO_SIZE (64 * 1024) + +#define SWPOUT_PATH \ + "/sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/swpout" +#define SWPOUT_FALLBACK_PATH \ + "/sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/swpout_fallback" + +static void *aligned_alloc_mem(size_t size, size_t alignment) +{ + void *mem = NULL; + + if (posix_memalign(&mem, alignment, size) != 0) { + perror("posix_memalign"); + return NULL; + } + return mem; +} + +/* + * This emulates the behavior of native libc and Java heap, + * as well as process exit and munmap. It helps generate mTHP + * and ensures that iterations can proceed with mTHP, as we + * currently don't support large folios swap-in. + */ +static void random_madvise_dontneed(void *mem, size_t mem_size, + size_t align_size, size_t total_dontneed_size) +{ + size_t num_pages = total_dontneed_size / align_size; + size_t i; + size_t offset; + void *addr; + + for (i = 0; i < num_pages; ++i) { + offset = (rand() % (mem_size / align_size)) * align_size; + addr = (char *)mem + offset; + if (madvise(addr, align_size, MADV_DONTNEED) != 0) + perror("madvise dontneed"); + + memset(addr, 0x11, align_size); + } +} + +static void random_swapin(void *mem, size_t mem_size, + size_t align_size, size_t total_swapin_size) +{ + size_t num_pages = total_swapin_size / align_size; + size_t i; + size_t offset; + void *addr; + + for (i = 0; i < num_pages; ++i) { + offset = (rand() % (mem_size / align_size)) * align_size; + addr = (char *)mem + offset; + memset(addr, 0x11, align_size); + } +} + +static unsigned long read_stat(const char *path) +{ + FILE *file; + unsigned long value; + + file = fopen(path, "r"); + if (!file) { + perror("fopen"); + return 0; + } + + if (fscanf(file, "%lu", &value) != 1) { + perror("fscanf"); + fclose(file); + return 0; + } + + fclose(file); + return value; +} + +int main(int argc, char *argv[]) +{ + int use_small_folio = 0, aligned_swapin = 0; + void *mem1 = NULL, *mem2 = NULL; + int i; + + for (i = 1; i < argc; ++i) { + if (strcmp(argv[i], "-s") == 0) + use_small_folio = 1; + else if (strcmp(argv[i], "-a") == 0) + aligned_swapin = 1; + } + + mem1 = aligned_alloc_mem(MEMSIZE_MTHP, ALIGNMENT_MTHP); + if (mem1 == NULL) { + fprintf(stderr, "Failed to allocate large folios memory\n"); + return EXIT_FAILURE; + } + + if (madvise(mem1, MEMSIZE_MTHP, MADV_HUGEPAGE) != 0) { + perror("madvise hugepage for mem1"); + free(mem1); + return EXIT_FAILURE; + } + + if (use_small_folio) { + mem2 = aligned_alloc_mem(MEMSIZE_SMALLFOLIO, ALIGNMENT_MTHP); + if (mem2 == NULL) { + fprintf(stderr, "Failed to allocate small folios memory\n"); + free(mem1); + return EXIT_FAILURE; + } + + if (madvise(mem2, MEMSIZE_SMALLFOLIO, MADV_NOHUGEPAGE) != 0) { + perror("madvise nohugepage for mem2"); + free(mem1); + free(mem2); + return EXIT_FAILURE; + } + } + + /* warm-up phase to occupy the swapfile */ + memset(mem1, 0x11, MEMSIZE_MTHP); + madvise(mem1, MEMSIZE_MTHP, MADV_PAGEOUT); + if (use_small_folio) { + memset(mem2, 0x11, MEMSIZE_SMALLFOLIO); + madvise(mem2, MEMSIZE_SMALLFOLIO, MADV_PAGEOUT); + } + + /* iterations with newly created mTHP, swap-in, and swap-out */ + for (i = 0; i < 100; ++i) { + unsigned long initial_swpout; + unsigned long initial_swpout_fallback; + unsigned long final_swpout; + unsigned long final_swpout_fallback; + unsigned long swpout_inc; + unsigned long swpout_fallback_inc; + double fallback_percentage; + + initial_swpout = read_stat(SWPOUT_PATH); + initial_swpout_fallback = read_stat(SWPOUT_FALLBACK_PATH); + + /* + * The following setup creates a 1:1 ratio of mTHP to small folios + * since large folio swap-in isn't supported yet. Once we support + * mTHP swap-in, we'll likely need to reduce MEMSIZE_MTHP and + * increase MEMSIZE_SMALLFOLIO to maintain the ratio. + */ + random_swapin(mem1, MEMSIZE_MTHP, + aligned_swapin ? ALIGNMENT_MTHP : ALIGNMENT_SMALLFOLIO, + TOTAL_DONTNEED_MTHP); + random_madvise_dontneed(mem1, MEMSIZE_MTHP, ALIGNMENT_MTHP, + TOTAL_DONTNEED_MTHP); + + if (use_small_folio) { + random_swapin(mem2, MEMSIZE_SMALLFOLIO, + ALIGNMENT_SMALLFOLIO, + TOTAL_DONTNEED_SMALLFOLIO); + } + + if (madvise(mem1, MEMSIZE_MTHP, MADV_PAGEOUT) != 0) { + perror("madvise pageout for mem1"); + free(mem1); + if (mem2 != NULL) + free(mem2); + return EXIT_FAILURE; + } + + if (use_small_folio) { + if (madvise(mem2, MEMSIZE_SMALLFOLIO, MADV_PAGEOUT) != 0) { + perror("madvise pageout for mem2"); + free(mem1); + free(mem2); + return EXIT_FAILURE; + } + } + + final_swpout = read_stat(SWPOUT_PATH); + final_swpout_fallback = read_stat(SWPOUT_FALLBACK_PATH); + + swpout_inc = final_swpout - initial_swpout; + swpout_fallback_inc = final_swpout_fallback - initial_swpout_fallback; + + fallback_percentage = (double)swpout_fallback_inc / + (swpout_fallback_inc + swpout_inc) * 100; + + printf("Iteration %d: swpout inc: %lu, swpout fallback inc: %lu, Fallback percentage: %.2f%%\n", + i + 1, swpout_inc, swpout_fallback_inc, fallback_percentage); + } + + free(mem1); + if (mem2 != NULL) + free(mem2); + + return EXIT_SUCCESS; +} |