diff options
Diffstat (limited to 'lib')
75 files changed, 4659 insertions, 1344 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index a9e56539bd11..f323b85ad11c 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -18,6 +18,23 @@ config RAID6_PQ_BENCHMARK Benchmark all available RAID6 PQ functions on init and choose the fastest one. +config PACKING + bool "Generic bitfield packing and unpacking" + default n + help + This option provides the packing() helper function, which permits + converting bitfields between a CPU-usable representation and a + memory representation that can have any combination of these quirks: + - Is little endian (bytes are reversed within a 32-bit group) + - The least-significant 32-bit word comes first (within a 64-bit + group) + - The most significant bit of a byte is at its right (bit 0 of a + register description is numerically 2^7). + Drivers may use these helpers to match the bit indices as described + in the data sheets of the peripherals they are in control of. + + When in doubt, say N. + config BITREVERSE tristate @@ -597,6 +614,10 @@ config ARCH_HAS_UACCESS_FLUSHCACHE config ARCH_HAS_UACCESS_MCSAFE bool +# Temporary. Goes away when all archs are cleaned up +config ARCH_STACKWALK + bool + config STACKDEPOT bool select STACKTRACE diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d4df5b24d75e..d695ec1477f3 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -17,6 +17,23 @@ config PRINTK_TIME The behavior is also controlled by the kernel command line parameter printk.time=1. See Documentation/admin-guide/kernel-parameters.rst +config PRINTK_CALLER + bool "Show caller information on printks" + depends on PRINTK + help + Selecting this option causes printk() to add a caller "thread id" (if + in task context) or a caller "processor id" (if not in task context) + to every message. + + This option is intended for environments where multiple threads + concurrently call printk() for many times, for it is difficult to + interpret without knowing where these lines (or sometimes individual + line which was divided into multiple lines due to race) came from. + + Since toggling after boot makes the code racy, currently there is + no option to enable/disable at the kernel command line parameter or + sysfs interface. + config CONSOLE_LOGLEVEL_DEFAULT int "Default console loglevel (1-15)" range 1 15 @@ -179,6 +196,7 @@ config DEBUG_INFO_REDUCED config DEBUG_INFO_SPLIT bool "Produce split debuginfo in .dwo files" depends on DEBUG_INFO + depends on $(cc-option,-gsplit-dwarf) help Generate debug info into separate .dwo files. This significantly reduces the build directory size for builds with DEBUG_INFO, @@ -194,12 +212,21 @@ config DEBUG_INFO_SPLIT config DEBUG_INFO_DWARF4 bool "Generate dwarf4 debuginfo" depends on DEBUG_INFO + depends on $(cc-option,-gdwarf-4) help Generate dwarf4 debug info. This requires recent versions of gcc and gdb. It makes the debug information larger. But it significantly improves the success of resolving variables in gdb on optimized code. +config DEBUG_INFO_BTF + bool "Generate BTF typeinfo" + depends on DEBUG_INFO + help + Generate deduplicated BTF type information from DWARF debug info. + Turning this on expects presence of pahole tool, which will convert + DWARF type info into equivalent deduplicated BTF type info. + config GDB_SCRIPTS bool "Provide GDB scripts for kernel debugging" depends on DEBUG_INFO @@ -222,7 +249,6 @@ config ENABLE_MUST_CHECK config FRAME_WARN int "Warn for stack frames larger than (needs gcc 4.4)" range 0 8192 - default 3072 if KASAN_EXTRA default 2048 if GCC_PLUGIN_LATENT_ENTROPY default 1280 if (!64BIT && PARISC) default 1024 if (!64BIT && !PARISC) @@ -266,23 +292,6 @@ config UNUSED_SYMBOLS you really need it, and what the merge plan to the mainline kernel for your module is. -config PAGE_OWNER - bool "Track page owner" - depends on DEBUG_KERNEL && STACKTRACE_SUPPORT - select DEBUG_FS - select STACKTRACE - select STACKDEPOT - select PAGE_EXTENSION - help - This keeps track of what call chain is the owner of a page, may - help to find bare alloc_page(s) leaks. Even if you include this - feature on your build, it is disabled in default. You should pass - "page_owner=on" to boot parameter in order to enable it. Eats - a fair amount of memory if enabled. See tools/vm/page_owner_sort.c - for user-space helper. - - If unsure, say N. - config DEBUG_FS bool "Debug Filesystem" help @@ -752,9 +761,9 @@ endmenu # "Memory Debugging" config ARCH_HAS_KCOV bool help - KCOV does not have any arch-specific code, but currently it is enabled - only for x86_64. KCOV requires testing on other archs, and most likely - disabling of instrumentation for some early boot code. + An architecture should select this when it can successfully + build and run with CONFIG_KCOV. This typically requires + disabling instrumentation for some early boot code. config CC_HAS_SANCOV_TRACE_PC def_bool $(cc-option,-fsanitize-coverage=trace-pc) @@ -1655,42 +1664,6 @@ config PROVIDE_OHCI1394_DMA_INIT See Documentation/debugging-via-ohci1394.txt for more information. -config DMA_API_DEBUG - bool "Enable debugging of DMA-API usage" - select NEED_DMA_MAP_STATE - help - Enable this option to debug the use of the DMA API by device drivers. - With this option you will be able to detect common bugs in device - drivers like double-freeing of DMA mappings or freeing mappings that - were never allocated. - - This also attempts to catch cases where a page owned by DMA is - accessed by the cpu in a way that could cause data corruption. For - example, this enables cow_user_page() to check that the source page is - not undergoing DMA. - - This option causes a performance degradation. Use only if you want to - debug device drivers and dma interactions. - - If unsure, say N. - -config DMA_API_DEBUG_SG - bool "Debug DMA scatter-gather usage" - default y - depends on DMA_API_DEBUG - help - Perform extra checking that callers of dma_map_sg() have respected the - appropriate segment length/boundary limits for the given device when - preparing DMA scatterlists. - - This is particularly likely to have been overlooked in cases where the - dma_map_sg() API is used for general bulk mapping of pages rather than - preparing literal scatter-gather descriptors, where there is a risk of - unexpected behaviour from DMA API implementations if the scatterlist - is technically out-of-spec. - - If unsure, say N. - menuconfig RUNTIME_TESTING_MENU bool "Runtime Testing" def_bool y @@ -1700,7 +1673,6 @@ if RUNTIME_TESTING_MENU config LKDTM tristate "Linux Kernel Dump Test Tool Module" depends on DEBUG_FS - depends on BLOCK help This module enables testing of the different dumping mechanisms by inducing system failures at predefined crash points. @@ -1805,6 +1777,9 @@ config TEST_HEXDUMP config TEST_STRING_HELPERS tristate "Test functions located in the string_helpers module at runtime" +config TEST_STRSCPY + tristate "Test strscpy*() family of functions at runtime" + config TEST_KSTRTOX tristate "Test kstrto*() family of functions at runtime" @@ -1876,6 +1851,19 @@ config TEST_LKM If unsure, say N. +config TEST_VMALLOC + tristate "Test module for stress/performance analysis of vmalloc allocator" + default n + depends on MMU + depends on m + help + This builds the "test_vmalloc" module that should be used for + stress and performance analysis. So, any new change for vmalloc + subsystem can be evaluated from performance and stability point + of view. + + If unsure, say N. + config TEST_USER_COPY tristate "Test user/kernel boundary protections" depends on m @@ -1950,8 +1938,8 @@ config TEST_STATIC_KEYS config TEST_KMOD tristate "kmod stress tester" depends on m - depends on BLOCK && (64BIT || LBDAF) # for XFS, BTRFS depends on NETDEVICES && NET_CORE && INET # for TUN + depends on BLOCK select TEST_LKM select XFS_FS select TUN @@ -1991,6 +1979,28 @@ config TEST_MEMCAT_P If unsure, say N. +config TEST_LIVEPATCH + tristate "Test livepatching" + default n + depends on DYNAMIC_DEBUG + depends on LIVEPATCH + depends on m + help + Test kernel livepatching features for correctness. The tests will + load test modules that will be livepatched in various scenarios. + + To run all the livepatching tests: + + make -C tools/testing/selftests TARGETS=livepatch run_tests + + Alternatively, individual tests may be invoked: + + tools/testing/selftests/livepatch/test-callbacks.sh + tools/testing/selftests/livepatch/test-livepatch.sh + tools/testing/selftests/livepatch/test-shadow-vars.sh + + If unsure, say N. + config TEST_OBJAGG tristate "Perform selftest on object aggreration manager" default n @@ -1999,6 +2009,15 @@ config TEST_OBJAGG Enable this option to test object aggregation manager on boot (or module load). + +config TEST_STACKINIT + tristate "Test level of stack variable initialization" + help + Test if the kernel is zero-initializing stack variables and + padding. Coverage is controlled by compiler flags, + CONFIG_GCC_PLUGIN_STRUCTLEAK, CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF, + or CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL. + If unsure, say N. endif # RUNTIME_TESTING_MENU diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 9737059ec58b..9950b660e62d 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -78,16 +78,6 @@ config KASAN_SW_TAGS endchoice -config KASAN_EXTRA - bool "KASAN: extra checks" - depends on KASAN_GENERIC && DEBUG_KERNEL && !COMPILE_TEST - help - This enables further checks in generic KASAN, for now it only - includes the address-use-after-scope check that can lead to - excessive kernel stack usage, frame size warnings and longer - compile time. - See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 - choice prompt "Instrumentation type" depends on KASAN diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index 98fa559ebd80..a2ae4a8e4fa6 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -27,15 +27,19 @@ config UBSAN_SANITIZE_ALL Enabling this option will get kernel image size increased significantly. -config UBSAN_ALIGNMENT - bool "Enable checking of pointers alignment" +config UBSAN_NO_ALIGNMENT + bool "Disable checking of pointers alignment" depends on UBSAN - default y if !HAVE_EFFICIENT_UNALIGNED_ACCESS + default y if HAVE_EFFICIENT_UNALIGNED_ACCESS help - This option enables detection of unaligned memory accesses. - Enabling this option on architectures that support unaligned + This option disables the check of unaligned memory accesses. + This option should be used when building allmodconfig. + Disabling this option on architectures that support unaligned accesses may produce a lot of false positives. +config UBSAN_ALIGNMENT + def_bool !UBSAN_NO_ALIGNMENT + config TEST_UBSAN tristate "Module for testing for undefined behavior detection" depends on m && UBSAN diff --git a/lib/Makefile b/lib/Makefile index e1b59da71418..83d7df2661ff 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -17,6 +17,17 @@ KCOV_INSTRUMENT_list_debug.o := n KCOV_INSTRUMENT_debugobjects.o := n KCOV_INSTRUMENT_dynamic_debug.o := n +# Early boot use of cmdline, don't instrument it +ifdef CONFIG_AMD_MEM_ENCRYPT +KASAN_SANITIZE_string.o := n + +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_string.o = -pg +endif + +CFLAGS_string.o := $(call cc-option, -fno-stack-protector) +endif + lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o timerqueue.o xarray.o \ idr.o int_sqrt.o extable.o \ @@ -35,10 +46,11 @@ obj-y += lockref.o obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \ bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \ - gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ + gcd.o lcm.o list_sort.o uuid.o iov_iter.o clz_ctz.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ percpu-refcount.o rhashtable.o reciprocal_div.o \ - once.o refcount.o usercopy.o errseq.o bucket_locks.o + once.o refcount.o usercopy.o errseq.o bucket_locks.o \ + generic-radix-tree.o obj-$(CONFIG_STRING_SELFTEST) += test_string.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o @@ -60,6 +72,7 @@ UBSAN_SANITIZE_test_ubsan.o := y obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o obj-$(CONFIG_TEST_LKM) += test_module.o +obj-$(CONFIG_TEST_VMALLOC) += test_vmalloc.o obj-$(CONFIG_TEST_OVERFLOW) += test_overflow.o obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o obj-$(CONFIG_TEST_SORT) += test_sort.o @@ -68,6 +81,7 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o obj-$(CONFIG_TEST_PRINTF) += test_printf.o obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o +obj-$(CONFIG_TEST_STRSCPY) += test_strscpy.o obj-$(CONFIG_TEST_BITFIELD) += test_bitfield.o obj-$(CONFIG_TEST_UUID) += test_uuid.o obj-$(CONFIG_TEST_XARRAY) += test_xarray.o @@ -76,6 +90,9 @@ obj-$(CONFIG_TEST_KMOD) += test_kmod.o obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o obj-$(CONFIG_TEST_OBJAGG) += test_objagg.o +obj-$(CONFIG_TEST_STACKINIT) += test_stackinit.o + +obj-$(CONFIG_TEST_LIVEPATCH) += livepatch/ ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG @@ -103,6 +120,7 @@ obj-$(CONFIG_DEBUG_LIST) += list_debug.o obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o obj-$(CONFIG_BITREVERSE) += bitrev.o +obj-$(CONFIG_PACKING) += packing.o obj-$(CONFIG_RATIONAL) += rational.o obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o obj-$(CONFIG_CRC16) += crc16.o @@ -208,7 +226,7 @@ KCOV_INSTRUMENT_stackdepot.o := n libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \ fdt_empty_tree.o $(foreach file, $(libfdt_files), \ - $(eval CFLAGS_$(file) = -I$(src)/../scripts/dtc/libfdt)) + $(eval CFLAGS_$(file) = -I $(srctree)/scripts/dtc/libfdt)) lib-$(CONFIG_LIBFDT) += $(libfdt_files) obj-$(CONFIG_RBTREE_TEST) += rbtree_test.o @@ -263,6 +281,7 @@ obj-$(CONFIG_UCS2_STRING) += ucs2_string.o obj-$(CONFIG_UBSAN) += ubsan.o UBSAN_SANITIZE_ubsan.o := n +CFLAGS_ubsan.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) obj-$(CONFIG_SBITMAP) += sbitmap.o diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c index dc14beae2c9a..8f3d207d2b00 100644 --- a/lib/asn1_decoder.c +++ b/lib/asn1_decoder.c @@ -385,6 +385,8 @@ next_op: case ASN1_OP_END_SET_ACT: if (unlikely(!(flags & FLAG_MATCHED))) goto tag_mismatch; + /* fall through */ + case ASN1_OP_END_SEQ: case ASN1_OP_END_SET_OF: case ASN1_OP_END_SEQ_OF: @@ -450,6 +452,8 @@ next_op: pc += asn1_op_lengths[op]; goto next_op; } + /* fall through */ + case ASN1_OP_ACT: ret = actions[machine[pc + 1]](context, hdr, tag, data + tdp, len); if (ret < 0) diff --git a/lib/assoc_array.c b/lib/assoc_array.c index 59875eb278ea..edc3c14af41d 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -1117,6 +1117,7 @@ struct assoc_array_edit *assoc_array_delete(struct assoc_array *array, index_key)) goto found_leaf; } + /* fall through */ case assoc_array_walk_tree_empty: case assoc_array_walk_found_wrong_shortcut: default: diff --git a/lib/bsearch.c b/lib/bsearch.c index 18b445b010c3..82512fe7b33c 100644 --- a/lib/bsearch.c +++ b/lib/bsearch.c @@ -11,6 +11,7 @@ #include <linux/export.h> #include <linux/bsearch.h> +#include <linux/kprobes.h> /* * bsearch - binary search an array of elements @@ -53,3 +54,4 @@ void *bsearch(const void *key, const void *base, size_t num, size_t size, return NULL; } EXPORT_SYMBOL(bsearch); +NOKPROBE_SYMBOL(bsearch); diff --git a/lib/cmdline.c b/lib/cmdline.c index 171c19b6888e..dc59d6216318 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -135,18 +135,23 @@ unsigned long long memparse(const char *ptr, char **retptr) case 'E': case 'e': ret <<= 10; + /* fall through */ case 'P': case 'p': ret <<= 10; + /* fall through */ case 'T': case 't': ret <<= 10; + /* fall through */ case 'G': case 'g': ret <<= 10; + /* fall through */ case 'M': case 'm': ret <<= 10; + /* fall through */ case 'K': case 'k': ret <<= 10; diff --git a/lib/cpumask.c b/lib/cpumask.c index 8d666ab84b5c..0cb672eb107c 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -5,6 +5,7 @@ #include <linux/cpumask.h> #include <linux/export.h> #include <linux/memblock.h> +#include <linux/numa.h> /** * cpumask_next - get the next cpu in a cpumask @@ -164,6 +165,9 @@ EXPORT_SYMBOL(zalloc_cpumask_var); void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask) { *mask = memblock_alloc(cpumask_size(), SMP_CACHE_BYTES); + if (!*mask) + panic("%s: Failed to allocate %u bytes\n", __func__, + cpumask_size()); } /** @@ -206,7 +210,7 @@ unsigned int cpumask_local_spread(unsigned int i, int node) /* Wrap: we always want a cpu. */ i %= num_online_cpus(); - if (node == -1) { + if (node == NUMA_NO_NODE) { for_each_cpu(cpu, cpu_online_mask) if (i-- == 0) return cpu; diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c index 4d0d47c1ffbd..e89ebfdbb0fc 100644 --- a/lib/crc-t10dif.c +++ b/lib/crc-t10dif.c @@ -69,7 +69,6 @@ __u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len) rcu_read_lock(); desc.shash.tfm = rcu_dereference(crct10dif_tfm); - desc.shash.flags = 0; *(__u16 *)desc.ctx = crc; err = crypto_shash_update(&desc.shash, buffer, len); diff --git a/lib/devres.c b/lib/devres.c index faccf1a037d0..69bed2f38306 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -134,7 +134,6 @@ EXPORT_SYMBOL(devm_iounmap); void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res) { resource_size_t size; - const char *name; void __iomem *dest_ptr; BUG_ON(!dev); @@ -145,9 +144,8 @@ void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res) } size = resource_size(res); - name = res->name ?: dev_name(dev); - if (!devm_request_mem_region(dev, res->start, size, name)) { + if (!devm_request_mem_region(dev, res->start, size, dev_name(dev))) { dev_err(dev, "can't request region for resource %pR\n", res); return IOMEM_ERR_PTR(-EBUSY); } diff --git a/lib/digsig.c b/lib/digsig.c index 6ba6fcd92dd1..3b0a579bdcdf 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -240,7 +240,6 @@ int digsig_verify(struct key *keyring, const char *sig, int siglen, goto err; desc->tfm = shash; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; crypto_shash_init(desc); crypto_shash_update(desc, data, datalen); diff --git a/lib/div64.c b/lib/div64.c index 01c8602bb6ff..ee146bb4c558 100644 --- a/lib/div64.c +++ b/lib/div64.c @@ -109,7 +109,7 @@ u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) quot = div_u64_rem(dividend, divisor, &rem32); *remainder = rem32; } else { - int n = 1 + fls(high); + int n = fls(high); quot = div_u64(dividend >> n, divisor >> n); if (quot != 0) @@ -147,7 +147,7 @@ u64 div64_u64(u64 dividend, u64 divisor) if (high == 0) { quot = div_u64(dividend, divisor); } else { - int n = 1 + fls(high); + int n = fls(high); quot = div_u64(dividend >> n, divisor >> n); if (quot != 0) diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index dbf2b457e47e..8a16c2d498e9 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -37,6 +37,8 @@ #include <linux/device.h> #include <linux/netdevice.h> +#include <rdma/ib_verbs.h> + extern struct _ddebug __start___verbose[]; extern struct _ddebug __stop___verbose[]; @@ -636,6 +638,41 @@ EXPORT_SYMBOL(__dynamic_netdev_dbg); #endif +#if IS_ENABLED(CONFIG_INFINIBAND) + +void __dynamic_ibdev_dbg(struct _ddebug *descriptor, + const struct ib_device *ibdev, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + if (ibdev && ibdev->dev.parent) { + char buf[PREFIX_SIZE]; + + dev_printk_emit(LOGLEVEL_DEBUG, ibdev->dev.parent, + "%s%s %s %s: %pV", + dynamic_emit_prefix(descriptor, buf), + dev_driver_string(ibdev->dev.parent), + dev_name(ibdev->dev.parent), + dev_name(&ibdev->dev), + &vaf); + } else if (ibdev) { + printk(KERN_DEBUG "%s: %pV", dev_name(&ibdev->dev), &vaf); + } else { + printk(KERN_DEBUG "(NULL ib_device): %pV", &vaf); + } + + va_end(args); +} +EXPORT_SYMBOL(__dynamic_ibdev_dbg); + +#endif + #define DDEBUG_STRING_SIZE 1024 static __initdata char ddebug_setup_string[DDEBUG_STRING_SIZE]; @@ -847,17 +884,19 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, const char *name) { struct ddebug_table *dt; - const char *new_name; dt = kzalloc(sizeof(*dt), GFP_KERNEL); - if (dt == NULL) - return -ENOMEM; - new_name = kstrdup_const(name, GFP_KERNEL); - if (new_name == NULL) { - kfree(dt); + if (dt == NULL) { + pr_err("error adding module: %s\n", name); return -ENOMEM; } - dt->mod_name = new_name; + /* + * For built-in modules, name lives in .rodata and is + * immortal. For loaded modules, name points at the name[] + * member of struct module, which lives at least as long as + * this struct ddebug_table. + */ + dt->mod_name = name; dt->num_ddebugs = n; dt->ddebugs = tab; @@ -868,7 +907,6 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, vpr_info("%u debug prints in module %s\n", n, dt->mod_name); return 0; } -EXPORT_SYMBOL_GPL(ddebug_add_module); /* helper for ddebug_dyndbg_(boot|module)_param_cb */ static int ddebug_dyndbg_param_cb(char *param, char *val, @@ -913,7 +951,6 @@ int ddebug_dyndbg_module_param_cb(char *param, char *val, const char *module) static void ddebug_table_free(struct ddebug_table *dt) { list_del_init(&dt->link); - kfree_const(dt->mod_name); kfree(dt); } @@ -930,15 +967,15 @@ int ddebug_remove_module(const char *mod_name) mutex_lock(&ddebug_lock); list_for_each_entry_safe(dt, nextdt, &ddebug_tables, link) { - if (!strcmp(dt->mod_name, mod_name)) { + if (dt->mod_name == mod_name) { ddebug_table_free(dt); ret = 0; + break; } } mutex_unlock(&ddebug_lock); return ret; } -EXPORT_SYMBOL_GPL(ddebug_remove_module); static void ddebug_remove_all_tables(void) { diff --git a/lib/error-inject.c b/lib/error-inject.c index c0d4600f4896..aa63751c916f 100644 --- a/lib/error-inject.c +++ b/lib/error-inject.c @@ -189,7 +189,7 @@ static int ei_seq_show(struct seq_file *m, void *v) { struct ei_entry *ent = list_entry(v, struct ei_entry, list); - seq_printf(m, "%pf\t%s\n", (void *)ent->start_addr, + seq_printf(m, "%ps\t%s\n", (void *)ent->start_addr, error_type_string(ent->etype)); return 0; } diff --git a/lib/fault-inject.c b/lib/fault-inject.c index cf7b129b0b2b..e26aa4f65eb9 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -65,22 +65,16 @@ static bool fail_task(struct fault_attr *attr, struct task_struct *task) static bool fail_stacktrace(struct fault_attr *attr) { - struct stack_trace trace; int depth = attr->stacktrace_depth; unsigned long entries[MAX_STACK_TRACE_DEPTH]; - int n; + int n, nr_entries; bool found = (attr->require_start == 0 && attr->require_end == ULONG_MAX); if (depth == 0) return found; - trace.nr_entries = 0; - trace.entries = entries; - trace.max_entries = depth; - trace.skip = 1; - - save_stack_trace(&trace); - for (n = 0; n < trace.nr_entries; n++) { + nr_entries = stack_trace_save(entries, depth, 1); + for (n = 0; n < nr_entries; n++) { if (attr->reject_start <= entries[n] && entries[n] < attr->reject_end) return false; diff --git a/lib/flex_array.c b/lib/flex_array.c deleted file mode 100644 index 2eed22fa507c..000000000000 --- a/lib/flex_array.c +++ /dev/null @@ -1,398 +0,0 @@ -/* - * Flexible array managed in PAGE_SIZE parts - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright IBM Corporation, 2009 - * - * Author: Dave Hansen <dave@linux.vnet.ibm.com> - */ - -#include <linux/flex_array.h> -#include <linux/slab.h> -#include <linux/stddef.h> -#include <linux/export.h> -#include <linux/reciprocal_div.h> - -struct flex_array_part { - char elements[FLEX_ARRAY_PART_SIZE]; -}; - -/* - * If a user requests an allocation which is small - * enough, we may simply use the space in the - * flex_array->parts[] array to store the user - * data. - */ -static inline int elements_fit_in_base(struct flex_array *fa) -{ - int data_size = fa->element_size * fa->total_nr_elements; - if (data_size <= FLEX_ARRAY_BASE_BYTES_LEFT) - return 1; - return 0; -} - -/** - * flex_array_alloc - allocate a new flexible array - * @element_size: the size of individual elements in the array - * @total: total number of elements that this should hold - * @flags: page allocation flags to use for base array - * - * Note: all locking must be provided by the caller. - * - * @total is used to size internal structures. If the user ever - * accesses any array indexes >=@total, it will produce errors. - * - * The maximum number of elements is defined as: the number of - * elements that can be stored in a page times the number of - * page pointers that we can fit in the base structure or (using - * integer math): - * - * (PAGE_SIZE/element_size) * (PAGE_SIZE-8)/sizeof(void *) - * - * Here's a table showing example capacities. Note that the maximum - * index that the get/put() functions is just nr_objects-1. This - * basically means that you get 4MB of storage on 32-bit and 2MB on - * 64-bit. - * - * - * Element size | Objects | Objects | - * PAGE_SIZE=4k | 32-bit | 64-bit | - * ---------------------------------| - * 1 bytes | 4177920 | 2088960 | - * 2 bytes | 2088960 | 1044480 | - * 3 bytes | 1392300 | 696150 | - * 4 bytes | 1044480 | 522240 | - * 32 bytes | 130560 | 65408 | - * 33 bytes | 126480 | 63240 | - * 2048 bytes | 2040 | 1020 | - * 2049 bytes | 1020 | 510 | - * void * | 1044480 | 261120 | - * - * Since 64-bit pointers are twice the size, we lose half the - * capacity in the base structure. Also note that no effort is made - * to efficiently pack objects across page boundaries. - */ -struct flex_array *flex_array_alloc(int element_size, unsigned int total, - gfp_t flags) -{ - struct flex_array *ret; - int elems_per_part = 0; - int max_size = 0; - struct reciprocal_value reciprocal_elems = { 0 }; - - if (element_size) { - elems_per_part = FLEX_ARRAY_ELEMENTS_PER_PART(element_size); - reciprocal_elems = reciprocal_value(elems_per_part); - max_size = FLEX_ARRAY_NR_BASE_PTRS * elems_per_part; - } - - /* max_size will end up 0 if element_size > PAGE_SIZE */ - if (total > max_size) - return NULL; - ret = kzalloc(sizeof(struct flex_array), flags); - if (!ret) - return NULL; - ret->element_size = element_size; - ret->total_nr_elements = total; - ret->elems_per_part = elems_per_part; - ret->reciprocal_elems = reciprocal_elems; - if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO)) - memset(&ret->parts[0], FLEX_ARRAY_FREE, - FLEX_ARRAY_BASE_BYTES_LEFT); - return ret; -} -EXPORT_SYMBOL(flex_array_alloc); - -static int fa_element_to_part_nr(struct flex_array *fa, - unsigned int element_nr) -{ - /* - * if element_size == 0 we don't get here, so we never touch - * the zeroed fa->reciprocal_elems, which would yield invalid - * results - */ - return reciprocal_divide(element_nr, fa->reciprocal_elems); -} - -/** - * flex_array_free_parts - just free the second-level pages - * @fa: the flex array from which to free parts - * - * This is to be used in cases where the base 'struct flex_array' - * has been statically allocated and should not be free. - */ -void flex_array_free_parts(struct flex_array *fa) -{ - int part_nr; - - if (elements_fit_in_base(fa)) - return; - for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) - kfree(fa->parts[part_nr]); -} -EXPORT_SYMBOL(flex_array_free_parts); - -void flex_array_free(struct flex_array *fa) -{ - flex_array_free_parts(fa); - kfree(fa); -} -EXPORT_SYMBOL(flex_array_free); - -static unsigned int index_inside_part(struct flex_array *fa, - unsigned int element_nr, - unsigned int part_nr) -{ - unsigned int part_offset; - - part_offset = element_nr - part_nr * fa->elems_per_part; - return part_offset * fa->element_size; -} - -static struct flex_array_part * -__fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags) -{ - struct flex_array_part *part = fa->parts[part_nr]; - if (!part) { - part = kmalloc(sizeof(struct flex_array_part), flags); - if (!part) - return NULL; - if (!(flags & __GFP_ZERO)) - memset(part, FLEX_ARRAY_FREE, - sizeof(struct flex_array_part)); - fa->parts[part_nr] = part; - } - return part; -} - -/** - * flex_array_put - copy data into the array at @element_nr - * @fa: the flex array to copy data into - * @element_nr: index of the position in which to insert - * the new element. - * @src: address of data to copy into the array - * @flags: page allocation flags to use for array expansion - * - * - * Note that this *copies* the contents of @src into - * the array. If you are trying to store an array of - * pointers, make sure to pass in &ptr instead of ptr. - * You may instead wish to use the flex_array_put_ptr() - * helper function. - * - * Locking must be provided by the caller. - */ -int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, - gfp_t flags) -{ - int part_nr = 0; - struct flex_array_part *part; - void *dst; - - if (element_nr >= fa->total_nr_elements) - return -ENOSPC; - if (!fa->element_size) - return 0; - if (elements_fit_in_base(fa)) - part = (struct flex_array_part *)&fa->parts[0]; - else { - part_nr = fa_element_to_part_nr(fa, element_nr); - part = __fa_get_part(fa, part_nr, flags); - if (!part) - return -ENOMEM; - } - dst = &part->elements[index_inside_part(fa, element_nr, part_nr)]; - memcpy(dst, src, fa->element_size); - return 0; -} -EXPORT_SYMBOL(flex_array_put); - -/** - * flex_array_clear - clear element in array at @element_nr - * @fa: the flex array of the element. - * @element_nr: index of the position to clear. - * - * Locking must be provided by the caller. - */ -int flex_array_clear(struct flex_array *fa, unsigned int element_nr) -{ - int part_nr = 0; - struct flex_array_part *part; - void *dst; - - if (element_nr >= fa->total_nr_elements) - return -ENOSPC; - if (!fa->element_size) - return 0; - if (elements_fit_in_base(fa)) - part = (struct flex_array_part *)&fa->parts[0]; - else { - part_nr = fa_element_to_part_nr(fa, element_nr); - part = fa->parts[part_nr]; - if (!part) - return -EINVAL; - } - dst = &part->elements[index_inside_part(fa, element_nr, part_nr)]; - memset(dst, FLEX_ARRAY_FREE, fa->element_size); - return 0; -} -EXPORT_SYMBOL(flex_array_clear); - -/** - * flex_array_prealloc - guarantee that array space exists - * @fa: the flex array for which to preallocate parts - * @start: index of first array element for which space is allocated - * @nr_elements: number of elements for which space is allocated - * @flags: page allocation flags - * - * This will guarantee that no future calls to flex_array_put() - * will allocate memory. It can be used if you are expecting to - * be holding a lock or in some atomic context while writing - * data into the array. - * - * Locking must be provided by the caller. - */ -int flex_array_prealloc(struct flex_array *fa, unsigned int start, - unsigned int nr_elements, gfp_t flags) -{ - int start_part; - int end_part; - int part_nr; - unsigned int end; - struct flex_array_part *part; - - if (!start && !nr_elements) - return 0; - if (start >= fa->total_nr_elements) - return -ENOSPC; - if (!nr_elements) - return 0; - - end = start + nr_elements - 1; - - if (end >= fa->total_nr_elements) - return -ENOSPC; - if (!fa->element_size) - return 0; - if (elements_fit_in_base(fa)) - return 0; - start_part = fa_element_to_part_nr(fa, start); - end_part = fa_element_to_part_nr(fa, end); - for (part_nr = start_part; part_nr <= end_part; part_nr++) { - part = __fa_get_part(fa, part_nr, flags); - if (!part) - return -ENOMEM; - } - return 0; -} -EXPORT_SYMBOL(flex_array_prealloc); - -/** - * flex_array_get - pull data back out of the array - * @fa: the flex array from which to extract data - * @element_nr: index of the element to fetch from the array - * - * Returns a pointer to the data at index @element_nr. Note - * that this is a copy of the data that was passed in. If you - * are using this to store pointers, you'll get back &ptr. You - * may instead wish to use the flex_array_get_ptr helper. - * - * Locking must be provided by the caller. - */ -void *flex_array_get(struct flex_array *fa, unsigned int element_nr) -{ - int part_nr = 0; - struct flex_array_part *part; - - if (!fa->element_size) - return NULL; - if (element_nr >= fa->total_nr_elements) - return NULL; - if (elements_fit_in_base(fa)) - part = (struct flex_array_part *)&fa->parts[0]; - else { - part_nr = fa_element_to_part_nr(fa, element_nr); - part = fa->parts[part_nr]; - if (!part) - return NULL; - } - return &part->elements[index_inside_part(fa, element_nr, part_nr)]; -} -EXPORT_SYMBOL(flex_array_get); - -/** - * flex_array_get_ptr - pull a ptr back out of the array - * @fa: the flex array from which to extract data - * @element_nr: index of the element to fetch from the array - * - * Returns the pointer placed in the flex array at element_nr using - * flex_array_put_ptr(). This function should not be called if the - * element in question was not set using the _put_ptr() helper. - */ -void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr) -{ - void **tmp; - - tmp = flex_array_get(fa, element_nr); - if (!tmp) - return NULL; - - return *tmp; -} -EXPORT_SYMBOL(flex_array_get_ptr); - -static int part_is_free(struct flex_array_part *part) -{ - int i; - - for (i = 0; i < sizeof(struct flex_array_part); i++) - if (part->elements[i] != FLEX_ARRAY_FREE) - return 0; - return 1; -} - -/** - * flex_array_shrink - free unused second-level pages - * @fa: the flex array to shrink - * - * Frees all second-level pages that consist solely of unused - * elements. Returns the number of pages freed. - * - * Locking must be provided by the caller. - */ -int flex_array_shrink(struct flex_array *fa) -{ - struct flex_array_part *part; - int part_nr; - int ret = 0; - - if (!fa->total_nr_elements || !fa->element_size) - return 0; - if (elements_fit_in_base(fa)) - return ret; - for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) { - part = fa->parts[part_nr]; - if (!part) - continue; - if (part_is_free(part)) { - fa->parts[part_nr] = NULL; - kfree(part); - ret++; - } - } - return ret; -} -EXPORT_SYMBOL(flex_array_shrink); diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c new file mode 100644 index 000000000000..a7bafc413730 --- /dev/null +++ b/lib/generic-radix-tree.c @@ -0,0 +1,217 @@ + +#include <linux/export.h> +#include <linux/generic-radix-tree.h> +#include <linux/gfp.h> + +#define GENRADIX_ARY (PAGE_SIZE / sizeof(struct genradix_node *)) +#define GENRADIX_ARY_SHIFT ilog2(GENRADIX_ARY) + +struct genradix_node { + union { + /* Interior node: */ + struct genradix_node *children[GENRADIX_ARY]; + + /* Leaf: */ + u8 data[PAGE_SIZE]; + }; +}; + +static inline int genradix_depth_shift(unsigned depth) +{ + return PAGE_SHIFT + GENRADIX_ARY_SHIFT * depth; +} + +/* + * Returns size (of data, in bytes) that a tree of a given depth holds: + */ +static inline size_t genradix_depth_size(unsigned depth) +{ + return 1UL << genradix_depth_shift(depth); +} + +/* depth that's needed for a genradix that can address up to ULONG_MAX: */ +#define GENRADIX_MAX_DEPTH \ + DIV_ROUND_UP(BITS_PER_LONG - PAGE_SHIFT, GENRADIX_ARY_SHIFT) + +#define GENRADIX_DEPTH_MASK \ + ((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1)) + +unsigned genradix_root_to_depth(struct genradix_root *r) +{ + return (unsigned long) r & GENRADIX_DEPTH_MASK; +} + +struct genradix_node *genradix_root_to_node(struct genradix_root *r) +{ + return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK); +} + +/* + * Returns pointer to the specified byte @offset within @radix, or NULL if not + * allocated + */ +void *__genradix_ptr(struct __genradix *radix, size_t offset) +{ + struct genradix_root *r = READ_ONCE(radix->root); + struct genradix_node *n = genradix_root_to_node(r); + unsigned level = genradix_root_to_depth(r); + + if (ilog2(offset) >= genradix_depth_shift(level)) + return NULL; + + while (1) { + if (!n) + return NULL; + if (!level) + break; + + level--; + + n = n->children[offset >> genradix_depth_shift(level)]; + offset &= genradix_depth_size(level) - 1; + } + + return &n->data[offset]; +} +EXPORT_SYMBOL(__genradix_ptr); + +/* + * Returns pointer to the specified byte @offset within @radix, allocating it if + * necessary - newly allocated slots are always zeroed out: + */ +void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset, + gfp_t gfp_mask) +{ + struct genradix_root *v = READ_ONCE(radix->root); + struct genradix_node *n, *new_node = NULL; + unsigned level; + + /* Increase tree depth if necessary: */ + while (1) { + struct genradix_root *r = v, *new_root; + + n = genradix_root_to_node(r); + level = genradix_root_to_depth(r); + + if (n && ilog2(offset) < genradix_depth_shift(level)) + break; + + if (!new_node) { + new_node = (void *) + __get_free_page(gfp_mask|__GFP_ZERO); + if (!new_node) + return NULL; + } + + new_node->children[0] = n; + new_root = ((struct genradix_root *) + ((unsigned long) new_node | (n ? level + 1 : 0))); + + if ((v = cmpxchg_release(&radix->root, r, new_root)) == r) { + v = new_root; + new_node = NULL; + } + } + + while (level--) { + struct genradix_node **p = + &n->children[offset >> genradix_depth_shift(level)]; + offset &= genradix_depth_size(level) - 1; + + n = READ_ONCE(*p); + if (!n) { + if (!new_node) { + new_node = (void *) + __get_free_page(gfp_mask|__GFP_ZERO); + if (!new_node) + return NULL; + } + + if (!(n = cmpxchg_release(p, NULL, new_node))) + swap(n, new_node); + } + } + + if (new_node) + free_page((unsigned long) new_node); + + return &n->data[offset]; +} +EXPORT_SYMBOL(__genradix_ptr_alloc); + +void *__genradix_iter_peek(struct genradix_iter *iter, + struct __genradix *radix, + size_t objs_per_page) +{ + struct genradix_root *r; + struct genradix_node *n; + unsigned level, i; +restart: + r = READ_ONCE(radix->root); + if (!r) + return NULL; + + n = genradix_root_to_node(r); + level = genradix_root_to_depth(r); + + if (ilog2(iter->offset) >= genradix_depth_shift(level)) + return NULL; + + while (level) { + level--; + + i = (iter->offset >> genradix_depth_shift(level)) & + (GENRADIX_ARY - 1); + + while (!n->children[i]) { + i++; + iter->offset = round_down(iter->offset + + genradix_depth_size(level), + genradix_depth_size(level)); + iter->pos = (iter->offset >> PAGE_SHIFT) * + objs_per_page; + if (i == GENRADIX_ARY) + goto restart; + } + + n = n->children[i]; + } + + return &n->data[iter->offset & (PAGE_SIZE - 1)]; +} +EXPORT_SYMBOL(__genradix_iter_peek); + +static void genradix_free_recurse(struct genradix_node *n, unsigned level) +{ + if (level) { + unsigned i; + + for (i = 0; i < GENRADIX_ARY; i++) + if (n->children[i]) + genradix_free_recurse(n->children[i], level - 1); + } + + free_page((unsigned long) n); +} + +int __genradix_prealloc(struct __genradix *radix, size_t size, + gfp_t gfp_mask) +{ + size_t offset; + + for (offset = 0; offset < size; offset += PAGE_SIZE) + if (!__genradix_ptr_alloc(radix, offset, gfp_mask)) + return -ENOMEM; + + return 0; +} +EXPORT_SYMBOL(__genradix_prealloc); + +void __genradix_free(struct __genradix *radix) +{ + struct genradix_root *r = xchg(&radix->root, NULL); + + genradix_free_recurse(genradix_root_to_node(r), + genradix_root_to_depth(r)); +} +EXPORT_SYMBOL(__genradix_free); diff --git a/lib/iomap.c b/lib/iomap.c index 541d926da95e..e909ab71e995 100644 --- a/lib/iomap.c +++ b/lib/iomap.c @@ -65,8 +65,9 @@ static void bad_io_access(unsigned long port, const char *access) #endif #ifndef mmio_read16be -#define mmio_read16be(addr) be16_to_cpu(__raw_readw(addr)) -#define mmio_read32be(addr) be32_to_cpu(__raw_readl(addr)) +#define mmio_read16be(addr) swab16(readw(addr)) +#define mmio_read32be(addr) swab32(readl(addr)) +#define mmio_read64be(addr) swab64(readq(addr)) #endif unsigned int ioread8(void __iomem *addr) @@ -100,14 +101,89 @@ EXPORT_SYMBOL(ioread16be); EXPORT_SYMBOL(ioread32); EXPORT_SYMBOL(ioread32be); +#ifdef readq +static u64 pio_read64_lo_hi(unsigned long port) +{ + u64 lo, hi; + + lo = inl(port); + hi = inl(port + sizeof(u32)); + + return lo | (hi << 32); +} + +static u64 pio_read64_hi_lo(unsigned long port) +{ + u64 lo, hi; + + hi = inl(port + sizeof(u32)); + lo = inl(port); + + return lo | (hi << 32); +} + +static u64 pio_read64be_lo_hi(unsigned long port) +{ + u64 lo, hi; + + lo = pio_read32be(port + sizeof(u32)); + hi = pio_read32be(port); + + return lo | (hi << 32); +} + +static u64 pio_read64be_hi_lo(unsigned long port) +{ + u64 lo, hi; + + hi = pio_read32be(port); + lo = pio_read32be(port + sizeof(u32)); + + return lo | (hi << 32); +} + +u64 ioread64_lo_hi(void __iomem *addr) +{ + IO_COND(addr, return pio_read64_lo_hi(port), return readq(addr)); + return 0xffffffffffffffffULL; +} + +u64 ioread64_hi_lo(void __iomem *addr) +{ + IO_COND(addr, return pio_read64_hi_lo(port), return readq(addr)); + return 0xffffffffffffffffULL; +} + +u64 ioread64be_lo_hi(void __iomem *addr) +{ + IO_COND(addr, return pio_read64be_lo_hi(port), + return mmio_read64be(addr)); + return 0xffffffffffffffffULL; +} + +u64 ioread64be_hi_lo(void __iomem *addr) +{ + IO_COND(addr, return pio_read64be_hi_lo(port), + return mmio_read64be(addr)); + return 0xffffffffffffffffULL; +} + +EXPORT_SYMBOL(ioread64_lo_hi); +EXPORT_SYMBOL(ioread64_hi_lo); +EXPORT_SYMBOL(ioread64be_lo_hi); +EXPORT_SYMBOL(ioread64be_hi_lo); + +#endif /* readq */ + #ifndef pio_write16be #define pio_write16be(val,port) outw(swab16(val),port) #define pio_write32be(val,port) outl(swab32(val),port) #endif #ifndef mmio_write16be -#define mmio_write16be(val,port) __raw_writew(be16_to_cpu(val),port) -#define mmio_write32be(val,port) __raw_writel(be32_to_cpu(val),port) +#define mmio_write16be(val,port) writew(swab16(val),port) +#define mmio_write32be(val,port) writel(swab32(val),port) +#define mmio_write64be(val,port) writeq(swab64(val),port) #endif void iowrite8(u8 val, void __iomem *addr) @@ -136,6 +212,62 @@ EXPORT_SYMBOL(iowrite16be); EXPORT_SYMBOL(iowrite32); EXPORT_SYMBOL(iowrite32be); +#ifdef writeq +static void pio_write64_lo_hi(u64 val, unsigned long port) +{ + outl(val, port); + outl(val >> 32, port + sizeof(u32)); +} + +static void pio_write64_hi_lo(u64 val, unsigned long port) +{ + outl(val >> 32, port + sizeof(u32)); + outl(val, port); +} + +static void pio_write64be_lo_hi(u64 val, unsigned long port) +{ + pio_write32be(val, port + sizeof(u32)); + pio_write32be(val >> 32, port); +} + +static void pio_write64be_hi_lo(u64 val, unsigned long port) +{ + pio_write32be(val >> 32, port); + pio_write32be(val, port + sizeof(u32)); +} + +void iowrite64_lo_hi(u64 val, void __iomem *addr) +{ + IO_COND(addr, pio_write64_lo_hi(val, port), + writeq(val, addr)); +} + +void iowrite64_hi_lo(u64 val, void __iomem *addr) +{ + IO_COND(addr, pio_write64_hi_lo(val, port), + writeq(val, addr)); +} + +void iowrite64be_lo_hi(u64 val, void __iomem *addr) +{ + IO_COND(addr, pio_write64be_lo_hi(val, port), + mmio_write64be(val, addr)); +} + +void iowrite64be_hi_lo(u64 val, void __iomem *addr) +{ + IO_COND(addr, pio_write64be_hi_lo(val, port), + mmio_write64be(val, addr)); +} + +EXPORT_SYMBOL(iowrite64_lo_hi); +EXPORT_SYMBOL(iowrite64_hi_lo); +EXPORT_SYMBOL(iowrite64be_lo_hi); +EXPORT_SYMBOL(iowrite64be_hi_lo); + +#endif /* readq */ + /* * These are the "repeat MMIO read/write" functions. * Note the "__raw" accesses, since we don't want to diff --git a/lib/iov_iter.c b/lib/iov_iter.c index be4bd627caf0..b396d328a764 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -861,8 +861,21 @@ EXPORT_SYMBOL(_copy_from_iter_full_nocache); static inline bool page_copy_sane(struct page *page, size_t offset, size_t n) { - struct page *head = compound_head(page); - size_t v = n + offset + page_address(page) - page_address(head); + struct page *head; + size_t v = n + offset; + + /* + * The general case needs to access the page order in order + * to compute the page size. + * However, we mostly deal with order-0 pages and thus can + * avoid a possible cache line miss for requests that fit all + * page orders. + */ + if (n <= v && v <= PAGE_SIZE) + return true; + + head = compound_head(page); + v += (page - head) << PAGE_SHIFT; if (likely(n <= v && v <= (PAGE_SIZE << compound_order(head)))) return true; @@ -1515,6 +1528,7 @@ EXPORT_SYMBOL(csum_and_copy_to_iter); size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, struct iov_iter *i) { +#ifdef CONFIG_CRYPTO struct ahash_request *hash = hashp; struct scatterlist sg; size_t copied; @@ -1524,6 +1538,9 @@ size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, ahash_request_set_crypt(hash, &sg, NULL, copied); crypto_ahash_update(hash); return copied; +#else + return 0; +#endif } EXPORT_SYMBOL(hash_and_copy_to_iter); diff --git a/lib/irq_poll.c b/lib/irq_poll.c index 86a709954f5a..2f17b488d58e 100644 --- a/lib/irq_poll.c +++ b/lib/irq_poll.c @@ -35,7 +35,7 @@ void irq_poll_sched(struct irq_poll *iop) local_irq_save(flags); list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll)); - __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); + raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); local_irq_restore(flags); } EXPORT_SYMBOL(irq_poll_sched); diff --git a/lib/kobject.c b/lib/kobject.c index b72e00fd7d09..f2ccdbac8ed9 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -18,7 +18,7 @@ #include <linux/random.h> /** - * kobject_namespace - return @kobj's namespace tag + * kobject_namespace() - Return @kobj's namespace tag. * @kobj: kobject in question * * Returns namespace tag of @kobj if its parent has namespace ops enabled @@ -36,7 +36,7 @@ const void *kobject_namespace(struct kobject *kobj) } /** - * kobject_get_ownership - get sysfs ownership data for @kobj + * kobject_get_ownership() - Get sysfs ownership data for @kobj. * @kobj: kobject in question * @uid: kernel user ID for sysfs objects * @gid: kernel group ID for sysfs objects @@ -82,6 +82,7 @@ static int populate_dir(struct kobject *kobj) static int create_dir(struct kobject *kobj) { + const struct kobj_type *ktype = get_ktype(kobj); const struct kobj_ns_type_operations *ops; int error; @@ -95,6 +96,14 @@ static int create_dir(struct kobject *kobj) return error; } + if (ktype) { + error = sysfs_create_groups(kobj, ktype->default_groups); + if (error) { + sysfs_remove_dir(kobj); + return error; + } + } + /* * @kobj->sd may be deleted by an ancestor going away. Hold an * extra reference so that it stays until @kobj is gone. @@ -153,12 +162,11 @@ static void fill_kobj_path(struct kobject *kobj, char *path, int length) } /** - * kobject_get_path - generate and return the path associated with a given kobj and kset pair. - * + * kobject_get_path() - Allocate memory and fill in the path for @kobj. * @kobj: kobject in question, with which to build the path * @gfp_mask: the allocation type used to allocate the path * - * The result must be freed by the caller with kfree(). + * Return: The newly allocated memory, caller must free with kfree(). */ char *kobject_get_path(struct kobject *kobj, gfp_t gfp_mask) { @@ -265,7 +273,7 @@ static int kobject_add_internal(struct kobject *kobj) } /** - * kobject_set_name_vargs - Set the name of an kobject + * kobject_set_name_vargs() - Set the name of a kobject. * @kobj: struct kobject to set the name of * @fmt: format string used to build the name * @vargs: vargs to format the string. @@ -305,7 +313,7 @@ int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, } /** - * kobject_set_name - Set the name of a kobject + * kobject_set_name() - Set the name of a kobject. * @kobj: struct kobject to set the name of * @fmt: format string used to build the name * @@ -327,7 +335,7 @@ int kobject_set_name(struct kobject *kobj, const char *fmt, ...) EXPORT_SYMBOL(kobject_set_name); /** - * kobject_init - initialize a kobject structure + * kobject_init() - Initialize a kobject structure. * @kobj: pointer to the kobject to initialize * @ktype: pointer to the ktype for this kobject. * @@ -383,7 +391,7 @@ static __printf(3, 0) int kobject_add_varg(struct kobject *kobj, } /** - * kobject_add - the main kobject add function + * kobject_add() - The main kobject add function. * @kobj: the kobject to add * @parent: pointer to the parent of the kobject. * @fmt: format to name the kobject with. @@ -397,15 +405,23 @@ static __printf(3, 0) int kobject_add_varg(struct kobject *kobj, * is assigned to the kobject, then the kobject will be located in the * root of the sysfs tree. * - * If this function returns an error, kobject_put() must be called to - * properly clean up the memory associated with the object. - * Under no instance should the kobject that is passed to this function - * be directly freed with a call to kfree(), that can leak memory. - * * Note, no "add" uevent will be created with this call, the caller should set * up all of the necessary sysfs files for the object and then call * kobject_uevent() with the UEVENT_ADD parameter to ensure that * userspace is properly notified of this kobject's creation. + * + * Return: If this function returns an error, kobject_put() must be + * called to properly clean up the memory associated with the + * object. Under no instance should the kobject that is passed + * to this function be directly freed with a call to kfree(), + * that can leak memory. + * + * If this function returns success, kobject_put() must also be called + * in order to properly clean up the memory associated with the object. + * + * In short, once this function is called, kobject_put() MUST be called + * when the use of the object is finished in order to properly free + * everything. */ int kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...) @@ -431,15 +447,19 @@ int kobject_add(struct kobject *kobj, struct kobject *parent, EXPORT_SYMBOL(kobject_add); /** - * kobject_init_and_add - initialize a kobject structure and add it to the kobject hierarchy + * kobject_init_and_add() - Initialize a kobject structure and add it to + * the kobject hierarchy. * @kobj: pointer to the kobject to initialize * @ktype: pointer to the ktype for this kobject. * @parent: pointer to the parent of this kobject. * @fmt: the name of the kobject. * - * This function combines the call to kobject_init() and - * kobject_add(). The same type of error handling after a call to - * kobject_add() and kobject lifetime rules are the same here. + * This function combines the call to kobject_init() and kobject_add(). + * + * If this function returns an error, kobject_put() must be called to + * properly clean up the memory associated with the object. This is the + * same type of error handling after a call to kobject_add() and kobject + * lifetime rules are the same here. */ int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, struct kobject *parent, const char *fmt, ...) @@ -458,7 +478,7 @@ int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, EXPORT_SYMBOL_GPL(kobject_init_and_add); /** - * kobject_rename - change the name of an object + * kobject_rename() - Change the name of an object. * @kobj: object in question. * @new_name: object's new name * @@ -525,7 +545,7 @@ out: EXPORT_SYMBOL_GPL(kobject_rename); /** - * kobject_move - move object to another parent + * kobject_move() - Move object to another parent. * @kobj: object in question. * @new_parent: object's new parent (can be NULL) */ @@ -578,17 +598,26 @@ out: EXPORT_SYMBOL_GPL(kobject_move); /** - * kobject_del - unlink kobject from hierarchy. + * kobject_del() - Unlink kobject from hierarchy. * @kobj: object. + * + * This is the function that should be called to delete an object + * successfully added via kobject_add(). */ void kobject_del(struct kobject *kobj) { struct kernfs_node *sd; + const struct kobj_type *ktype; if (!kobj) return; sd = kobj->sd; + ktype = get_ktype(kobj); + + if (ktype) + sysfs_remove_groups(kobj, ktype->default_groups); + sysfs_remove_dir(kobj); sysfs_put(sd); @@ -600,7 +629,7 @@ void kobject_del(struct kobject *kobj) EXPORT_SYMBOL(kobject_del); /** - * kobject_get - increment refcount for object. + * kobject_get() - Increment refcount for object. * @kobj: object. */ struct kobject *kobject_get(struct kobject *kobj) @@ -693,7 +722,7 @@ static void kobject_release(struct kref *kref) } /** - * kobject_put - decrement refcount for object. + * kobject_put() - Decrement refcount for object. * @kobj: object. * * Decrement the refcount, and if 0, call kobject_cleanup(). @@ -722,7 +751,7 @@ static struct kobj_type dynamic_kobj_ktype = { }; /** - * kobject_create - create a struct kobject dynamically + * kobject_create() - Create a struct kobject dynamically. * * This function creates a kobject structure dynamically and sets it up * to be a "dynamic" kobject with a default release function set up. @@ -745,8 +774,8 @@ struct kobject *kobject_create(void) } /** - * kobject_create_and_add - create a struct kobject dynamically and register it with sysfs - * + * kobject_create_and_add() - Create a struct kobject dynamically and + * register it with sysfs. * @name: the name for the kobject * @parent: the parent kobject of this kobject, if any. * @@ -777,7 +806,7 @@ struct kobject *kobject_create_and_add(const char *name, struct kobject *parent) EXPORT_SYMBOL_GPL(kobject_create_and_add); /** - * kset_init - initialize a kset for use + * kset_init() - Initialize a kset for use. * @k: kset */ void kset_init(struct kset *k) @@ -819,7 +848,7 @@ const struct sysfs_ops kobj_sysfs_ops = { EXPORT_SYMBOL_GPL(kobj_sysfs_ops); /** - * kset_register - initialize and add a kset. + * kset_register() - Initialize and add a kset. * @k: kset. */ int kset_register(struct kset *k) @@ -839,7 +868,7 @@ int kset_register(struct kset *k) EXPORT_SYMBOL(kset_register); /** - * kset_unregister - remove a kset. + * kset_unregister() - Remove a kset. * @k: kset. */ void kset_unregister(struct kset *k) @@ -852,7 +881,7 @@ void kset_unregister(struct kset *k) EXPORT_SYMBOL(kset_unregister); /** - * kset_find_obj - search for object in kset. + * kset_find_obj() - Search for object in kset. * @kset: kset we're looking in. * @name: object's name. * @@ -887,7 +916,7 @@ static void kset_release(struct kobject *kobj) kfree(kset); } -void kset_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) +static void kset_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) { if (kobj->parent) kobject_get_ownership(kobj->parent, uid, gid); @@ -900,7 +929,7 @@ static struct kobj_type kset_ktype = { }; /** - * kset_create - create a struct kset dynamically + * kset_create() - Create a struct kset dynamically. * * @name: the name for the kset * @uevent_ops: a struct kset_uevent_ops for the kset @@ -944,7 +973,7 @@ static struct kset *kset_create(const char *name, } /** - * kset_create_and_add - create a struct kset dynamically and add it to sysfs + * kset_create_and_add() - Create a struct kset dynamically and add it to sysfs. * * @name: the name for the kset * @uevent_ops: a struct kset_uevent_ops for the kset diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 27c6118afd1c..7998affa45d4 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -200,7 +200,7 @@ int kobject_synth_uevent(struct kobject *kobj, const char *buf, size_t count) r = kobject_action_type(buf, count, &action, &action_args); if (r) { - msg = "unknown uevent action string\n"; + msg = "unknown uevent action string"; goto out; } @@ -212,7 +212,7 @@ int kobject_synth_uevent(struct kobject *kobj, const char *buf, size_t count) r = kobject_action_args(action_args, count - (action_args - buf), &env); if (r == -EINVAL) { - msg = "incorrect uevent action arguments\n"; + msg = "incorrect uevent action arguments"; goto out; } @@ -224,7 +224,7 @@ int kobject_synth_uevent(struct kobject *kobj, const char *buf, size_t count) out: if (r) { devpath = kobject_get_path(kobj, GFP_KERNEL); - printk(KERN_WARNING "synth uevent: %s: %s", + pr_warn("synth uevent: %s: %s\n", devpath ?: "unknown device", msg ?: "failed to send uevent"); kfree(devpath); @@ -466,6 +466,13 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, int i = 0; int retval = 0; + /* + * Mark "remove" event done regardless of result, for some subsystems + * do not want to re-trigger "remove" event via automatic cleanup. + */ + if (action == KOBJ_REMOVE) + kobj->state_remove_uevent_sent = 1; + pr_debug("kobject: '%s' (%p): %s\n", kobject_name(kobj), kobj, __func__); @@ -567,10 +574,6 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, kobj->state_add_uevent_sent = 1; break; - case KOBJ_REMOVE: - kobj->state_remove_uevent_sent = 1; - break; - case KOBJ_UNBIND: zap_modalias_env(env); break; @@ -765,8 +768,7 @@ static int uevent_net_init(struct net *net) ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT, &cfg); if (!ue_sk->sk) { - printk(KERN_ERR - "kobject_uevent: unable to create netlink socket!\n"); + pr_err("kobject_uevent: unable to create netlink socket!\n"); kfree(ue_sk); return -ENODEV; } diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c index f0a2934605bf..4e9829c4d64c 100644 --- a/lib/libcrc32c.c +++ b/lib/libcrc32c.c @@ -47,7 +47,6 @@ u32 crc32c(u32 crc, const void *address, unsigned int length) int err; shash->tfm = tfm; - shash->flags = 0; *ctx = crc; err = crypto_shash_update(shash, address, length); diff --git a/lib/livepatch/Makefile b/lib/livepatch/Makefile new file mode 100644 index 000000000000..26900ddaef82 --- /dev/null +++ b/lib/livepatch/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for livepatch test code. + +obj-$(CONFIG_TEST_LIVEPATCH) += test_klp_atomic_replace.o \ + test_klp_callbacks_demo.o \ + test_klp_callbacks_demo2.o \ + test_klp_callbacks_busy.o \ + test_klp_callbacks_mod.o \ + test_klp_livepatch.o \ + test_klp_shadow_vars.o + +# Target modules to be livepatched require CC_FLAGS_FTRACE +CFLAGS_test_klp_callbacks_busy.o += $(CC_FLAGS_FTRACE) +CFLAGS_test_klp_callbacks_mod.o += $(CC_FLAGS_FTRACE) diff --git a/lib/livepatch/test_klp_atomic_replace.c b/lib/livepatch/test_klp_atomic_replace.c new file mode 100644 index 000000000000..5af7093ca00c --- /dev/null +++ b/lib/livepatch/test_klp_atomic_replace.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com> + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/livepatch.h> + +static int replace; +module_param(replace, int, 0644); +MODULE_PARM_DESC(replace, "replace (default=0)"); + +#include <linux/seq_file.h> +static int livepatch_meminfo_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%s: %s\n", THIS_MODULE->name, + "this has been live patched"); + return 0; +} + +static struct klp_func funcs[] = { + { + .old_name = "meminfo_proc_show", + .new_func = livepatch_meminfo_proc_show, + }, {} +}; + +static struct klp_object objs[] = { + { + /* name being NULL means vmlinux */ + .funcs = funcs, + }, {} +}; + +static struct klp_patch patch = { + .mod = THIS_MODULE, + .objs = objs, + /* set .replace in the init function below for demo purposes */ +}; + +static int test_klp_atomic_replace_init(void) +{ + patch.replace = replace; + return klp_enable_patch(&patch); +} + +static void test_klp_atomic_replace_exit(void) +{ +} + +module_init(test_klp_atomic_replace_init); +module_exit(test_klp_atomic_replace_exit); +MODULE_LICENSE("GPL"); +MODULE_INFO(livepatch, "Y"); +MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>"); +MODULE_DESCRIPTION("Livepatch test: atomic replace"); diff --git a/lib/livepatch/test_klp_callbacks_busy.c b/lib/livepatch/test_klp_callbacks_busy.c new file mode 100644 index 000000000000..40beddf8a0e2 --- /dev/null +++ b/lib/livepatch/test_klp_callbacks_busy.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com> + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/workqueue.h> +#include <linux/delay.h> + +static int sleep_secs; +module_param(sleep_secs, int, 0644); +MODULE_PARM_DESC(sleep_secs, "sleep_secs (default=0)"); + +static void busymod_work_func(struct work_struct *work); +static DECLARE_DELAYED_WORK(work, busymod_work_func); + +static void busymod_work_func(struct work_struct *work) +{ + pr_info("%s, sleeping %d seconds ...\n", __func__, sleep_secs); + msleep(sleep_secs * 1000); + pr_info("%s exit\n", __func__); +} + +static int test_klp_callbacks_busy_init(void) +{ + pr_info("%s\n", __func__); + schedule_delayed_work(&work, + msecs_to_jiffies(1000 * 0)); + return 0; +} + +static void test_klp_callbacks_busy_exit(void) +{ + cancel_delayed_work_sync(&work); + pr_info("%s\n", __func__); +} + +module_init(test_klp_callbacks_busy_init); +module_exit(test_klp_callbacks_busy_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>"); +MODULE_DESCRIPTION("Livepatch test: busy target module"); diff --git a/lib/livepatch/test_klp_callbacks_demo.c b/lib/livepatch/test_klp_callbacks_demo.c new file mode 100644 index 000000000000..3fd8fe1cd1cc --- /dev/null +++ b/lib/livepatch/test_klp_callbacks_demo.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com> + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/livepatch.h> + +static int pre_patch_ret; +module_param(pre_patch_ret, int, 0644); +MODULE_PARM_DESC(pre_patch_ret, "pre_patch_ret (default=0)"); + +static const char *const module_state[] = { + [MODULE_STATE_LIVE] = "[MODULE_STATE_LIVE] Normal state", + [MODULE_STATE_COMING] = "[MODULE_STATE_COMING] Full formed, running module_init", + [MODULE_STATE_GOING] = "[MODULE_STATE_GOING] Going away", + [MODULE_STATE_UNFORMED] = "[MODULE_STATE_UNFORMED] Still setting it up", +}; + +static void callback_info(const char *callback, struct klp_object *obj) +{ + if (obj->mod) + pr_info("%s: %s -> %s\n", callback, obj->mod->name, + module_state[obj->mod->state]); + else + pr_info("%s: vmlinux\n", callback); +} + +/* Executed on object patching (ie, patch enablement) */ +static int pre_patch_callback(struct klp_object *obj) +{ + callback_info(__func__, obj); + return pre_patch_ret; +} + +/* Executed on object unpatching (ie, patch disablement) */ +static void post_patch_callback(struct klp_object *obj) +{ + callback_info(__func__, obj); +} + +/* Executed on object unpatching (ie, patch disablement) */ +static void pre_unpatch_callback(struct klp_object *obj) +{ + callback_info(__func__, obj); +} + +/* Executed on object unpatching (ie, patch disablement) */ +static void post_unpatch_callback(struct klp_object *obj) +{ + callback_info(__func__, obj); +} + +static void patched_work_func(struct work_struct *work) +{ + pr_info("%s\n", __func__); +} + +static struct klp_func no_funcs[] = { + {} +}; + +static struct klp_func busymod_funcs[] = { + { + .old_name = "busymod_work_func", + .new_func = patched_work_func, + }, {} +}; + +static struct klp_object objs[] = { + { + .name = NULL, /* vmlinux */ + .funcs = no_funcs, + .callbacks = { + .pre_patch = pre_patch_callback, + .post_patch = post_patch_callback, + .pre_unpatch = pre_unpatch_callback, + .post_unpatch = post_unpatch_callback, + }, + }, { + .name = "test_klp_callbacks_mod", + .funcs = no_funcs, + .callbacks = { + .pre_patch = pre_patch_callback, + .post_patch = post_patch_callback, + .pre_unpatch = pre_unpatch_callback, + .post_unpatch = post_unpatch_callback, + }, + }, { + .name = "test_klp_callbacks_busy", + .funcs = busymod_funcs, + .callbacks = { + .pre_patch = pre_patch_callback, + .post_patch = post_patch_callback, + .pre_unpatch = pre_unpatch_callback, + .post_unpatch = post_unpatch_callback, + }, + }, { } +}; + +static struct klp_patch patch = { + .mod = THIS_MODULE, + .objs = objs, +}; + +static int test_klp_callbacks_demo_init(void) +{ + return klp_enable_patch(&patch); +} + +static void test_klp_callbacks_demo_exit(void) +{ +} + +module_init(test_klp_callbacks_demo_init); +module_exit(test_klp_callbacks_demo_exit); +MODULE_LICENSE("GPL"); +MODULE_INFO(livepatch, "Y"); +MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>"); +MODULE_DESCRIPTION("Livepatch test: livepatch demo"); diff --git a/lib/livepatch/test_klp_callbacks_demo2.c b/lib/livepatch/test_klp_callbacks_demo2.c new file mode 100644 index 000000000000..5417573e80af --- /dev/null +++ b/lib/livepatch/test_klp_callbacks_demo2.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com> + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/livepatch.h> + +static int replace; +module_param(replace, int, 0644); +MODULE_PARM_DESC(replace, "replace (default=0)"); + +static const char *const module_state[] = { + [MODULE_STATE_LIVE] = "[MODULE_STATE_LIVE] Normal state", + [MODULE_STATE_COMING] = "[MODULE_STATE_COMING] Full formed, running module_init", + [MODULE_STATE_GOING] = "[MODULE_STATE_GOING] Going away", + [MODULE_STATE_UNFORMED] = "[MODULE_STATE_UNFORMED] Still setting it up", +}; + +static void callback_info(const char *callback, struct klp_object *obj) +{ + if (obj->mod) + pr_info("%s: %s -> %s\n", callback, obj->mod->name, + module_state[obj->mod->state]); + else + pr_info("%s: vmlinux\n", callback); +} + +/* Executed on object patching (ie, patch enablement) */ +static int pre_patch_callback(struct klp_object *obj) +{ + callback_info(__func__, obj); + return 0; +} + +/* Executed on object unpatching (ie, patch disablement) */ +static void post_patch_callback(struct klp_object *obj) +{ + callback_info(__func__, obj); +} + +/* Executed on object unpatching (ie, patch disablement) */ +static void pre_unpatch_callback(struct klp_object *obj) +{ + callback_info(__func__, obj); +} + +/* Executed on object unpatching (ie, patch disablement) */ +static void post_unpatch_callback(struct klp_object *obj) +{ + callback_info(__func__, obj); +} + +static struct klp_func no_funcs[] = { + { } +}; + +static struct klp_object objs[] = { + { + .name = NULL, /* vmlinux */ + .funcs = no_funcs, + .callbacks = { + .pre_patch = pre_patch_callback, + .post_patch = post_patch_callback, + .pre_unpatch = pre_unpatch_callback, + .post_unpatch = post_unpatch_callback, + }, + }, { } +}; + +static struct klp_patch patch = { + .mod = THIS_MODULE, + .objs = objs, + /* set .replace in the init function below for demo purposes */ +}; + +static int test_klp_callbacks_demo2_init(void) +{ + patch.replace = replace; + return klp_enable_patch(&patch); +} + +static void test_klp_callbacks_demo2_exit(void) +{ +} + +module_init(test_klp_callbacks_demo2_init); +module_exit(test_klp_callbacks_demo2_exit); +MODULE_LICENSE("GPL"); +MODULE_INFO(livepatch, "Y"); +MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>"); +MODULE_DESCRIPTION("Livepatch test: livepatch demo2"); diff --git a/lib/livepatch/test_klp_callbacks_mod.c b/lib/livepatch/test_klp_callbacks_mod.c new file mode 100644 index 000000000000..8fbe645b1c2c --- /dev/null +++ b/lib/livepatch/test_klp_callbacks_mod.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com> + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> + +static int test_klp_callbacks_mod_init(void) +{ + pr_info("%s\n", __func__); + return 0; +} + +static void test_klp_callbacks_mod_exit(void) +{ + pr_info("%s\n", __func__); +} + +module_init(test_klp_callbacks_mod_init); +module_exit(test_klp_callbacks_mod_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>"); +MODULE_DESCRIPTION("Livepatch test: target module"); diff --git a/lib/livepatch/test_klp_livepatch.c b/lib/livepatch/test_klp_livepatch.c new file mode 100644 index 000000000000..aff08199de71 --- /dev/null +++ b/lib/livepatch/test_klp_livepatch.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2014 Seth Jennings <sjenning@redhat.com> + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/livepatch.h> + +#include <linux/seq_file.h> +static int livepatch_cmdline_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%s: %s\n", THIS_MODULE->name, + "this has been live patched"); + return 0; +} + +static struct klp_func funcs[] = { + { + .old_name = "cmdline_proc_show", + .new_func = livepatch_cmdline_proc_show, + }, { } +}; + +static struct klp_object objs[] = { + { + /* name being NULL means vmlinux */ + .funcs = funcs, + }, { } +}; + +static struct klp_patch patch = { + .mod = THIS_MODULE, + .objs = objs, +}; + +static int test_klp_livepatch_init(void) +{ + return klp_enable_patch(&patch); +} + +static void test_klp_livepatch_exit(void) +{ +} + +module_init(test_klp_livepatch_init); +module_exit(test_klp_livepatch_exit); +MODULE_LICENSE("GPL"); +MODULE_INFO(livepatch, "Y"); +MODULE_AUTHOR("Seth Jennings <sjenning@redhat.com>"); +MODULE_DESCRIPTION("Livepatch test: livepatch module"); diff --git a/lib/livepatch/test_klp_shadow_vars.c b/lib/livepatch/test_klp_shadow_vars.c new file mode 100644 index 000000000000..fe5c413efe96 --- /dev/null +++ b/lib/livepatch/test_klp_shadow_vars.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com> + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/livepatch.h> +#include <linux/slab.h> + +/* + * Keep a small list of pointers so that we can print address-agnostic + * pointer values. Use a rolling integer count to differentiate the values. + * Ironically we could have used the shadow variable API to do this, but + * let's not lean too heavily on the very code we're testing. + */ +static LIST_HEAD(ptr_list); +struct shadow_ptr { + void *ptr; + int id; + struct list_head list; +}; + +static void free_ptr_list(void) +{ + struct shadow_ptr *sp, *tmp_sp; + + list_for_each_entry_safe(sp, tmp_sp, &ptr_list, list) { + list_del(&sp->list); + kfree(sp); + } +} + +static int ptr_id(void *ptr) +{ + struct shadow_ptr *sp; + static int count; + + list_for_each_entry(sp, &ptr_list, list) { + if (sp->ptr == ptr) + return sp->id; + } + + sp = kmalloc(sizeof(*sp), GFP_ATOMIC); + if (!sp) + return -ENOMEM; + sp->ptr = ptr; + sp->id = count++; + + list_add(&sp->list, &ptr_list); + + return sp->id; +} + +/* + * Shadow variable wrapper functions that echo the function and arguments + * to the kernel log for testing verification. Don't display raw pointers, + * but use the ptr_id() value instead. + */ +static void *shadow_get(void *obj, unsigned long id) +{ + void *ret = klp_shadow_get(obj, id); + + pr_info("klp_%s(obj=PTR%d, id=0x%lx) = PTR%d\n", + __func__, ptr_id(obj), id, ptr_id(ret)); + + return ret; +} + +static void *shadow_alloc(void *obj, unsigned long id, size_t size, + gfp_t gfp_flags, klp_shadow_ctor_t ctor, + void *ctor_data) +{ + void *ret = klp_shadow_alloc(obj, id, size, gfp_flags, ctor, + ctor_data); + pr_info("klp_%s(obj=PTR%d, id=0x%lx, size=%zx, gfp_flags=%pGg), ctor=PTR%d, ctor_data=PTR%d = PTR%d\n", + __func__, ptr_id(obj), id, size, &gfp_flags, ptr_id(ctor), + ptr_id(ctor_data), ptr_id(ret)); + return ret; +} + +static void *shadow_get_or_alloc(void *obj, unsigned long id, size_t size, + gfp_t gfp_flags, klp_shadow_ctor_t ctor, + void *ctor_data) +{ + void *ret = klp_shadow_get_or_alloc(obj, id, size, gfp_flags, ctor, + ctor_data); + pr_info("klp_%s(obj=PTR%d, id=0x%lx, size=%zx, gfp_flags=%pGg), ctor=PTR%d, ctor_data=PTR%d = PTR%d\n", + __func__, ptr_id(obj), id, size, &gfp_flags, ptr_id(ctor), + ptr_id(ctor_data), ptr_id(ret)); + return ret; +} + +static void shadow_free(void *obj, unsigned long id, klp_shadow_dtor_t dtor) +{ + klp_shadow_free(obj, id, dtor); + pr_info("klp_%s(obj=PTR%d, id=0x%lx, dtor=PTR%d)\n", + __func__, ptr_id(obj), id, ptr_id(dtor)); +} + +static void shadow_free_all(unsigned long id, klp_shadow_dtor_t dtor) +{ + klp_shadow_free_all(id, dtor); + pr_info("klp_%s(id=0x%lx, dtor=PTR%d)\n", + __func__, id, ptr_id(dtor)); +} + + +/* Shadow variable constructor - remember simple pointer data */ +static int shadow_ctor(void *obj, void *shadow_data, void *ctor_data) +{ + int **shadow_int = shadow_data; + *shadow_int = ctor_data; + pr_info("%s: PTR%d -> PTR%d\n", + __func__, ptr_id(shadow_int), ptr_id(ctor_data)); + + return 0; +} + +static void shadow_dtor(void *obj, void *shadow_data) +{ + pr_info("%s(obj=PTR%d, shadow_data=PTR%d)\n", + __func__, ptr_id(obj), ptr_id(shadow_data)); +} + +static int test_klp_shadow_vars_init(void) +{ + void *obj = THIS_MODULE; + int id = 0x1234; + size_t size = sizeof(int *); + gfp_t gfp_flags = GFP_KERNEL; + + int var1, var2, var3, var4; + int **sv1, **sv2, **sv3, **sv4; + + void *ret; + + ptr_id(NULL); + ptr_id(&var1); + ptr_id(&var2); + ptr_id(&var3); + ptr_id(&var4); + + /* + * With an empty shadow variable hash table, expect not to find + * any matches. + */ + ret = shadow_get(obj, id); + if (!ret) + pr_info(" got expected NULL result\n"); + + /* + * Allocate a few shadow variables with different <obj> and <id>. + */ + sv1 = shadow_alloc(obj, id, size, gfp_flags, shadow_ctor, &var1); + if (!sv1) + return -ENOMEM; + + sv2 = shadow_alloc(obj + 1, id, size, gfp_flags, shadow_ctor, &var2); + if (!sv2) + return -ENOMEM; + + sv3 = shadow_alloc(obj, id + 1, size, gfp_flags, shadow_ctor, &var3); + if (!sv3) + return -ENOMEM; + + /* + * Verify we can find our new shadow variables and that they point + * to expected data. + */ + ret = shadow_get(obj, id); + if (!ret) + return -EINVAL; + if (ret == sv1 && *sv1 == &var1) + pr_info(" got expected PTR%d -> PTR%d result\n", + ptr_id(sv1), ptr_id(*sv1)); + + ret = shadow_get(obj + 1, id); + if (!ret) + return -EINVAL; + if (ret == sv2 && *sv2 == &var2) + pr_info(" got expected PTR%d -> PTR%d result\n", + ptr_id(sv2), ptr_id(*sv2)); + ret = shadow_get(obj, id + 1); + if (!ret) + return -EINVAL; + if (ret == sv3 && *sv3 == &var3) + pr_info(" got expected PTR%d -> PTR%d result\n", + ptr_id(sv3), ptr_id(*sv3)); + + /* + * Allocate or get a few more, this time with the same <obj>, <id>. + * The second invocation should return the same shadow var. + */ + sv4 = shadow_get_or_alloc(obj + 2, id, size, gfp_flags, shadow_ctor, &var4); + if (!sv4) + return -ENOMEM; + + ret = shadow_get_or_alloc(obj + 2, id, size, gfp_flags, shadow_ctor, &var4); + if (!ret) + return -EINVAL; + if (ret == sv4 && *sv4 == &var4) + pr_info(" got expected PTR%d -> PTR%d result\n", + ptr_id(sv4), ptr_id(*sv4)); + + /* + * Free the <obj=*, id> shadow variables and check that we can no + * longer find them. + */ + shadow_free(obj, id, shadow_dtor); /* sv1 */ + ret = shadow_get(obj, id); + if (!ret) + pr_info(" got expected NULL result\n"); + + shadow_free(obj + 1, id, shadow_dtor); /* sv2 */ + ret = shadow_get(obj + 1, id); + if (!ret) + pr_info(" got expected NULL result\n"); + + shadow_free(obj + 2, id, shadow_dtor); /* sv4 */ + ret = shadow_get(obj + 2, id); + if (!ret) + pr_info(" got expected NULL result\n"); + + /* + * We should still find an <id+1> variable. + */ + ret = shadow_get(obj, id + 1); + if (!ret) + return -EINVAL; + if (ret == sv3 && *sv3 == &var3) + pr_info(" got expected PTR%d -> PTR%d result\n", + ptr_id(sv3), ptr_id(*sv3)); + + /* + * Free all the <id+1> variables, too. + */ + shadow_free_all(id + 1, shadow_dtor); /* sv3 */ + ret = shadow_get(obj, id); + if (!ret) + pr_info(" shadow_get() got expected NULL result\n"); + + + free_ptr_list(); + + return 0; +} + +static void test_klp_shadow_vars_exit(void) +{ +} + +module_init(test_klp_shadow_vars_init); +module_exit(test_klp_shadow_vars_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>"); +MODULE_DESCRIPTION("Livepatch test: shadow variables"); diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 1e1bbf171eca..a1705545e6ac 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -1989,6 +1989,7 @@ void locking_selftest(void) init_shared_classes(); debug_locks_silent = !debug_locks_verbose; + lockdep_set_selftest_task(current); DO_TESTCASE_6R("A-A deadlock", AA); DO_TESTCASE_6R("A-B-B-A deadlock", ABBA); @@ -2097,5 +2098,6 @@ void locking_selftest(void) printk("---------------------------------\n"); debug_locks = 1; } + lockdep_set_selftest_task(NULL); debug_locks_silent = 0; } diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c index 236eb21167b5..a8ede77afe0d 100644 --- a/lib/lzo/lzo1x_compress.c +++ b/lib/lzo/lzo1x_compress.c @@ -20,7 +20,8 @@ static noinline size_t lzo1x_1_do_compress(const unsigned char *in, size_t in_len, unsigned char *out, size_t *out_len, - size_t ti, void *wrkmem) + size_t ti, void *wrkmem, signed char *state_offset, + const unsigned char bitstream_version) { const unsigned char *ip; unsigned char *op; @@ -35,27 +36,85 @@ lzo1x_1_do_compress(const unsigned char *in, size_t in_len, ip += ti < 4 ? 4 - ti : 0; for (;;) { - const unsigned char *m_pos; + const unsigned char *m_pos = NULL; size_t t, m_len, m_off; u32 dv; + u32 run_length = 0; literal: ip += 1 + ((ip - ii) >> 5); next: if (unlikely(ip >= ip_end)) break; dv = get_unaligned_le32(ip); - t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; - m_pos = in + dict[t]; - dict[t] = (lzo_dict_t) (ip - in); - if (unlikely(dv != get_unaligned_le32(m_pos))) - goto literal; + + if (dv == 0 && bitstream_version) { + const unsigned char *ir = ip + 4; + const unsigned char *limit = ip_end + < (ip + MAX_ZERO_RUN_LENGTH + 1) + ? ip_end : ip + MAX_ZERO_RUN_LENGTH + 1; +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \ + defined(LZO_FAST_64BIT_MEMORY_ACCESS) + u64 dv64; + + for (; (ir + 32) <= limit; ir += 32) { + dv64 = get_unaligned((u64 *)ir); + dv64 |= get_unaligned((u64 *)ir + 1); + dv64 |= get_unaligned((u64 *)ir + 2); + dv64 |= get_unaligned((u64 *)ir + 3); + if (dv64) + break; + } + for (; (ir + 8) <= limit; ir += 8) { + dv64 = get_unaligned((u64 *)ir); + if (dv64) { +# if defined(__LITTLE_ENDIAN) + ir += __builtin_ctzll(dv64) >> 3; +# elif defined(__BIG_ENDIAN) + ir += __builtin_clzll(dv64) >> 3; +# else +# error "missing endian definition" +# endif + break; + } + } +#else + while ((ir < (const unsigned char *) + ALIGN((uintptr_t)ir, 4)) && + (ir < limit) && (*ir == 0)) + ir++; + for (; (ir + 4) <= limit; ir += 4) { + dv = *((u32 *)ir); + if (dv) { +# if defined(__LITTLE_ENDIAN) + ir += __builtin_ctz(dv) >> 3; +# elif defined(__BIG_ENDIAN) + ir += __builtin_clz(dv) >> 3; +# else +# error "missing endian definition" +# endif + break; + } + } +#endif + while (likely(ir < limit) && unlikely(*ir == 0)) + ir++; + run_length = ir - ip; + if (run_length > MAX_ZERO_RUN_LENGTH) + run_length = MAX_ZERO_RUN_LENGTH; + } else { + t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; + m_pos = in + dict[t]; + dict[t] = (lzo_dict_t) (ip - in); + if (unlikely(dv != get_unaligned_le32(m_pos))) + goto literal; + } ii -= ti; ti = 0; t = ip - ii; if (t != 0) { if (t <= 3) { - op[-2] |= t; + op[*state_offset] |= t; COPY4(op, ii); op += t; } else if (t <= 16) { @@ -88,6 +147,17 @@ next: } } + if (unlikely(run_length)) { + ip += run_length; + run_length -= MIN_ZERO_RUN_LENGTH; + put_unaligned_le32((run_length << 21) | 0xfffc18 + | (run_length & 0x7), op); + op += 4; + run_length = 0; + *state_offset = -3; + goto finished_writing_instruction; + } + m_len = 4; { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64) @@ -170,7 +240,6 @@ m_len_done: m_off = ip - m_pos; ip += m_len; - ii = ip; if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) { m_off -= 1; *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2)); @@ -207,29 +276,48 @@ m_len_done: *op++ = (m_off << 2); *op++ = (m_off >> 6); } + *state_offset = -2; +finished_writing_instruction: + ii = ip; goto next; } *out_len = op - out; return in_end - (ii - ti); } -int lzo1x_1_compress(const unsigned char *in, size_t in_len, +int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out, size_t *out_len, - void *wrkmem) + void *wrkmem, const unsigned char bitstream_version) { const unsigned char *ip = in; unsigned char *op = out; + unsigned char *data_start; size_t l = in_len; size_t t = 0; + signed char state_offset = -2; + unsigned int m4_max_offset; + + // LZO v0 will never write 17 as first byte (except for zero-length + // input), so this is used to version the bitstream + if (bitstream_version > 0) { + *op++ = 17; + *op++ = bitstream_version; + m4_max_offset = M4_MAX_OFFSET_V1; + } else { + m4_max_offset = M4_MAX_OFFSET_V0; + } + + data_start = op; while (l > 20) { - size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1); + size_t ll = l <= (m4_max_offset + 1) ? l : (m4_max_offset + 1); uintptr_t ll_end = (uintptr_t) ip + ll; if ((ll_end + ((t + ll) >> 5)) <= ll_end) break; BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS); memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t)); - t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem); + t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem, + &state_offset, bitstream_version); ip += ll; op += *out_len; l -= ll; @@ -239,10 +327,10 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len, if (t > 0) { const unsigned char *ii = in + in_len - t; - if (op == out && t <= 238) { + if (op == data_start && t <= 238) { *op++ = (17 + t); } else if (t <= 3) { - op[-2] |= t; + op[state_offset] |= t; } else if (t <= 18) { *op++ = (t - 3); } else { @@ -273,7 +361,24 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len, *out_len = op - out; return LZO_E_OK; } + +int lzo1x_1_compress(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len, + void *wrkmem) +{ + return lzogeneric1x_1_compress(in, in_len, out, out_len, wrkmem, 0); +} + +int lzorle1x_1_compress(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len, + void *wrkmem) +{ + return lzogeneric1x_1_compress(in, in_len, out, out_len, + wrkmem, LZO_VERSION); +} + EXPORT_SYMBOL_GPL(lzo1x_1_compress); +EXPORT_SYMBOL_GPL(lzorle1x_1_compress); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LZO1X-1 Compressor"); diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c index a1c387f6afba..9e07e9ef1aad 100644 --- a/lib/lzo/lzo1x_decompress_safe.c +++ b/lib/lzo/lzo1x_decompress_safe.c @@ -46,11 +46,21 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, const unsigned char * const ip_end = in + in_len; unsigned char * const op_end = out + *out_len; + unsigned char bitstream_version; + op = out; ip = in; if (unlikely(in_len < 3)) goto input_overrun; + + if (likely(in_len >= 5) && likely(*ip == 17)) { + bitstream_version = ip[1]; + ip += 2; + } else { + bitstream_version = 0; + } + if (*ip > 17) { t = *ip++ - 17; if (t < 4) { @@ -154,32 +164,49 @@ copy_literal_run: m_pos -= next >> 2; next &= 3; } else { - m_pos = op; - m_pos -= (t & 8) << 11; - t = (t & 7) + (3 - 1); - if (unlikely(t == 2)) { - size_t offset; - const unsigned char *ip_last = ip; + NEED_IP(2); + next = get_unaligned_le16(ip); + if (((next & 0xfffc) == 0xfffc) && + ((t & 0xf8) == 0x18) && + likely(bitstream_version)) { + NEED_IP(3); + t &= 7; + t |= ip[2] << 3; + t += MIN_ZERO_RUN_LENGTH; + NEED_OP(t); + memset(op, 0, t); + op += t; + next &= 3; + ip += 3; + goto match_next; + } else { + m_pos = op; + m_pos -= (t & 8) << 11; + t = (t & 7) + (3 - 1); + if (unlikely(t == 2)) { + size_t offset; + const unsigned char *ip_last = ip; - while (unlikely(*ip == 0)) { - ip++; - NEED_IP(1); - } - offset = ip - ip_last; - if (unlikely(offset > MAX_255_COUNT)) - return LZO_E_ERROR; + while (unlikely(*ip == 0)) { + ip++; + NEED_IP(1); + } + offset = ip - ip_last; + if (unlikely(offset > MAX_255_COUNT)) + return LZO_E_ERROR; - offset = (offset << 8) - offset; - t += offset + 7 + *ip++; - NEED_IP(2); + offset = (offset << 8) - offset; + t += offset + 7 + *ip++; + NEED_IP(2); + next = get_unaligned_le16(ip); + } + ip += 2; + m_pos -= next >> 2; + next &= 3; + if (m_pos == op) + goto eof_found; + m_pos -= 0x4000; } - next = get_unaligned_le16(ip); - ip += 2; - m_pos -= next >> 2; - next &= 3; - if (m_pos == op) - goto eof_found; - m_pos -= 0x4000; } TEST_LB(m_pos); #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h index 4edefd2f540c..b60851fcf6ce 100644 --- a/lib/lzo/lzodefs.h +++ b/lib/lzo/lzodefs.h @@ -13,9 +13,15 @@ */ +/* Version + * 0: original lzo version + * 1: lzo with support for RLE + */ +#define LZO_VERSION 1 + #define COPY4(dst, src) \ put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) -#if defined(__x86_64__) +#if defined(CONFIG_X86_64) || defined(CONFIG_ARM64) #define COPY8(dst, src) \ put_unaligned(get_unaligned((const u64 *)(src)), (u64 *)(dst)) #else @@ -25,19 +31,21 @@ #if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN) #error "conflicting endian definitions" -#elif defined(__x86_64__) +#elif defined(CONFIG_X86_64) || defined(CONFIG_ARM64) #define LZO_USE_CTZ64 1 #define LZO_USE_CTZ32 1 -#elif defined(__i386__) || defined(__powerpc__) +#define LZO_FAST_64BIT_MEMORY_ACCESS +#elif defined(CONFIG_X86) || defined(CONFIG_PPC) #define LZO_USE_CTZ32 1 -#elif defined(__arm__) && (__LINUX_ARM_ARCH__ >= 5) +#elif defined(CONFIG_ARM) && (__LINUX_ARM_ARCH__ >= 5) #define LZO_USE_CTZ32 1 #endif #define M1_MAX_OFFSET 0x0400 #define M2_MAX_OFFSET 0x0800 #define M3_MAX_OFFSET 0x4000 -#define M4_MAX_OFFSET 0xbfff +#define M4_MAX_OFFSET_V0 0xbfff +#define M4_MAX_OFFSET_V1 0xbffe #define M1_MIN_LEN 2 #define M1_MAX_LEN 2 @@ -53,6 +61,9 @@ #define M3_MARKER 32 #define M4_MARKER 16 +#define MIN_ZERO_RUN_LENGTH 4 +#define MAX_ZERO_RUN_LENGTH (2047 + MIN_ZERO_RUN_LENGTH) + #define lzo_dict_t unsigned short #define D_BITS 13 #define D_SIZE (1u << D_BITS) diff --git a/lib/nlattr.c b/lib/nlattr.c index d26de6156b97..cace9b307781 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -69,7 +69,8 @@ static int validate_nla_bitfield32(const struct nlattr *nla, static int nla_validate_array(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy, - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + unsigned int validate) { const struct nlattr *entry; int rem; @@ -86,8 +87,8 @@ static int nla_validate_array(const struct nlattr *head, int len, int maxtype, return -ERANGE; } - ret = nla_validate(nla_data(entry), nla_len(entry), - maxtype, policy, extack); + ret = __nla_validate(nla_data(entry), nla_len(entry), + maxtype, policy, validate, extack); if (ret < 0) return ret; } @@ -154,13 +155,17 @@ static int nla_validate_int_range(const struct nla_policy *pt, } static int validate_nla(const struct nlattr *nla, int maxtype, - const struct nla_policy *policy, + const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack) { + u16 strict_start_type = policy[0].strict_start_type; const struct nla_policy *pt; int minlen = 0, attrlen = nla_len(nla), type = nla_type(nla); int err = -ERANGE; + if (strict_start_type && type >= strict_start_type) + validate |= NL_VALIDATE_STRICT; + if (type <= 0 || type > maxtype) return 0; @@ -172,6 +177,26 @@ static int validate_nla(const struct nlattr *nla, int maxtype, (pt->type == NLA_EXACT_LEN_WARN && attrlen != pt->len)) { pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n", current->comm, type); + if (validate & NL_VALIDATE_STRICT_ATTRS) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "invalid attribute length"); + return -EINVAL; + } + } + + if (validate & NL_VALIDATE_NESTED) { + if ((pt->type == NLA_NESTED || pt->type == NLA_NESTED_ARRAY) && + !(nla->nla_type & NLA_F_NESTED)) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "NLA_F_NESTED is missing"); + return -EINVAL; + } + if (pt->type != NLA_NESTED && pt->type != NLA_NESTED_ARRAY && + pt->type != NLA_UNSPEC && (nla->nla_type & NLA_F_NESTED)) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "NLA_F_NESTED not expected"); + return -EINVAL; + } } switch (pt->type) { @@ -244,8 +269,9 @@ static int validate_nla(const struct nlattr *nla, int maxtype, if (attrlen < NLA_HDRLEN) goto out_err; if (pt->validation_data) { - err = nla_validate(nla_data(nla), nla_len(nla), pt->len, - pt->validation_data, extack); + err = __nla_validate(nla_data(nla), nla_len(nla), pt->len, + pt->validation_data, validate, + extack); if (err < 0) { /* * return directly to preserve the inner @@ -268,7 +294,7 @@ static int validate_nla(const struct nlattr *nla, int maxtype, err = nla_validate_array(nla_data(nla), nla_len(nla), pt->len, pt->validation_data, - extack); + extack, validate); if (err < 0) { /* * return directly to preserve the inner @@ -278,10 +304,23 @@ static int validate_nla(const struct nlattr *nla, int maxtype, } } break; + + case NLA_UNSPEC: + if (validate & NL_VALIDATE_UNSPEC) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "Unsupported attribute"); + return -EINVAL; + } + /* fall through */ + case NLA_MIN_LEN: + if (attrlen < pt->len) + goto out_err; + break; + default: if (pt->len) minlen = pt->len; - else if (pt->type != NLA_UNSPEC) + else minlen = nla_attr_minlen[pt->type]; if (attrlen < minlen) @@ -315,37 +354,76 @@ out_err: return err; } +static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype, + const struct nla_policy *policy, + unsigned int validate, + struct netlink_ext_ack *extack, + struct nlattr **tb) +{ + const struct nlattr *nla; + int rem; + + if (tb) + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); + + nla_for_each_attr(nla, head, len, rem) { + u16 type = nla_type(nla); + + if (type == 0 || type > maxtype) { + if (validate & NL_VALIDATE_MAXTYPE) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "Unknown attribute type"); + return -EINVAL; + } + continue; + } + if (policy) { + int err = validate_nla(nla, maxtype, policy, + validate, extack); + + if (err < 0) + return err; + } + + if (tb) + tb[type] = (struct nlattr *)nla; + } + + if (unlikely(rem > 0)) { + pr_warn_ratelimited("netlink: %d bytes leftover after parsing attributes in process `%s'.\n", + rem, current->comm); + NL_SET_ERR_MSG(extack, "bytes leftover after parsing attributes"); + if (validate & NL_VALIDATE_TRAILING) + return -EINVAL; + } + + return 0; +} + /** - * nla_validate - Validate a stream of attributes + * __nla_validate - Validate a stream of attributes * @head: head of attribute stream * @len: length of attribute stream * @maxtype: maximum attribute type to be expected * @policy: validation policy + * @validate: validation strictness * @extack: extended ACK report struct * * Validates all attributes in the specified attribute stream against the - * specified policy. Attributes with a type exceeding maxtype will be - * ignored. See documenation of struct nla_policy for more details. + * specified policy. Validation depends on the validate flags passed, see + * &enum netlink_validation for more details on that. + * See documenation of struct nla_policy for more details. * * Returns 0 on success or a negative error code. */ -int nla_validate(const struct nlattr *head, int len, int maxtype, - const struct nla_policy *policy, - struct netlink_ext_ack *extack) +int __nla_validate(const struct nlattr *head, int len, int maxtype, + const struct nla_policy *policy, unsigned int validate, + struct netlink_ext_ack *extack) { - const struct nlattr *nla; - int rem; - - nla_for_each_attr(nla, head, len, rem) { - int err = validate_nla(nla, maxtype, policy, extack); - - if (err < 0) - return err; - } - - return 0; + return __nla_validate_parse(head, len, maxtype, policy, validate, + extack, NULL); } -EXPORT_SYMBOL(nla_validate); +EXPORT_SYMBOL(__nla_validate); /** * nla_policy_len - Determin the max. length of a policy @@ -377,76 +455,30 @@ nla_policy_len(const struct nla_policy *p, int n) EXPORT_SYMBOL(nla_policy_len); /** - * nla_parse - Parse a stream of attributes into a tb buffer + * __nla_parse - Parse a stream of attributes into a tb buffer * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @head: head of attribute stream * @len: length of attribute stream * @policy: validation policy + * @validate: validation strictness + * @extack: extended ACK pointer * * Parses a stream of attributes and stores a pointer to each attribute in - * the tb array accessible via the attribute type. Attributes with a type - * exceeding maxtype will be silently ignored for backwards compatibility - * reasons. policy may be set to NULL if no validation is required. + * the tb array accessible via the attribute type. + * Validation is controlled by the @validate parameter. * * Returns 0 on success or a negative error code. */ -static int __nla_parse(struct nlattr **tb, int maxtype, - const struct nlattr *head, int len, - bool strict, const struct nla_policy *policy, - struct netlink_ext_ack *extack) -{ - const struct nlattr *nla; - int rem; - - memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); - - nla_for_each_attr(nla, head, len, rem) { - u16 type = nla_type(nla); - - if (type == 0 || type > maxtype) { - if (strict) { - NL_SET_ERR_MSG(extack, "Unknown attribute type"); - return -EINVAL; - } - continue; - } - if (policy) { - int err = validate_nla(nla, maxtype, policy, extack); - - if (err < 0) - return err; - } - - tb[type] = (struct nlattr *)nla; - } - - if (unlikely(rem > 0)) { - pr_warn_ratelimited("netlink: %d bytes leftover after parsing attributes in process `%s'.\n", - rem, current->comm); - NL_SET_ERR_MSG(extack, "bytes leftover after parsing attributes"); - if (strict) - return -EINVAL; - } - - return 0; -} - -int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, - int len, const struct nla_policy *policy, - struct netlink_ext_ack *extack) -{ - return __nla_parse(tb, maxtype, head, len, false, policy, extack); -} -EXPORT_SYMBOL(nla_parse); - -int nla_parse_strict(struct nlattr **tb, int maxtype, const struct nlattr *head, - int len, const struct nla_policy *policy, - struct netlink_ext_ack *extack) +int __nla_parse(struct nlattr **tb, int maxtype, + const struct nlattr *head, int len, + const struct nla_policy *policy, unsigned int validate, + struct netlink_ext_ack *extack) { - return __nla_parse(tb, maxtype, head, len, true, policy, extack); + return __nla_validate_parse(head, len, maxtype, policy, validate, + extack, tb); } -EXPORT_SYMBOL(nla_parse_strict); +EXPORT_SYMBOL(__nla_parse); /** * nla_find - Find a specific attribute in a stream of attributes diff --git a/lib/objagg.c b/lib/objagg.c index c9b457a91153..576be22e86de 100644 --- a/lib/objagg.c +++ b/lib/objagg.c @@ -4,6 +4,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/rhashtable.h> +#include <linux/idr.h> #include <linux/list.h> #include <linux/sort.h> #include <linux/objagg.h> @@ -11,6 +12,34 @@ #define CREATE_TRACE_POINTS #include <trace/events/objagg.h> +struct objagg_hints { + struct rhashtable node_ht; + struct rhashtable_params ht_params; + struct list_head node_list; + unsigned int node_count; + unsigned int root_count; + unsigned int refcount; + const struct objagg_ops *ops; +}; + +struct objagg_hints_node { + struct rhash_head ht_node; /* member of objagg_hints->node_ht */ + struct list_head list; /* member of objagg_hints->node_list */ + struct objagg_hints_node *parent; + unsigned int root_id; + struct objagg_obj_stats_info stats_info; + unsigned long obj[0]; +}; + +static struct objagg_hints_node * +objagg_hints_lookup(struct objagg_hints *objagg_hints, void *obj) +{ + if (!objagg_hints) + return NULL; + return rhashtable_lookup_fast(&objagg_hints->node_ht, obj, + objagg_hints->ht_params); +} + struct objagg { const struct objagg_ops *ops; void *priv; @@ -18,6 +47,8 @@ struct objagg { struct rhashtable_params ht_params; struct list_head obj_list; unsigned int obj_count; + struct ida root_ida; + struct objagg_hints *hints; }; struct objagg_obj { @@ -30,6 +61,7 @@ struct objagg_obj { void *delta_priv; /* user delta private */ void *root_priv; /* user root private */ }; + unsigned int root_id; unsigned int refcount; /* counts number of users of this object * including nested objects */ @@ -130,7 +162,8 @@ static struct objagg_obj *objagg_obj_lookup(struct objagg *objagg, void *obj) static int objagg_obj_parent_assign(struct objagg *objagg, struct objagg_obj *objagg_obj, - struct objagg_obj *parent) + struct objagg_obj *parent, + bool take_parent_ref) { void *delta_priv; @@ -144,7 +177,8 @@ static int objagg_obj_parent_assign(struct objagg *objagg, */ objagg_obj->parent = parent; objagg_obj->delta_priv = delta_priv; - objagg_obj_ref_inc(objagg_obj->parent); + if (take_parent_ref) + objagg_obj_ref_inc(objagg_obj->parent); trace_objagg_obj_parent_assign(objagg, objagg_obj, parent, parent->refcount); @@ -164,7 +198,7 @@ static int objagg_obj_parent_lookup_assign(struct objagg *objagg, if (!objagg_obj_is_root(objagg_obj_cur)) continue; err = objagg_obj_parent_assign(objagg, objagg_obj, - objagg_obj_cur); + objagg_obj_cur, true); if (!err) return 0; } @@ -184,16 +218,68 @@ static void objagg_obj_parent_unassign(struct objagg *objagg, __objagg_obj_put(objagg, objagg_obj->parent); } +static int objagg_obj_root_id_alloc(struct objagg *objagg, + struct objagg_obj *objagg_obj, + struct objagg_hints_node *hnode) +{ + unsigned int min, max; + int root_id; + + /* In case there are no hints available, the root id is invalid. */ + if (!objagg->hints) { + objagg_obj->root_id = OBJAGG_OBJ_ROOT_ID_INVALID; + return 0; + } + + if (hnode) { + min = hnode->root_id; + max = hnode->root_id; + } else { + /* For objects with no hint, start after the last + * hinted root_id. + */ + min = objagg->hints->root_count; + max = ~0; + } + + root_id = ida_alloc_range(&objagg->root_ida, min, max, GFP_KERNEL); + + if (root_id < 0) + return root_id; + objagg_obj->root_id = root_id; + return 0; +} + +static void objagg_obj_root_id_free(struct objagg *objagg, + struct objagg_obj *objagg_obj) +{ + if (!objagg->hints) + return; + ida_free(&objagg->root_ida, objagg_obj->root_id); +} + static int objagg_obj_root_create(struct objagg *objagg, - struct objagg_obj *objagg_obj) + struct objagg_obj *objagg_obj, + struct objagg_hints_node *hnode) { - objagg_obj->root_priv = objagg->ops->root_create(objagg->priv, - objagg_obj->obj); - if (IS_ERR(objagg_obj->root_priv)) - return PTR_ERR(objagg_obj->root_priv); + int err; + err = objagg_obj_root_id_alloc(objagg, objagg_obj, hnode); + if (err) + return err; + objagg_obj->root_priv = objagg->ops->root_create(objagg->priv, + objagg_obj->obj, + objagg_obj->root_id); + if (IS_ERR(objagg_obj->root_priv)) { + err = PTR_ERR(objagg_obj->root_priv); + goto err_root_create; + } trace_objagg_obj_root_create(objagg, objagg_obj); return 0; + +err_root_create: + objagg_obj_root_id_free(objagg, objagg_obj); + return err; } static void objagg_obj_root_destroy(struct objagg *objagg, @@ -201,19 +287,69 @@ static void objagg_obj_root_destroy(struct objagg *objagg, { trace_objagg_obj_root_destroy(objagg, objagg_obj); objagg->ops->root_destroy(objagg->priv, objagg_obj->root_priv); + objagg_obj_root_id_free(objagg, objagg_obj); +} + +static struct objagg_obj *__objagg_obj_get(struct objagg *objagg, void *obj); + +static int objagg_obj_init_with_hints(struct objagg *objagg, + struct objagg_obj *objagg_obj, + bool *hint_found) +{ + struct objagg_hints_node *hnode; + struct objagg_obj *parent; + int err; + + hnode = objagg_hints_lookup(objagg->hints, objagg_obj->obj); + if (!hnode) { + *hint_found = false; + return 0; + } + *hint_found = true; + + if (!hnode->parent) + return objagg_obj_root_create(objagg, objagg_obj, hnode); + + parent = __objagg_obj_get(objagg, hnode->parent->obj); + if (IS_ERR(parent)) + return PTR_ERR(parent); + + err = objagg_obj_parent_assign(objagg, objagg_obj, parent, false); + if (err) { + *hint_found = false; + err = 0; + goto err_parent_assign; + } + + return 0; + +err_parent_assign: + objagg_obj_put(objagg, parent); + return err; } static int objagg_obj_init(struct objagg *objagg, struct objagg_obj *objagg_obj) { + bool hint_found; int err; + /* First, try to use hints if they are available and + * if they provide result. + */ + err = objagg_obj_init_with_hints(objagg, objagg_obj, &hint_found); + if (err) + return err; + + if (hint_found) + return 0; + /* Try to find if the object can be aggregated under an existing one. */ err = objagg_obj_parent_lookup_assign(objagg, objagg_obj); if (!err) return 0; /* If aggregation is not possible, make the object a root. */ - return objagg_obj_root_create(objagg, objagg_obj); + return objagg_obj_root_create(objagg, objagg_obj, NULL); } static void objagg_obj_fini(struct objagg *objagg, @@ -349,8 +485,9 @@ EXPORT_SYMBOL(objagg_obj_put); /** * objagg_create - creates a new objagg instance - * @ops: user-specific callbacks - * @priv: pointer to a private data passed to the ops + * @ops: user-specific callbacks + * @objagg_hints: hints, can be NULL + * @priv: pointer to a private data passed to the ops * * Note: all locking must be provided by the caller. * @@ -374,18 +511,25 @@ EXPORT_SYMBOL(objagg_obj_put); * Returns a pointer to newly created objagg instance in case of success, * otherwise it returns pointer error using ERR_PTR macro. */ -struct objagg *objagg_create(const struct objagg_ops *ops, void *priv) +struct objagg *objagg_create(const struct objagg_ops *ops, + struct objagg_hints *objagg_hints, void *priv) { struct objagg *objagg; int err; if (WARN_ON(!ops || !ops->root_create || !ops->root_destroy || - !ops->delta_create || !ops->delta_destroy)) + !ops->delta_check || !ops->delta_create || + !ops->delta_destroy)) return ERR_PTR(-EINVAL); + objagg = kzalloc(sizeof(*objagg), GFP_KERNEL); if (!objagg) return ERR_PTR(-ENOMEM); objagg->ops = ops; + if (objagg_hints) { + objagg->hints = objagg_hints; + objagg_hints->refcount++; + } objagg->priv = priv; INIT_LIST_HEAD(&objagg->obj_list); @@ -397,6 +541,8 @@ struct objagg *objagg_create(const struct objagg_ops *ops, void *priv) if (err) goto err_rhashtable_init; + ida_init(&objagg->root_ida); + trace_objagg_create(objagg); return objagg; @@ -415,8 +561,11 @@ EXPORT_SYMBOL(objagg_create); void objagg_destroy(struct objagg *objagg) { trace_objagg_destroy(objagg); + ida_destroy(&objagg->root_ida); WARN_ON(!list_empty(&objagg->obj_list)); rhashtable_destroy(&objagg->obj_ht); + if (objagg->hints) + objagg_hints_put(objagg->hints); kfree(objagg); } EXPORT_SYMBOL(objagg_destroy); @@ -472,6 +621,8 @@ const struct objagg_stats *objagg_stats_get(struct objagg *objagg) objagg_stats->stats_info[i].objagg_obj = objagg_obj; objagg_stats->stats_info[i].is_root = objagg_obj_is_root(objagg_obj); + if (objagg_stats->stats_info[i].is_root) + objagg_stats->root_count++; i++; } objagg_stats->stats_info_count = i; @@ -485,7 +636,7 @@ const struct objagg_stats *objagg_stats_get(struct objagg *objagg) EXPORT_SYMBOL(objagg_stats_get); /** - * objagg_stats_puts - puts stats of the objagg instance + * objagg_stats_put - puts stats of the objagg instance * @objagg_stats: objagg instance stats * * Note: all locking must be provided by the caller. @@ -496,6 +647,410 @@ void objagg_stats_put(const struct objagg_stats *objagg_stats) } EXPORT_SYMBOL(objagg_stats_put); +static struct objagg_hints_node * +objagg_hints_node_create(struct objagg_hints *objagg_hints, + struct objagg_obj *objagg_obj, size_t obj_size, + struct objagg_hints_node *parent_hnode) +{ + unsigned int user_count = objagg_obj->stats.user_count; + struct objagg_hints_node *hnode; + int err; + + hnode = kzalloc(sizeof(*hnode) + obj_size, GFP_KERNEL); + if (!hnode) + return ERR_PTR(-ENOMEM); + memcpy(hnode->obj, &objagg_obj->obj, obj_size); + hnode->stats_info.stats.user_count = user_count; + hnode->stats_info.stats.delta_user_count = user_count; + if (parent_hnode) { + parent_hnode->stats_info.stats.delta_user_count += user_count; + } else { + hnode->root_id = objagg_hints->root_count++; + hnode->stats_info.is_root = true; + } + hnode->stats_info.objagg_obj = objagg_obj; + + err = rhashtable_insert_fast(&objagg_hints->node_ht, &hnode->ht_node, + objagg_hints->ht_params); + if (err) + goto err_ht_insert; + + list_add(&hnode->list, &objagg_hints->node_list); + hnode->parent = parent_hnode; + objagg_hints->node_count++; + + return hnode; + +err_ht_insert: + kfree(hnode); + return ERR_PTR(err); +} + +static void objagg_hints_flush(struct objagg_hints *objagg_hints) +{ + struct objagg_hints_node *hnode, *tmp; + + list_for_each_entry_safe(hnode, tmp, &objagg_hints->node_list, list) { + list_del(&hnode->list); + rhashtable_remove_fast(&objagg_hints->node_ht, &hnode->ht_node, + objagg_hints->ht_params); + kfree(hnode); + } +} + +struct objagg_tmp_node { + struct objagg_obj *objagg_obj; + bool crossed_out; +}; + +struct objagg_tmp_graph { + struct objagg_tmp_node *nodes; + unsigned long nodes_count; + unsigned long *edges; +}; + +static int objagg_tmp_graph_edge_index(struct objagg_tmp_graph *graph, + int parent_index, int index) +{ + return index * graph->nodes_count + parent_index; +} + +static void objagg_tmp_graph_edge_set(struct objagg_tmp_graph *graph, + int parent_index, int index) +{ + int edge_index = objagg_tmp_graph_edge_index(graph, index, + parent_index); + + __set_bit(edge_index, graph->edges); +} + +static bool objagg_tmp_graph_is_edge(struct objagg_tmp_graph *graph, + int parent_index, int index) +{ + int edge_index = objagg_tmp_graph_edge_index(graph, index, + parent_index); + + return test_bit(edge_index, graph->edges); +} + +static unsigned int objagg_tmp_graph_node_weight(struct objagg_tmp_graph *graph, + unsigned int index) +{ + struct objagg_tmp_node *node = &graph->nodes[index]; + unsigned int weight = node->objagg_obj->stats.user_count; + int j; + + /* Node weight is sum of node users and all other nodes users + * that this node can represent with delta. + */ + + for (j = 0; j < graph->nodes_count; j++) { + if (!objagg_tmp_graph_is_edge(graph, index, j)) + continue; + node = &graph->nodes[j]; + if (node->crossed_out) + continue; + weight += node->objagg_obj->stats.user_count; + } + return weight; +} + +static int objagg_tmp_graph_node_max_weight(struct objagg_tmp_graph *graph) +{ + struct objagg_tmp_node *node; + unsigned int max_weight = 0; + unsigned int weight; + int max_index = -1; + int i; + + for (i = 0; i < graph->nodes_count; i++) { + node = &graph->nodes[i]; + if (node->crossed_out) + continue; + weight = objagg_tmp_graph_node_weight(graph, i); + if (weight >= max_weight) { + max_weight = weight; + max_index = i; + } + } + return max_index; +} + +static struct objagg_tmp_graph *objagg_tmp_graph_create(struct objagg *objagg) +{ + unsigned int nodes_count = objagg->obj_count; + struct objagg_tmp_graph *graph; + struct objagg_tmp_node *node; + struct objagg_tmp_node *pnode; + struct objagg_obj *objagg_obj; + size_t alloc_size; + int i, j; + + graph = kzalloc(sizeof(*graph), GFP_KERNEL); + if (!graph) + return NULL; + + graph->nodes = kcalloc(nodes_count, sizeof(*graph->nodes), GFP_KERNEL); + if (!graph->nodes) + goto err_nodes_alloc; + graph->nodes_count = nodes_count; + + alloc_size = BITS_TO_LONGS(nodes_count * nodes_count) * + sizeof(unsigned long); + graph->edges = kzalloc(alloc_size, GFP_KERNEL); + if (!graph->edges) + goto err_edges_alloc; + + i = 0; + list_for_each_entry(objagg_obj, &objagg->obj_list, list) { + node = &graph->nodes[i++]; + node->objagg_obj = objagg_obj; + } + + /* Assemble a temporary graph. Insert edge X->Y in case Y can be + * in delta of X. + */ + for (i = 0; i < nodes_count; i++) { + for (j = 0; j < nodes_count; j++) { + if (i == j) + continue; + pnode = &graph->nodes[i]; + node = &graph->nodes[j]; + if (objagg->ops->delta_check(objagg->priv, + pnode->objagg_obj->obj, + node->objagg_obj->obj)) { + objagg_tmp_graph_edge_set(graph, i, j); + + } + } + } + return graph; + +err_edges_alloc: + kfree(graph->nodes); +err_nodes_alloc: + kfree(graph); + return NULL; +} + +static void objagg_tmp_graph_destroy(struct objagg_tmp_graph *graph) +{ + kfree(graph->edges); + kfree(graph->nodes); + kfree(graph); +} + +static int +objagg_opt_simple_greedy_fillup_hints(struct objagg_hints *objagg_hints, + struct objagg *objagg) +{ + struct objagg_hints_node *hnode, *parent_hnode; + struct objagg_tmp_graph *graph; + struct objagg_tmp_node *node; + int index; + int j; + int err; + + graph = objagg_tmp_graph_create(objagg); + if (!graph) + return -ENOMEM; + + /* Find the nodes from the ones that can accommodate most users + * and cross them out of the graph. Save them to the hint list. + */ + while ((index = objagg_tmp_graph_node_max_weight(graph)) != -1) { + node = &graph->nodes[index]; + node->crossed_out = true; + hnode = objagg_hints_node_create(objagg_hints, + node->objagg_obj, + objagg->ops->obj_size, + NULL); + if (IS_ERR(hnode)) { + err = PTR_ERR(hnode); + goto out; + } + parent_hnode = hnode; + for (j = 0; j < graph->nodes_count; j++) { + if (!objagg_tmp_graph_is_edge(graph, index, j)) + continue; + node = &graph->nodes[j]; + if (node->crossed_out) + continue; + node->crossed_out = true; + hnode = objagg_hints_node_create(objagg_hints, + node->objagg_obj, + objagg->ops->obj_size, + parent_hnode); + if (IS_ERR(hnode)) { + err = PTR_ERR(hnode); + goto out; + } + } + } + + err = 0; +out: + objagg_tmp_graph_destroy(graph); + return err; +} + +struct objagg_opt_algo { + int (*fillup_hints)(struct objagg_hints *objagg_hints, + struct objagg *objagg); +}; + +static const struct objagg_opt_algo objagg_opt_simple_greedy = { + .fillup_hints = objagg_opt_simple_greedy_fillup_hints, +}; + + +static const struct objagg_opt_algo *objagg_opt_algos[] = { + [OBJAGG_OPT_ALGO_SIMPLE_GREEDY] = &objagg_opt_simple_greedy, +}; + +static int objagg_hints_obj_cmp(struct rhashtable_compare_arg *arg, + const void *obj) +{ + struct rhashtable *ht = arg->ht; + struct objagg_hints *objagg_hints = + container_of(ht, struct objagg_hints, node_ht); + const struct objagg_ops *ops = objagg_hints->ops; + const char *ptr = obj; + + ptr += ht->p.key_offset; + return ops->hints_obj_cmp ? ops->hints_obj_cmp(ptr, arg->key) : + memcmp(ptr, arg->key, ht->p.key_len); +} + +/** + * objagg_hints_get - obtains hints instance + * @objagg: objagg instance + * @opt_algo_type: type of hints finding algorithm + * + * Note: all locking must be provided by the caller. + * + * According to the algo type, the existing objects of objagg instance + * are going to be went-through to assemble an optimal tree. We call this + * tree hints. These hints can be later on used for creation of + * a new objagg instance. There, the future object creations are going + * to be consulted with these hints in order to find out, where exactly + * the new object should be put as a root or delta. + * + * Returns a pointer to hints instance in case of success, + * otherwise it returns pointer error using ERR_PTR macro. + */ +struct objagg_hints *objagg_hints_get(struct objagg *objagg, + enum objagg_opt_algo_type opt_algo_type) +{ + const struct objagg_opt_algo *algo = objagg_opt_algos[opt_algo_type]; + struct objagg_hints *objagg_hints; + int err; + + objagg_hints = kzalloc(sizeof(*objagg_hints), GFP_KERNEL); + if (!objagg_hints) + return ERR_PTR(-ENOMEM); + + objagg_hints->ops = objagg->ops; + objagg_hints->refcount = 1; + + INIT_LIST_HEAD(&objagg_hints->node_list); + + objagg_hints->ht_params.key_len = objagg->ops->obj_size; + objagg_hints->ht_params.key_offset = + offsetof(struct objagg_hints_node, obj); + objagg_hints->ht_params.head_offset = + offsetof(struct objagg_hints_node, ht_node); + objagg_hints->ht_params.obj_cmpfn = objagg_hints_obj_cmp; + + err = rhashtable_init(&objagg_hints->node_ht, &objagg_hints->ht_params); + if (err) + goto err_rhashtable_init; + + err = algo->fillup_hints(objagg_hints, objagg); + if (err) + goto err_fillup_hints; + + if (WARN_ON(objagg_hints->node_count != objagg->obj_count)) { + err = -EINVAL; + goto err_node_count_check; + } + + return objagg_hints; + +err_node_count_check: +err_fillup_hints: + objagg_hints_flush(objagg_hints); + rhashtable_destroy(&objagg_hints->node_ht); +err_rhashtable_init: + kfree(objagg_hints); + return ERR_PTR(err); +} +EXPORT_SYMBOL(objagg_hints_get); + +/** + * objagg_hints_put - puts hints instance + * @objagg_hints: objagg hints instance + * + * Note: all locking must be provided by the caller. + */ +void objagg_hints_put(struct objagg_hints *objagg_hints) +{ + if (--objagg_hints->refcount) + return; + objagg_hints_flush(objagg_hints); + rhashtable_destroy(&objagg_hints->node_ht); + kfree(objagg_hints); +} +EXPORT_SYMBOL(objagg_hints_put); + +/** + * objagg_hints_stats_get - obtains stats of the hints instance + * @objagg_hints: hints instance + * + * Note: all locking must be provided by the caller. + * + * The returned structure contains statistics of all objects + * currently in use, ordered by following rules: + * 1) Root objects are always on lower indexes than the rest. + * 2) Objects with higher delta user count are always on lower + * indexes. + * 3) In case multiple objects have the same delta user count, + * the objects are ordered by user count. + * + * Returns a pointer to stats instance in case of success, + * otherwise it returns pointer error using ERR_PTR macro. + */ +const struct objagg_stats * +objagg_hints_stats_get(struct objagg_hints *objagg_hints) +{ + struct objagg_stats *objagg_stats; + struct objagg_hints_node *hnode; + int i; + + objagg_stats = kzalloc(struct_size(objagg_stats, stats_info, + objagg_hints->node_count), + GFP_KERNEL); + if (!objagg_stats) + return ERR_PTR(-ENOMEM); + + i = 0; + list_for_each_entry(hnode, &objagg_hints->node_list, list) { + memcpy(&objagg_stats->stats_info[i], &hnode->stats_info, + sizeof(objagg_stats->stats_info[0])); + if (objagg_stats->stats_info[i].is_root) + objagg_stats->root_count++; + i++; + } + objagg_stats->stats_info_count = i; + + sort(objagg_stats->stats_info, objagg_stats->stats_info_count, + sizeof(struct objagg_obj_stats_info), + objagg_stats_info_sort_cmp_func, NULL); + + return objagg_stats; +} +EXPORT_SYMBOL(objagg_hints_stats_get); + MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>"); MODULE_DESCRIPTION("Object aggregation manager"); diff --git a/lib/packing.c b/lib/packing.c new file mode 100644 index 000000000000..50d1e9f2f5a7 --- /dev/null +++ b/lib/packing.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2016-2018, NXP Semiconductors + * Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com> + */ +#include <linux/packing.h> +#include <linux/module.h> +#include <linux/bitops.h> +#include <linux/errno.h> +#include <linux/types.h> + +static int get_le_offset(int offset) +{ + int closest_multiple_of_4; + + closest_multiple_of_4 = (offset / 4) * 4; + offset -= closest_multiple_of_4; + return closest_multiple_of_4 + (3 - offset); +} + +static int get_reverse_lsw32_offset(int offset, size_t len) +{ + int closest_multiple_of_4; + int word_index; + + word_index = offset / 4; + closest_multiple_of_4 = word_index * 4; + offset -= closest_multiple_of_4; + word_index = (len / 4) - word_index - 1; + return word_index * 4 + offset; +} + +static u64 bit_reverse(u64 val, unsigned int width) +{ + u64 new_val = 0; + unsigned int bit; + unsigned int i; + + for (i = 0; i < width; i++) { + bit = (val & (1 << i)) != 0; + new_val |= (bit << (width - i - 1)); + } + return new_val; +} + +static void adjust_for_msb_right_quirk(u64 *to_write, int *box_start_bit, + int *box_end_bit, u8 *box_mask) +{ + int box_bit_width = *box_start_bit - *box_end_bit + 1; + int new_box_start_bit, new_box_end_bit; + + *to_write >>= *box_end_bit; + *to_write = bit_reverse(*to_write, box_bit_width); + *to_write <<= *box_end_bit; + + new_box_end_bit = box_bit_width - *box_start_bit - 1; + new_box_start_bit = box_bit_width - *box_end_bit - 1; + *box_mask = GENMASK_ULL(new_box_start_bit, new_box_end_bit); + *box_start_bit = new_box_start_bit; + *box_end_bit = new_box_end_bit; +} + +/** + * packing - Convert numbers (currently u64) between a packed and an unpacked + * format. Unpacked means laid out in memory in the CPU's native + * understanding of integers, while packed means anything else that + * requires translation. + * + * @pbuf: Pointer to a buffer holding the packed value. + * @uval: Pointer to an u64 holding the unpacked value. + * @startbit: The index (in logical notation, compensated for quirks) where + * the packed value starts within pbuf. Must be larger than, or + * equal to, endbit. + * @endbit: The index (in logical notation, compensated for quirks) where + * the packed value ends within pbuf. Must be smaller than, or equal + * to, startbit. + * @op: If PACK, then uval will be treated as const pointer and copied (packed) + * into pbuf, between startbit and endbit. + * If UNPACK, then pbuf will be treated as const pointer and the logical + * value between startbit and endbit will be copied (unpacked) to uval. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming + * correct usage, return code may be discarded. + * If op is PACK, pbuf is modified. + * If op is UNPACK, uval is modified. + */ +int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, + enum packing_op op, u8 quirks) +{ + /* Number of bits for storing "uval" + * also width of the field to access in the pbuf + */ + u64 value_width; + /* Logical byte indices corresponding to the + * start and end of the field. + */ + int plogical_first_u8, plogical_last_u8, box; + + /* startbit is expected to be larger than endbit */ + if (startbit < endbit) + /* Invalid function call */ + return -EINVAL; + + value_width = startbit - endbit + 1; + if (value_width > 64) + return -ERANGE; + + /* Check if "uval" fits in "value_width" bits. + * If value_width is 64, the check will fail, but any + * 64-bit uval will surely fit. + */ + if (op == PACK && value_width < 64 && (*uval >= (1ull << value_width))) + /* Cannot store "uval" inside "value_width" bits. + * Truncating "uval" is most certainly not desirable, + * so simply erroring out is appropriate. + */ + return -ERANGE; + + /* Initialize parameter */ + if (op == UNPACK) + *uval = 0; + + /* Iterate through an idealistic view of the pbuf as an u64 with + * no quirks, u8 by u8 (aligned at u8 boundaries), from high to low + * logical bit significance. "box" denotes the current logical u8. + */ + plogical_first_u8 = startbit / 8; + plogical_last_u8 = endbit / 8; + + for (box = plogical_first_u8; box >= plogical_last_u8; box--) { + /* Bit indices into the currently accessed 8-bit box */ + int box_start_bit, box_end_bit, box_addr; + u8 box_mask; + /* Corresponding bits from the unpacked u64 parameter */ + int proj_start_bit, proj_end_bit; + u64 proj_mask; + + /* This u8 may need to be accessed in its entirety + * (from bit 7 to bit 0), or not, depending on the + * input arguments startbit and endbit. + */ + if (box == plogical_first_u8) + box_start_bit = startbit % 8; + else + box_start_bit = 7; + if (box == plogical_last_u8) + box_end_bit = endbit % 8; + else + box_end_bit = 0; + + /* We have determined the box bit start and end. + * Now we calculate where this (masked) u8 box would fit + * in the unpacked (CPU-readable) u64 - the u8 box's + * projection onto the unpacked u64. Though the + * box is u8, the projection is u64 because it may fall + * anywhere within the unpacked u64. + */ + proj_start_bit = ((box * 8) + box_start_bit) - endbit; + proj_end_bit = ((box * 8) + box_end_bit) - endbit; + proj_mask = GENMASK_ULL(proj_start_bit, proj_end_bit); + box_mask = GENMASK_ULL(box_start_bit, box_end_bit); + + /* Determine the offset of the u8 box inside the pbuf, + * adjusted for quirks. The adjusted box_addr will be used for + * effective addressing inside the pbuf (so it's not + * logical any longer). + */ + box_addr = pbuflen - box - 1; + if (quirks & QUIRK_LITTLE_ENDIAN) + box_addr = get_le_offset(box_addr); + if (quirks & QUIRK_LSW32_IS_FIRST) + box_addr = get_reverse_lsw32_offset(box_addr, + pbuflen); + + if (op == UNPACK) { + u64 pval; + + /* Read from pbuf, write to uval */ + pval = ((u8 *)pbuf)[box_addr] & box_mask; + if (quirks & QUIRK_MSB_ON_THE_RIGHT) + adjust_for_msb_right_quirk(&pval, + &box_start_bit, + &box_end_bit, + &box_mask); + + pval >>= box_end_bit; + pval <<= proj_end_bit; + *uval &= ~proj_mask; + *uval |= pval; + } else { + u64 pval; + + /* Write to pbuf, read from uval */ + pval = (*uval) & proj_mask; + pval >>= proj_end_bit; + if (quirks & QUIRK_MSB_ON_THE_RIGHT) + adjust_for_msb_right_quirk(&pval, + &box_start_bit, + &box_end_bit, + &box_mask); + + pval <<= box_end_bit; + ((u8 *)pbuf)[box_addr] &= ~box_mask; + ((u8 *)pbuf)[box_addr] |= pval; + } + } + return 0; +} +EXPORT_SYMBOL(packing); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Generic bitfield packing and unpacking"); diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 9877682e49c7..da54318d3b55 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -151,7 +151,7 @@ static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu) atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count); WARN_ONCE(atomic_long_read(&ref->count) <= 0, - "percpu ref (%pf) <= 0 (%ld) after switching to atomic", + "percpu ref (%ps) <= 0 (%ld) after switching to atomic", ref->release, atomic_long_read(&ref->count)); /* @ref is viewed as dead on all CPUs, send out switch confirmation */ @@ -333,7 +333,7 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref, spin_lock_irqsave(&percpu_ref_switch_lock, flags); WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD, - "%s called more than once on %pf!", __func__, ref->release); + "%s called more than once on %ps!", __func__, ref->release); ref->percpu_count_ptr |= __PERCPU_REF_DEAD; __percpu_ref_switch_mode(ref, confirm_kill); diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 4e90d443d1b0..e723eacf7868 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -39,7 +39,7 @@ endif ifeq ($(CONFIG_KERNEL_MODE_NEON),y) NEON_FLAGS := -ffreestanding ifeq ($(ARCH),arm) -NEON_FLAGS += -mfloat-abi=softfp -mfpu=neon +NEON_FLAGS += -march=armv7-a -mfloat-abi=softfp -mfpu=neon endif CFLAGS_recov_neon_inner.o += $(NEON_FLAGS) ifeq ($(ARCH),arm64) diff --git a/lib/raid6/neon.uc b/lib/raid6/neon.uc index d5242f544551..b7c68030da4f 100644 --- a/lib/raid6/neon.uc +++ b/lib/raid6/neon.uc @@ -28,7 +28,6 @@ typedef uint8x16_t unative_t; -#define NBYTES(x) ((unative_t){x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x}) #define NSIZE sizeof(unative_t) /* @@ -61,7 +60,7 @@ void raid6_neon$#_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) int d, z, z0; register unative_t wd$$, wq$$, wp$$, w1$$, w2$$; - const unative_t x1d = NBYTES(0x1d); + const unative_t x1d = vdupq_n_u8(0x1d); z0 = disks - 3; /* Highest data disk */ p = dptr[z0+1]; /* XOR parity */ @@ -92,7 +91,7 @@ void raid6_neon$#_xor_syndrome_real(int disks, int start, int stop, int d, z, z0; register unative_t wd$$, wq$$, wp$$, w1$$, w2$$; - const unative_t x1d = NBYTES(0x1d); + const unative_t x1d = vdupq_n_u8(0x1d); z0 = stop; /* P/Q right side optimization */ p = dptr[disks-2]; /* XOR parity */ diff --git a/lib/raid6/recov_neon_inner.c b/lib/raid6/recov_neon_inner.c index 8cd20c9f834a..f13c07f82297 100644 --- a/lib/raid6/recov_neon_inner.c +++ b/lib/raid6/recov_neon_inner.c @@ -10,11 +10,6 @@ #include <arm_neon.h> -static const uint8x16_t x0f = { - 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, - 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, -}; - #ifdef CONFIG_ARM /* * AArch32 does not provide this intrinsic natively because it does not @@ -41,6 +36,7 @@ void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp, uint8x16_t pm1 = vld1q_u8(pbmul + 16); uint8x16_t qm0 = vld1q_u8(qmul); uint8x16_t qm1 = vld1q_u8(qmul + 16); + uint8x16_t x0f = vdupq_n_u8(0x0f); /* * while ( bytes-- ) { @@ -60,14 +56,14 @@ void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp, px = veorq_u8(vld1q_u8(p), vld1q_u8(dp)); vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq)); - vy = (uint8x16_t)vshrq_n_s16((int16x8_t)vx, 4); + vy = vshrq_n_u8(vx, 4); vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f)); - vy = vqtbl1q_u8(qm1, vandq_u8(vy, x0f)); + vy = vqtbl1q_u8(qm1, vy); qx = veorq_u8(vx, vy); - vy = (uint8x16_t)vshrq_n_s16((int16x8_t)px, 4); + vy = vshrq_n_u8(px, 4); vx = vqtbl1q_u8(pm0, vandq_u8(px, x0f)); - vy = vqtbl1q_u8(pm1, vandq_u8(vy, x0f)); + vy = vqtbl1q_u8(pm1, vy); vx = veorq_u8(vx, vy); db = veorq_u8(vx, qx); @@ -87,6 +83,7 @@ void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq, { uint8x16_t qm0 = vld1q_u8(qmul); uint8x16_t qm1 = vld1q_u8(qmul + 16); + uint8x16_t x0f = vdupq_n_u8(0x0f); /* * while (bytes--) { @@ -100,9 +97,9 @@ void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq, vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq)); - vy = (uint8x16_t)vshrq_n_s16((int16x8_t)vx, 4); + vy = vshrq_n_u8(vx, 4); vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f)); - vy = vqtbl1q_u8(qm1, vandq_u8(vy, x0f)); + vy = vqtbl1q_u8(qm1, vy); vx = veorq_u8(vx, vy); vy = veorq_u8(vx, vld1q_u8(p)); diff --git a/lib/refcount.c b/lib/refcount.c index ebcf8cd49e05..6e904af0fb3e 100644 --- a/lib/refcount.c +++ b/lib/refcount.c @@ -33,6 +33,9 @@ * Note that the allocator is responsible for ordering things between free() * and alloc(). * + * The decrements dec_and_test() and sub_and_test() also provide acquire + * ordering on success. + * */ #include <linux/mutex.h> @@ -164,8 +167,8 @@ EXPORT_SYMBOL(refcount_inc_checked); * at UINT_MAX. * * Provides release memory ordering, such that prior loads and stores are done - * before, and provides a control dependency such that free() must come after. - * See the comment on top. + * before, and provides an acquire ordering on success such that free() + * must come after. * * Use of this function is not recommended for the normal reference counting * use case in which references are taken and released one at a time. In these @@ -190,7 +193,12 @@ bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r) } while (!atomic_try_cmpxchg_release(&r->refs, &val, new)); - return !new; + if (!new) { + smp_acquire__after_ctrl_dep(); + return true; + } + return false; + } EXPORT_SYMBOL(refcount_sub_and_test_checked); @@ -202,8 +210,8 @@ EXPORT_SYMBOL(refcount_sub_and_test_checked); * decrement when saturated at UINT_MAX. * * Provides release memory ordering, such that prior loads and stores are done - * before, and provides a control dependency such that free() must come after. - * See the comment on top. + * before, and provides an acquire ordering on success such that free() + * must come after. * * Return: true if the resulting refcount is 0, false otherwise */ diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 852ffa5160f1..6529fe1b45c1 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -31,11 +31,10 @@ #define HASH_DEFAULT_SIZE 64UL #define HASH_MIN_SIZE 4U -#define BUCKET_LOCKS_PER_CPU 32UL union nested_table { union nested_table __rcu *table; - struct rhash_head __rcu *bucket; + struct rhash_lock_head __rcu *bucket; }; static u32 head_hashfn(struct rhashtable *ht, @@ -56,9 +55,11 @@ EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) { - spinlock_t *lock = rht_bucket_lock(tbl, hash); - - return (debug_locks) ? lockdep_is_held(lock) : 1; + if (!debug_locks) + return 1; + if (unlikely(tbl->nest)) + return 1; + return bit_spin_is_locked(0, (unsigned long *)&tbl->buckets[hash]); } EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); #else @@ -104,7 +105,6 @@ static void bucket_table_free(const struct bucket_table *tbl) if (tbl->nest) nested_bucket_table_free(tbl); - free_bucket_spinlocks(tbl->locks); kvfree(tbl); } @@ -131,9 +131,11 @@ static union nested_table *nested_table_alloc(struct rhashtable *ht, INIT_RHT_NULLS_HEAD(ntbl[i].bucket); } - rcu_assign_pointer(*prev, ntbl); - - return ntbl; + if (cmpxchg(prev, NULL, ntbl) == NULL) + return ntbl; + /* Raced with another thread. */ + kfree(ntbl); + return rcu_dereference(*prev); } static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht, @@ -169,11 +171,11 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, gfp_t gfp) { struct bucket_table *tbl = NULL; - size_t size, max_locks; + size_t size; int i; + static struct lock_class_key __key; - size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); - tbl = kvzalloc(size, gfp); + tbl = kvzalloc(struct_size(tbl, buckets, nbuckets), gfp); size = nbuckets; @@ -185,18 +187,11 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, if (tbl == NULL) return NULL; - tbl->size = size; + lockdep_init_map(&tbl->dep_map, "rhashtable_bucket", &__key, 0); - max_locks = size >> 1; - if (tbl->nest) - max_locks = min_t(size_t, max_locks, 1U << tbl->nest); - - if (alloc_bucket_spinlocks(&tbl->locks, &tbl->locks_mask, max_locks, - ht->p.locks_mul, gfp) < 0) { - bucket_table_free(tbl); - return NULL; - } + tbl->size = size; + rcu_head_init(&tbl->rcu); INIT_LIST_HEAD(&tbl->walkers); tbl->hash_rnd = get_random_u32(); @@ -220,14 +215,15 @@ static struct bucket_table *rhashtable_last_table(struct rhashtable *ht, return new_tbl; } -static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash) +static int rhashtable_rehash_one(struct rhashtable *ht, + struct rhash_lock_head __rcu **bkt, + unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl); - struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash); int err = -EAGAIN; struct rhash_head *head, *next, *entry; - spinlock_t *new_bucket_lock; + struct rhash_head __rcu **pprev = NULL; unsigned int new_hash; if (new_tbl->nest) @@ -235,7 +231,8 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash) err = -ENOENT; - rht_for_each(entry, old_tbl, old_hash) { + rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash), + old_tbl, old_hash) { err = 0; next = rht_dereference_bucket(entry->next, old_tbl, old_hash); @@ -250,18 +247,19 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash) new_hash = head_hashfn(ht, new_tbl, entry); - new_bucket_lock = rht_bucket_lock(new_tbl, new_hash); + rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash], SINGLE_DEPTH_NESTING); - spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING); - head = rht_dereference_bucket(new_tbl->buckets[new_hash], - new_tbl, new_hash); + head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash); RCU_INIT_POINTER(entry->next, head); - rcu_assign_pointer(new_tbl->buckets[new_hash], entry); - spin_unlock(new_bucket_lock); + rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry); - rcu_assign_pointer(*pprev, next); + if (pprev) + rcu_assign_pointer(*pprev, next); + else + /* Need to preserved the bit lock. */ + rht_assign_locked(bkt, next); out: return err; @@ -271,20 +269,19 @@ static int rhashtable_rehash_chain(struct rhashtable *ht, unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); - spinlock_t *old_bucket_lock; + struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash); int err; - old_bucket_lock = rht_bucket_lock(old_tbl, old_hash); + if (!bkt) + return 0; + rht_lock(old_tbl, bkt); - spin_lock_bh(old_bucket_lock); - while (!(err = rhashtable_rehash_one(ht, old_hash))) + while (!(err = rhashtable_rehash_one(ht, bkt, old_hash))) ; - if (err == -ENOENT) { - old_tbl->rehash++; + if (err == -ENOENT) err = 0; - } - spin_unlock_bh(old_bucket_lock); + rht_unlock(old_tbl, bkt); return err; } @@ -330,13 +327,16 @@ static int rhashtable_rehash_table(struct rhashtable *ht) spin_lock(&ht->lock); list_for_each_entry(walker, &old_tbl->walkers, list) walker->tbl = NULL; - spin_unlock(&ht->lock); /* Wait for readers. All new readers will see the new * table, and thus no references to the old table will * remain. + * We do this inside the locked region so that + * rhashtable_walk_stop() can use rcu_head_after_call_rcu() + * to check if it should not re-link the table. */ call_rcu(&old_tbl->rcu, bucket_table_free_rcu); + spin_unlock(&ht->lock); return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0; } @@ -416,8 +416,12 @@ static void rht_deferred_worker(struct work_struct *work) else if (tbl->nest) err = rhashtable_rehash_alloc(ht, tbl, tbl->size); - if (!err) - err = rhashtable_rehash_table(ht); + if (!err || err == -EEXIST) { + int nerr; + + nerr = rhashtable_rehash_table(ht); + err = err ?: nerr; + } mutex_unlock(&ht->mutex); @@ -474,6 +478,7 @@ fail: } static void *rhashtable_lookup_one(struct rhashtable *ht, + struct rhash_lock_head __rcu **bkt, struct bucket_table *tbl, unsigned int hash, const void *key, struct rhash_head *obj) { @@ -481,13 +486,12 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, .ht = ht, .key = key, }; - struct rhash_head __rcu **pprev; + struct rhash_head __rcu **pprev = NULL; struct rhash_head *head; int elasticity; elasticity = RHT_ELASTICITY; - pprev = rht_bucket_var(tbl, hash); - rht_for_each_continue(head, *pprev, tbl, hash) { + rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) { struct rhlist_head *list; struct rhlist_head *plist; @@ -509,7 +513,11 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, RCU_INIT_POINTER(list->next, plist); head = rht_dereference_bucket(head->next, tbl, hash); RCU_INIT_POINTER(list->rhead.next, head); - rcu_assign_pointer(*pprev, obj); + if (pprev) + rcu_assign_pointer(*pprev, obj); + else + /* Need to preserve the bit lock */ + rht_assign_locked(bkt, obj); return NULL; } @@ -521,12 +529,12 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, } static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, + struct rhash_lock_head __rcu **bkt, struct bucket_table *tbl, unsigned int hash, struct rhash_head *obj, void *data) { - struct rhash_head __rcu **pprev; struct bucket_table *new_tbl; struct rhash_head *head; @@ -549,11 +557,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, if (unlikely(rht_grow_above_100(ht, tbl))) return ERR_PTR(-EAGAIN); - pprev = rht_bucket_insert(ht, tbl, hash); - if (!pprev) - return ERR_PTR(-ENOMEM); - - head = rht_dereference_bucket(*pprev, tbl, hash); + head = rht_ptr(bkt, tbl, hash); RCU_INIT_POINTER(obj->next, head); if (ht->rhlist) { @@ -563,7 +567,10 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, RCU_INIT_POINTER(list->next, NULL); } - rcu_assign_pointer(*pprev, obj); + /* bkt is always the head of the list, so it holds + * the lock, which we need to preserve + */ + rht_assign_locked(bkt, obj); atomic_inc(&ht->nelems); if (rht_grow_above_75(ht, tbl)) @@ -577,47 +584,35 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, { struct bucket_table *new_tbl; struct bucket_table *tbl; + struct rhash_lock_head __rcu **bkt; unsigned int hash; - spinlock_t *lock; void *data; - tbl = rcu_dereference(ht->tbl); - - /* All insertions must grab the oldest table containing - * the hashed bucket that is yet to be rehashed. - */ - for (;;) { - hash = rht_head_hashfn(ht, tbl, obj, ht->p); - lock = rht_bucket_lock(tbl, hash); - spin_lock_bh(lock); - - if (tbl->rehash <= hash) - break; - - spin_unlock_bh(lock); - tbl = rht_dereference_rcu(tbl->future_tbl, ht); - } - - data = rhashtable_lookup_one(ht, tbl, hash, key, obj); - new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data); - if (PTR_ERR(new_tbl) != -EEXIST) - data = ERR_CAST(new_tbl); + new_tbl = rcu_dereference(ht->tbl); - while (!IS_ERR_OR_NULL(new_tbl)) { + do { tbl = new_tbl; hash = rht_head_hashfn(ht, tbl, obj, ht->p); - spin_lock_nested(rht_bucket_lock(tbl, hash), - SINGLE_DEPTH_NESTING); - - data = rhashtable_lookup_one(ht, tbl, hash, key, obj); - new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data); - if (PTR_ERR(new_tbl) != -EEXIST) - data = ERR_CAST(new_tbl); - - spin_unlock(rht_bucket_lock(tbl, hash)); - } - - spin_unlock_bh(lock); + if (rcu_access_pointer(tbl->future_tbl)) + /* Failure is OK */ + bkt = rht_bucket_var(tbl, hash); + else + bkt = rht_bucket_insert(ht, tbl, hash); + if (bkt == NULL) { + new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); + data = ERR_PTR(-EAGAIN); + } else { + rht_lock(tbl, bkt); + data = rhashtable_lookup_one(ht, bkt, tbl, + hash, key, obj); + new_tbl = rhashtable_insert_one(ht, bkt, tbl, + hash, obj, data); + if (PTR_ERR(new_tbl) != -EEXIST) + data = ERR_CAST(new_tbl); + + rht_unlock(tbl, bkt); + } + } while (!IS_ERR_OR_NULL(new_tbl)); if (PTR_ERR(data) == -EAGAIN) data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?: @@ -682,7 +677,7 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_enter); * rhashtable_walk_exit - Free an iterator * @iter: Hash table Iterator * - * This function frees resources allocated by rhashtable_walk_init. + * This function frees resources allocated by rhashtable_walk_enter. */ void rhashtable_walk_exit(struct rhashtable_iter *iter) { @@ -939,10 +934,11 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter) ht = iter->ht; spin_lock(&ht->lock); - if (tbl->rehash < tbl->size) - list_add(&iter->walker.list, &tbl->walkers); - else + if (rcu_head_after_call_rcu(&tbl->rcu, bucket_table_free_rcu)) + /* This bucket table is being freed, don't re-link it. */ iter->walker.tbl = NULL; + else + list_add(&iter->walker.list, &tbl->walkers); spin_unlock(&ht->lock); out: @@ -1042,11 +1038,6 @@ int rhashtable_init(struct rhashtable *ht, size = rounded_hashtable_size(&ht->p); - if (params->locks_mul) - ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); - else - ht->p.locks_mul = BUCKET_LOCKS_PER_CPU; - ht->key_len = ht->p.key_len; if (!params->hashfn) { ht->p.hashfn = jhash; @@ -1148,7 +1139,7 @@ restart: struct rhash_head *pos, *next; cond_resched(); - for (pos = rht_dereference(*rht_bucket(tbl, i), ht), + for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)), next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL; !rht_is_a_nulls(pos); @@ -1175,11 +1166,10 @@ void rhashtable_destroy(struct rhashtable *ht) } EXPORT_SYMBOL_GPL(rhashtable_destroy); -struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash) +struct rhash_lock_head __rcu **__rht_bucket_nested(const struct bucket_table *tbl, + unsigned int hash) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); - static struct rhash_head __rcu *rhnull; unsigned int index = hash & ((1 << tbl->nest) - 1); unsigned int size = tbl->size >> tbl->nest; unsigned int subhash = hash; @@ -1197,20 +1187,28 @@ struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, subhash >>= shift; } - if (!ntbl) { - if (!rhnull) - INIT_RHT_NULLS_HEAD(rhnull); - return &rhnull; - } + if (!ntbl) + return NULL; return &ntbl[subhash].bucket; } +EXPORT_SYMBOL_GPL(__rht_bucket_nested); + +struct rhash_lock_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, + unsigned int hash) +{ + static struct rhash_lock_head __rcu *rhnull; + + if (!rhnull) + INIT_RHT_NULLS_HEAD(rhnull); + return __rht_bucket_nested(tbl, hash) ?: &rhnull; +} EXPORT_SYMBOL_GPL(rht_bucket_nested); -struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, - struct bucket_table *tbl, - unsigned int hash) +struct rhash_lock_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, + struct bucket_table *tbl, + unsigned int hash) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); unsigned int index = hash & ((1 << tbl->nest) - 1); diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 5b382c1244ed..155fe38756ec 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -591,6 +591,17 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, unsigned int cpu) { + /* + * Once the clear bit is set, the bit may be allocated out. + * + * Orders READ/WRITE on the asssociated instance(such as request + * of blk_mq) by this bit for avoiding race with re-allocation, + * and its pair is the memory barrier implied in __sbitmap_get_word. + * + * One invariant is that the clear bit has to be zero when the bit + * is in use. + */ + smp_mb__before_atomic(); sbitmap_deferred_clear_bit(&sbq->sb, nr); /* diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 9ba349e775ef..739dc9fe2c55 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -625,6 +625,32 @@ bool __sg_page_iter_next(struct sg_page_iter *piter) } EXPORT_SYMBOL(__sg_page_iter_next); +static int sg_dma_page_count(struct scatterlist *sg) +{ + return PAGE_ALIGN(sg->offset + sg_dma_len(sg)) >> PAGE_SHIFT; +} + +bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter) +{ + struct sg_page_iter *piter = &dma_iter->base; + + if (!piter->__nents || !piter->sg) + return false; + + piter->sg_pgoffset += piter->__pg_advance; + piter->__pg_advance = 1; + + while (piter->sg_pgoffset >= sg_dma_page_count(piter->sg)) { + piter->sg_pgoffset -= sg_dma_page_count(piter->sg); + piter->sg = sg_next(piter->sg); + if (!--piter->__nents || !piter->sg) + return false; + } + + return true; +} +EXPORT_SYMBOL(__sg_page_iter_dma_next); + /** * sg_miter_start - start mapping iteration over a sg list * @miter: sg mapping iter to be started diff --git a/lib/siphash.c b/lib/siphash.c index 3ae58b4edad6..c47bb6ff2149 100644 --- a/lib/siphash.c +++ b/lib/siphash.c @@ -68,11 +68,11 @@ u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key) bytemask_from_count(left))); #else switch (left) { - case 7: b |= ((u64)end[6]) << 48; - case 6: b |= ((u64)end[5]) << 40; - case 5: b |= ((u64)end[4]) << 32; + case 7: b |= ((u64)end[6]) << 48; /* fall through */ + case 6: b |= ((u64)end[5]) << 40; /* fall through */ + case 5: b |= ((u64)end[4]) << 32; /* fall through */ case 4: b |= le32_to_cpup(data); break; - case 3: b |= ((u64)end[2]) << 16; + case 3: b |= ((u64)end[2]) << 16; /* fall through */ case 2: b |= le16_to_cpup(data); break; case 1: b |= end[0]; } @@ -101,11 +101,11 @@ u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key) bytemask_from_count(left))); #else switch (left) { - case 7: b |= ((u64)end[6]) << 48; - case 6: b |= ((u64)end[5]) << 40; - case 5: b |= ((u64)end[4]) << 32; + case 7: b |= ((u64)end[6]) << 48; /* fall through */ + case 6: b |= ((u64)end[5]) << 40; /* fall through */ + case 5: b |= ((u64)end[4]) << 32; /* fall through */ case 4: b |= get_unaligned_le32(end); break; - case 3: b |= ((u64)end[2]) << 16; + case 3: b |= ((u64)end[2]) << 16; /* fall through */ case 2: b |= get_unaligned_le16(end); break; case 1: b |= end[0]; } @@ -268,11 +268,11 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) bytemask_from_count(left))); #else switch (left) { - case 7: b |= ((u64)end[6]) << 48; - case 6: b |= ((u64)end[5]) << 40; - case 5: b |= ((u64)end[4]) << 32; + case 7: b |= ((u64)end[6]) << 48; /* fall through */ + case 6: b |= ((u64)end[5]) << 40; /* fall through */ + case 5: b |= ((u64)end[4]) << 32; /* fall through */ case 4: b |= le32_to_cpup(data); break; - case 3: b |= ((u64)end[2]) << 16; + case 3: b |= ((u64)end[2]) << 16; /* fall through */ case 2: b |= le16_to_cpup(data); break; case 1: b |= end[0]; } @@ -301,11 +301,11 @@ u32 __hsiphash_unaligned(const void *data, size_t len, bytemask_from_count(left))); #else switch (left) { - case 7: b |= ((u64)end[6]) << 48; - case 6: b |= ((u64)end[5]) << 40; - case 5: b |= ((u64)end[4]) << 32; + case 7: b |= ((u64)end[6]) << 48; /* fall through */ + case 6: b |= ((u64)end[5]) << 40; /* fall through */ + case 5: b |= ((u64)end[4]) << 32; /* fall through */ case 4: b |= get_unaligned_le32(end); break; - case 3: b |= ((u64)end[2]) << 16; + case 3: b |= ((u64)end[2]) << 16; /* fall through */ case 2: b |= get_unaligned_le16(end); break; case 1: b |= end[0]; } @@ -431,7 +431,7 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) v0 ^= m; } switch (left) { - case 3: b |= ((u32)end[2]) << 16; + case 3: b |= ((u32)end[2]) << 16; /* fall through */ case 2: b |= le16_to_cpup(data); break; case 1: b |= end[0]; } @@ -454,7 +454,7 @@ u32 __hsiphash_unaligned(const void *data, size_t len, v0 ^= m; } switch (left) { - case 3: b |= ((u32)end[2]) << 16; + case 3: b |= ((u32)end[2]) << 16; /* fall through */ case 2: b |= get_unaligned_le16(end); break; case 1: b |= end[0]; } diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 85925aaa4fff..157d9e31f6c2 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -5,10 +5,11 @@ * DEBUG_PREEMPT variant of smp_processor_id(). */ #include <linux/export.h> +#include <linux/kprobes.h> #include <linux/sched.h> -notrace static unsigned int check_preemption_disabled(const char *what1, - const char *what2) +notrace static nokprobe_inline +unsigned int check_preemption_disabled(const char *what1, const char *what2) { int this_cpu = raw_smp_processor_id(); @@ -56,9 +57,11 @@ notrace unsigned int debug_smp_processor_id(void) return check_preemption_disabled("smp_processor_id", ""); } EXPORT_SYMBOL(debug_smp_processor_id); +NOKPROBE_SYMBOL(debug_smp_processor_id); notrace void __this_cpu_preempt_check(const char *op) { check_preemption_disabled("__this_cpu_", op); } EXPORT_SYMBOL(__this_cpu_preempt_check); +NOKPROBE_SYMBOL(__this_cpu_preempt_check); diff --git a/lib/stackdepot.c b/lib/stackdepot.c index e513459a5601..605c61f65d94 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -194,40 +194,52 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, return NULL; } -void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace) +/** + * stack_depot_fetch - Fetch stack entries from a depot + * + * @handle: Stack depot handle which was returned from + * stack_depot_save(). + * @entries: Pointer to store the entries address + * + * Return: The number of trace entries for this depot. + */ +unsigned int stack_depot_fetch(depot_stack_handle_t handle, + unsigned long **entries) { union handle_parts parts = { .handle = handle }; void *slab = stack_slabs[parts.slabindex]; size_t offset = parts.offset << STACK_ALLOC_ALIGN; struct stack_record *stack = slab + offset; - trace->nr_entries = trace->max_entries = stack->size; - trace->entries = stack->entries; - trace->skip = 0; + *entries = stack->entries; + return stack->size; } -EXPORT_SYMBOL_GPL(depot_fetch_stack); +EXPORT_SYMBOL_GPL(stack_depot_fetch); /** - * depot_save_stack - save stack in a stack depot. - * @trace - the stacktrace to save. - * @alloc_flags - flags for allocating additional memory if required. + * stack_depot_save - Save a stack trace from an array + * + * @entries: Pointer to storage array + * @nr_entries: Size of the storage array + * @alloc_flags: Allocation gfp flags * - * Returns the handle of the stack struct stored in depot. + * Return: The handle of the stack struct stored in depot */ -depot_stack_handle_t depot_save_stack(struct stack_trace *trace, - gfp_t alloc_flags) +depot_stack_handle_t stack_depot_save(unsigned long *entries, + unsigned int nr_entries, + gfp_t alloc_flags) { - u32 hash; - depot_stack_handle_t retval = 0; struct stack_record *found = NULL, **bucket; - unsigned long flags; + depot_stack_handle_t retval = 0; struct page *page = NULL; void *prealloc = NULL; + unsigned long flags; + u32 hash; - if (unlikely(trace->nr_entries == 0)) + if (unlikely(nr_entries == 0)) goto fast_exit; - hash = hash_stack(trace->entries, trace->nr_entries); + hash = hash_stack(entries, nr_entries); bucket = &stack_table[hash & STACK_HASH_MASK]; /* @@ -235,8 +247,8 @@ depot_stack_handle_t depot_save_stack(struct stack_trace *trace, * The smp_load_acquire() here pairs with smp_store_release() to * |bucket| below. */ - found = find_stack(smp_load_acquire(bucket), trace->entries, - trace->nr_entries, hash); + found = find_stack(smp_load_acquire(bucket), entries, + nr_entries, hash); if (found) goto exit; @@ -264,10 +276,10 @@ depot_stack_handle_t depot_save_stack(struct stack_trace *trace, spin_lock_irqsave(&depot_lock, flags); - found = find_stack(*bucket, trace->entries, trace->nr_entries, hash); + found = find_stack(*bucket, entries, nr_entries, hash); if (!found) { struct stack_record *new = - depot_alloc_stack(trace->entries, trace->nr_entries, + depot_alloc_stack(entries, nr_entries, hash, &prealloc, alloc_flags); if (new) { new->next = *bucket; @@ -297,4 +309,4 @@ exit: fast_exit: return retval; } -EXPORT_SYMBOL_GPL(depot_save_stack); +EXPORT_SYMBOL_GPL(stack_depot_save); diff --git a/lib/string.c b/lib/string.c index 38e4ca08e757..6016eb3ac73d 100644 --- a/lib/string.c +++ b/lib/string.c @@ -159,11 +159,9 @@ EXPORT_SYMBOL(strlcpy); * @src: Where to copy the string from * @count: Size of destination buffer * - * Copy the string, or as much of it as fits, into the dest buffer. - * The routine returns the number of characters copied (not including - * the trailing NUL) or -E2BIG if the destination buffer wasn't big enough. - * The behavior is undefined if the string buffers overlap. - * The destination buffer is always NUL terminated, unless it's zero-sized. + * Copy the string, or as much of it as fits, into the dest buffer. The + * behavior is undefined if the string buffers overlap. The destination + * buffer is always NUL terminated, unless it's zero-sized. * * Preferred to strlcpy() since the API doesn't require reading memory * from the src string beyond the specified "count" bytes, and since @@ -173,8 +171,10 @@ EXPORT_SYMBOL(strlcpy); * * Preferred to strncpy() since it always returns a valid string, and * doesn't unnecessarily force the tail of the destination buffer to be - * zeroed. If the zeroing is desired, it's likely cleaner to use strscpy() - * with an overflow test, then just memset() the tail of the dest buffer. + * zeroed. If zeroing is desired please use strscpy_pad(). + * + * Return: The number of characters copied (not including the trailing + * %NUL) or -E2BIG if the destination buffer wasn't big enough. */ ssize_t strscpy(char *dest, const char *src, size_t count) { @@ -237,6 +237,39 @@ ssize_t strscpy(char *dest, const char *src, size_t count) EXPORT_SYMBOL(strscpy); #endif +/** + * strscpy_pad() - Copy a C-string into a sized buffer + * @dest: Where to copy the string to + * @src: Where to copy the string from + * @count: Size of destination buffer + * + * Copy the string, or as much of it as fits, into the dest buffer. The + * behavior is undefined if the string buffers overlap. The destination + * buffer is always %NUL terminated, unless it's zero-sized. + * + * If the source string is shorter than the destination buffer, zeros + * the tail of the destination buffer. + * + * For full explanation of why you may want to consider using the + * 'strscpy' functions please see the function docstring for strscpy(). + * + * Return: The number of characters copied (not including the trailing + * %NUL) or -E2BIG if the destination buffer wasn't big enough. + */ +ssize_t strscpy_pad(char *dest, const char *src, size_t count) +{ + ssize_t written; + + written = strscpy(dest, src, count); + if (written < 0 || written == count - 1) + return written; + + memset(dest + written + 1, 0, count - written - 1); + + return written; +} +EXPORT_SYMBOL(strscpy_pad); + #ifndef __HAVE_ARCH_STRCAT /** * strcat - Append one %NUL-terminated string to another @@ -866,6 +899,26 @@ __visible int memcmp(const void *cs, const void *ct, size_t count) EXPORT_SYMBOL(memcmp); #endif +#ifndef __HAVE_ARCH_BCMP +/** + * bcmp - returns 0 if and only if the buffers have identical contents. + * @a: pointer to first buffer. + * @b: pointer to second buffer. + * @len: size of buffers. + * + * The sign or magnitude of a non-zero return value has no particular + * meaning, and architectures may implement their own more efficient bcmp(). So + * while this particular implementation is a simple (tail) call to memcmp, do + * not rely on anything but whether the return value is zero or non-zero. + */ +#undef bcmp +int bcmp(const void *a, const void *b, size_t len) +{ + return memcmp(a, b, len); +} +EXPORT_SYMBOL(bcmp); +#endif + #ifndef __HAVE_ARCH_MEMSCAN /** * memscan - Find a character in an area of memory. diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c index 58eacd41526c..023ba9f3b99f 100644 --- a/lib/strncpy_from_user.c +++ b/lib/strncpy_from_user.c @@ -23,10 +23,11 @@ * hit it), 'max' is the address space maximum (and we return * -EFAULT if we hit it). */ -static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, unsigned long max) +static inline long do_strncpy_from_user(char *dst, const char __user *src, + unsigned long count, unsigned long max) { const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; - long res = 0; + unsigned long res = 0; /* * Truncate 'max' to the user-specified limit, so that diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c index 1c1a1b0e38a5..7f2db3fe311f 100644 --- a/lib/strnlen_user.c +++ b/lib/strnlen_user.c @@ -28,7 +28,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count, unsigned long max) { const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; - long align, res = 0; + unsigned long align, res = 0; unsigned long c; /* @@ -42,7 +42,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count, * Do everything aligned. But that means that we * need to also expand the maximum.. */ - align = (sizeof(long) - 1) & (unsigned long)src; + align = (sizeof(unsigned long) - 1) & (unsigned long)src; src -= align; max += align; diff --git a/lib/syscall.c b/lib/syscall.c index 1a7077f20eae..fb328e7ccb08 100644 --- a/lib/syscall.c +++ b/lib/syscall.c @@ -5,16 +5,14 @@ #include <linux/export.h> #include <asm/syscall.h> -static int collect_syscall(struct task_struct *target, long *callno, - unsigned long args[6], unsigned int maxargs, - unsigned long *sp, unsigned long *pc) +static int collect_syscall(struct task_struct *target, struct syscall_info *info) { struct pt_regs *regs; if (!try_get_task_stack(target)) { /* Task has no stack, so the task isn't in a syscall. */ - *sp = *pc = 0; - *callno = -1; + memset(info, 0, sizeof(*info)); + info->data.nr = -1; return 0; } @@ -24,12 +22,13 @@ static int collect_syscall(struct task_struct *target, long *callno, return -EAGAIN; } - *sp = user_stack_pointer(regs); - *pc = instruction_pointer(regs); + info->sp = user_stack_pointer(regs); + info->data.instruction_pointer = instruction_pointer(regs); - *callno = syscall_get_nr(target, regs); - if (*callno != -1L && maxargs > 0) - syscall_get_arguments(target, regs, 0, maxargs, args); + info->data.nr = syscall_get_nr(target, regs); + if (info->data.nr != -1L) + syscall_get_arguments(target, regs, + (unsigned long *)&info->data.args[0]); put_task_stack(target); return 0; @@ -38,41 +37,35 @@ static int collect_syscall(struct task_struct *target, long *callno, /** * task_current_syscall - Discover what a blocked task is doing. * @target: thread to examine - * @callno: filled with system call number or -1 - * @args: filled with @maxargs system call arguments - * @maxargs: number of elements in @args to fill - * @sp: filled with user stack pointer - * @pc: filled with user PC + * @info: structure with the following fields: + * .sp - filled with user stack pointer + * .data.nr - filled with system call number or -1 + * .data.args - filled with @maxargs system call arguments + * .data.instruction_pointer - filled with user PC * - * If @target is blocked in a system call, returns zero with *@callno - * set to the the call's number and @args filled in with its arguments. - * Registers not used for system call arguments may not be available and - * it is not kosher to use &struct user_regset calls while the system + * If @target is blocked in a system call, returns zero with @info.data.nr + * set to the the call's number and @info.data.args filled in with its + * arguments. Registers not used for system call arguments may not be available + * and it is not kosher to use &struct user_regset calls while the system * call is still in progress. Note we may get this result if @target * has finished its system call but not yet returned to user mode, such * as when it's stopped for signal handling or syscall exit tracing. * * If @target is blocked in the kernel during a fault or exception, - * returns zero with *@callno set to -1 and does not fill in @args. - * If so, it's now safe to examine @target using &struct user_regset - * get() calls as long as we're sure @target won't return to user mode. + * returns zero with *@info.data.nr set to -1 and does not fill in + * @info.data.args. If so, it's now safe to examine @target using + * &struct user_regset get() calls as long as we're sure @target won't return + * to user mode. * * Returns -%EAGAIN if @target does not remain blocked. - * - * Returns -%EINVAL if @maxargs is too large (maximum is six). */ -int task_current_syscall(struct task_struct *target, long *callno, - unsigned long args[6], unsigned int maxargs, - unsigned long *sp, unsigned long *pc) +int task_current_syscall(struct task_struct *target, struct syscall_info *info) { long state; unsigned long ncsw; - if (unlikely(maxargs > 6)) - return -EINVAL; - if (target == current) - return collect_syscall(target, callno, args, maxargs, sp, pc); + return collect_syscall(target, info); state = target->state; if (unlikely(!state)) @@ -80,7 +73,7 @@ int task_current_syscall(struct task_struct *target, long *callno, ncsw = wait_task_inactive(target, state); if (unlikely(!ncsw) || - unlikely(collect_syscall(target, callno, args, maxargs, sp, pc)) || + unlikely(collect_syscall(target, info)) || unlikely(wait_task_inactive(target, state) != ncsw)) return -EAGAIN; diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 6cd7d0740005..792d90608052 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -12,6 +12,8 @@ #include <linux/slab.h> #include <linux/string.h> +#include "../tools/testing/selftests/kselftest_module.h" + static unsigned total_tests __initdata; static unsigned failed_tests __initdata; @@ -361,7 +363,7 @@ static void noinline __init test_mem_optimisations(void) } } -static int __init test_bitmap_init(void) +static void __init selftest(void) { test_zero_clear(); test_fill_set(); @@ -369,22 +371,8 @@ static int __init test_bitmap_init(void) test_bitmap_arr32(); test_bitmap_parselist(); test_mem_optimisations(); - - if (failed_tests == 0) - pr_info("all %u tests passed\n", total_tests); - else - pr_warn("failed %u out of %u tests\n", - failed_tests, total_tests); - - return failed_tests ? -EINVAL : 0; } -static void __exit test_bitmap_cleanup(void) -{ -} - -module_init(test_bitmap_init); -module_exit(test_bitmap_cleanup); - +KSTM_MODULE_LOADERS(test_bitmap); MODULE_AUTHOR("david decotigny <david.decotigny@googlers.com>"); MODULE_LICENSE("GPL"); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index f3e570722a7e..0845f635f404 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -6668,12 +6668,14 @@ static int __run_one(const struct bpf_prog *fp, const void *data, u64 start, finish; int ret = 0, i; + preempt_disable(); start = ktime_get_ns(); for (i = 0; i < runs; i++) ret = BPF_PROG_RUN(fp, data); finish = ktime_get_ns(); + preempt_enable(); *duration = finish - start; do_div(*duration, runs); diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 7cab9a9869ac..7222093ee00b 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -631,11 +631,6 @@ static ssize_t trigger_batched_requests_store(struct device *dev, for (i = 0; i < test_fw_config->num_requests; i++) { req = &test_fw_config->reqs[i]; - if (!req) { - WARN_ON(1); - rc = -ENOMEM; - goto out_bail; - } req->fw = NULL; req->idx = i; req->name = test_fw_config->name; @@ -737,10 +732,6 @@ ssize_t trigger_batched_requests_async_store(struct device *dev, for (i = 0; i < test_fw_config->num_requests; i++) { req = &test_fw_config->reqs[i]; - if (!req) { - WARN_ON(1); - goto out_bail; - } req->name = test_fw_config->name; req->fw = NULL; req->idx = i; diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 51b78405bf24..7de2702621dc 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -480,29 +480,6 @@ static noinline void __init copy_user_test(void) kfree(kmem); } -static noinline void __init use_after_scope_test(void) -{ - volatile char *volatile p; - - pr_info("use-after-scope on int\n"); - { - int local = 0; - - p = (char *)&local; - } - p[0] = 1; - p[3] = 1; - - pr_info("use-after-scope on array\n"); - { - char local[1024] = {0}; - - p = local; - } - p[0] = 1; - p[1023] = 1; -} - static noinline void __init kasan_alloca_oob_left(void) { volatile int i = 10; @@ -682,7 +659,6 @@ static int __init kmalloc_tests_init(void) kasan_alloca_oob_right(); ksize_unpoisons_memory(); copy_user_test(); - use_after_scope_test(); kmem_cache_double_free(); kmem_cache_invalid_free(); kasan_memchr(); diff --git a/lib/test_objagg.c b/lib/test_objagg.c index ab57144bb0cd..72c1abfa154d 100644 --- a/lib/test_objagg.c +++ b/lib/test_objagg.c @@ -87,6 +87,15 @@ static void world_obj_put(struct world *world, struct objagg *objagg, #define MAX_KEY_ID_DIFF 5 +static bool delta_check(void *priv, const void *parent_obj, const void *obj) +{ + const struct tokey *parent_key = parent_obj; + const struct tokey *key = obj; + int diff = key->id - parent_key->id; + + return diff >= 0 && diff <= MAX_KEY_ID_DIFF; +} + static void *delta_create(void *priv, void *parent_obj, void *obj) { struct tokey *parent_key = parent_obj; @@ -95,7 +104,7 @@ static void *delta_create(void *priv, void *parent_obj, void *obj) int diff = key->id - parent_key->id; struct delta *delta; - if (diff < 0 || diff > MAX_KEY_ID_DIFF) + if (!delta_check(priv, parent_obj, obj)) return ERR_PTR(-EINVAL); delta = kzalloc(sizeof(*delta), GFP_KERNEL); @@ -115,7 +124,7 @@ static void delta_destroy(void *priv, void *delta_priv) kfree(delta); } -static void *root_create(void *priv, void *obj) +static void *root_create(void *priv, void *obj, unsigned int id) { struct world *world = priv; struct tokey *key = obj; @@ -268,6 +277,12 @@ stats_put: return err; } +static bool delta_check_dummy(void *priv, const void *parent_obj, + const void *obj) +{ + return false; +} + static void *delta_create_dummy(void *priv, void *parent_obj, void *obj) { return ERR_PTR(-EOPNOTSUPP); @@ -279,6 +294,7 @@ static void delta_destroy_dummy(void *priv, void *delta_priv) static const struct objagg_ops nodelta_ops = { .obj_size = sizeof(struct tokey), + .delta_check = delta_check_dummy, .delta_create = delta_create_dummy, .delta_destroy = delta_destroy_dummy, .root_create = root_create, @@ -292,7 +308,7 @@ static int test_nodelta(void) int i; int err; - objagg = objagg_create(&nodelta_ops, &world); + objagg = objagg_create(&nodelta_ops, NULL, &world); if (IS_ERR(objagg)) return PTR_ERR(objagg); @@ -357,6 +373,7 @@ err_stats_second_zero: static const struct objagg_ops delta_ops = { .obj_size = sizeof(struct tokey), + .delta_check = delta_check, .delta_create = delta_create, .delta_destroy = delta_destroy, .root_create = root_create, @@ -728,8 +745,10 @@ static int check_expect_stats(struct objagg *objagg, int err; stats = objagg_stats_get(objagg); - if (IS_ERR(stats)) + if (IS_ERR(stats)) { + *errmsg = "objagg_stats_get() failed."; return PTR_ERR(stats); + } err = __check_expect_stats(stats, expect_stats, errmsg); objagg_stats_put(stats); return err; @@ -769,7 +788,6 @@ static int test_delta_action_item(struct world *world, if (err) goto errout; - errmsg = NULL; err = check_expect_stats(objagg, &action_item->expect_stats, &errmsg); if (err) { pr_err("Key %u: Stats: %s\n", action_item->key_id, errmsg); @@ -793,7 +811,7 @@ static int test_delta(void) int i; int err; - objagg = objagg_create(&delta_ops, &world); + objagg = objagg_create(&delta_ops, NULL, &world); if (IS_ERR(objagg)) return PTR_ERR(objagg); @@ -815,6 +833,170 @@ err_do_action_item: return err; } +struct hints_case { + const unsigned int *key_ids; + size_t key_ids_count; + struct expect_stats expect_stats; + struct expect_stats expect_stats_hints; +}; + +static const unsigned int hints_case_key_ids[] = { + 1, 7, 3, 5, 3, 1, 30, 8, 8, 5, 6, 8, +}; + +static const struct hints_case hints_case = { + .key_ids = hints_case_key_ids, + .key_ids_count = ARRAY_SIZE(hints_case_key_ids), + .expect_stats = + EXPECT_STATS(7, ROOT(1, 2, 7), ROOT(7, 1, 4), ROOT(30, 1, 1), + DELTA(8, 3), DELTA(3, 2), + DELTA(5, 2), DELTA(6, 1)), + .expect_stats_hints = + EXPECT_STATS(7, ROOT(3, 2, 9), ROOT(1, 2, 2), ROOT(30, 1, 1), + DELTA(8, 3), DELTA(5, 2), + DELTA(6, 1), DELTA(7, 1)), +}; + +static void __pr_debug_stats(const struct objagg_stats *stats) +{ + int i; + + for (i = 0; i < stats->stats_info_count; i++) + pr_debug("Stat index %d key %u: u %d, d %d, %s\n", i, + obj_to_key_id(stats->stats_info[i].objagg_obj), + stats->stats_info[i].stats.user_count, + stats->stats_info[i].stats.delta_user_count, + stats->stats_info[i].is_root ? "root" : "noroot"); +} + +static void pr_debug_stats(struct objagg *objagg) +{ + const struct objagg_stats *stats; + + stats = objagg_stats_get(objagg); + if (IS_ERR(stats)) + return; + __pr_debug_stats(stats); + objagg_stats_put(stats); +} + +static void pr_debug_hints_stats(struct objagg_hints *objagg_hints) +{ + const struct objagg_stats *stats; + + stats = objagg_hints_stats_get(objagg_hints); + if (IS_ERR(stats)) + return; + __pr_debug_stats(stats); + objagg_stats_put(stats); +} + +static int check_expect_hints_stats(struct objagg_hints *objagg_hints, + const struct expect_stats *expect_stats, + const char **errmsg) +{ + const struct objagg_stats *stats; + int err; + + stats = objagg_hints_stats_get(objagg_hints); + if (IS_ERR(stats)) + return PTR_ERR(stats); + err = __check_expect_stats(stats, expect_stats, errmsg); + objagg_stats_put(stats); + return err; +} + +static int test_hints_case(const struct hints_case *hints_case) +{ + struct objagg_obj *objagg_obj; + struct objagg_hints *hints; + struct world world2 = {}; + struct world world = {}; + struct objagg *objagg2; + struct objagg *objagg; + const char *errmsg; + int i; + int err; + + objagg = objagg_create(&delta_ops, NULL, &world); + if (IS_ERR(objagg)) + return PTR_ERR(objagg); + + for (i = 0; i < hints_case->key_ids_count; i++) { + objagg_obj = world_obj_get(&world, objagg, + hints_case->key_ids[i]); + if (IS_ERR(objagg_obj)) { + err = PTR_ERR(objagg_obj); + goto err_world_obj_get; + } + } + + pr_debug_stats(objagg); + err = check_expect_stats(objagg, &hints_case->expect_stats, &errmsg); + if (err) { + pr_err("Stats: %s\n", errmsg); + goto err_check_expect_stats; + } + + hints = objagg_hints_get(objagg, OBJAGG_OPT_ALGO_SIMPLE_GREEDY); + if (IS_ERR(hints)) { + err = PTR_ERR(hints); + goto err_hints_get; + } + + pr_debug_hints_stats(hints); + err = check_expect_hints_stats(hints, &hints_case->expect_stats_hints, + &errmsg); + if (err) { + pr_err("Hints stats: %s\n", errmsg); + goto err_check_expect_hints_stats; + } + + objagg2 = objagg_create(&delta_ops, hints, &world2); + if (IS_ERR(objagg2)) + return PTR_ERR(objagg2); + + for (i = 0; i < hints_case->key_ids_count; i++) { + objagg_obj = world_obj_get(&world2, objagg2, + hints_case->key_ids[i]); + if (IS_ERR(objagg_obj)) { + err = PTR_ERR(objagg_obj); + goto err_world2_obj_get; + } + } + + pr_debug_stats(objagg2); + err = check_expect_stats(objagg2, &hints_case->expect_stats_hints, + &errmsg); + if (err) { + pr_err("Stats2: %s\n", errmsg); + goto err_check_expect_stats2; + } + + err = 0; + +err_check_expect_stats2: +err_world2_obj_get: + for (i--; i >= 0; i--) + world_obj_put(&world2, objagg, hints_case->key_ids[i]); + objagg_hints_put(hints); + objagg_destroy(objagg2); + i = hints_case->key_ids_count; +err_check_expect_hints_stats: +err_hints_get: +err_check_expect_stats: +err_world_obj_get: + for (i--; i >= 0; i--) + world_obj_put(&world, objagg, hints_case->key_ids[i]); + + objagg_destroy(objagg); + return err; +} +static int test_hints(void) +{ + return test_hints_case(&hints_case); +} + static int __init test_objagg_init(void) { int err; @@ -822,7 +1004,10 @@ static int __init test_objagg_init(void) err = test_nodelta(); if (err) return err; - return test_delta(); + err = test_delta(); + if (err) + return err; + return test_hints(); } static void __exit test_objagg_exit(void) diff --git a/lib/test_printf.c b/lib/test_printf.c index 659b6cc0d483..93da0a5000ec 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -21,6 +21,8 @@ #include <linux/gfp.h> #include <linux/mm.h> +#include "../tools/testing/selftests/kselftest_module.h" + #define BUF_SIZE 256 #define PAD_SIZE 16 #define FILL_CHAR '$' @@ -239,6 +241,7 @@ plain_format(void) #define PTR ((void *)0x456789ab) #define PTR_STR "456789ab" #define PTR_VAL_NO_CRNG "(ptrval)" +#define ZEROS "" static int __init plain_format(void) @@ -268,7 +271,6 @@ plain_hash_to_buffer(const void *p, char *buf, size_t len) return 0; } - static int __init plain_hash(void) { @@ -326,6 +328,24 @@ test_hashed(const char *fmt, const void *p) } static void __init +null_pointer(void) +{ + test_hashed("%p", NULL); + test(ZEROS "00000000", "%px", NULL); + test("(null)", "%pE", NULL); +} + +#define PTR_INVALID ((void *)0x000000ab) + +static void __init +invalid_pointer(void) +{ + test_hashed("%p", PTR_INVALID); + test(ZEROS "000000ab", "%px", PTR_INVALID); + test("(efault)", "%pE", PTR_INVALID); +} + +static void __init symbol_ptr(void) { } @@ -462,8 +482,7 @@ struct_rtc_time(void) .tm_year = 118, }; - test_hashed("%pt", &tm); - + test("(%ptR?)", "%pt", &tm); test("2018-11-26T05:35:43", "%ptR", &tm); test("0118-10-26T05:35:43", "%ptRr", &tm); test("05:35:43|2018-11-26", "%ptRt|%ptRd", &tm, &tm); @@ -481,14 +500,14 @@ static void __init large_bitmap(void) { const int nbits = 1 << 16; - unsigned long *bits = kcalloc(BITS_TO_LONGS(nbits), sizeof(long), GFP_KERNEL); + unsigned long *bits = bitmap_zalloc(nbits, GFP_KERNEL); if (!bits) return; bitmap_set(bits, 1, 20); bitmap_set(bits, 60000, 15); test("1-20,60000-60014", "%*pbl", nbits, bits); - kfree(bits); + bitmap_free(bits); } static void __init @@ -572,6 +591,8 @@ static void __init test_pointer(void) { plain(); + null_pointer(); + invalid_pointer(); symbol_ptr(); kernel_ptr(); struct_resource(); @@ -590,12 +611,11 @@ test_pointer(void) flags(); } -static int __init -test_printf_init(void) +static void __init selftest(void) { alloced_buffer = kmalloc(BUF_SIZE + 2*PAD_SIZE, GFP_KERNEL); if (!alloced_buffer) - return -ENOMEM; + return; test_buffer = alloced_buffer + PAD_SIZE; test_basic(); @@ -604,16 +624,8 @@ test_printf_init(void) test_pointer(); kfree(alloced_buffer); - - if (failed_tests == 0) - pr_info("all %u tests passed\n", total_tests); - else - pr_warn("failed %u out of %u tests\n", failed_tests, total_tests); - - return failed_tests ? -EINVAL : 0; } -module_init(test_printf_init); - +KSTM_MODULE_LOADERS(test_printf); MODULE_AUTHOR("Rasmus Villemoes <linux@rasmusvillemoes.dk>"); MODULE_LICENSE("GPL"); diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index e52f8cafe227..084fe5a6ac57 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -177,16 +177,11 @@ static int __init test_rht_lookup(struct rhashtable *ht, struct test_obj *array, static void test_bucket_stats(struct rhashtable *ht, unsigned int entries) { - unsigned int err, total = 0, chain_len = 0; + unsigned int total = 0, chain_len = 0; struct rhashtable_iter hti; struct rhash_head *pos; - err = rhashtable_walk_init(ht, &hti, GFP_KERNEL); - if (err) { - pr_warn("Test failed: allocation error"); - return; - } - + rhashtable_walk_enter(ht, &hti); rhashtable_walk_start(&hti); while ((pos = rhashtable_walk_next(&hti))) { @@ -395,7 +390,7 @@ static int __init test_rhltable(unsigned int entries) if (WARN(err, "cannot remove element at slot %d", i)) continue; } else { - if (WARN(err != -ENOENT, "removed non-existant element %d, error %d not %d", + if (WARN(err != -ENOENT, "removed non-existent element %d, error %d not %d", i, err, -ENOENT)) continue; } @@ -440,7 +435,7 @@ static int __init test_rhltable(unsigned int entries) if (WARN(err, "cannot remove element at slot %d", i)) continue; } else { - if (WARN(err != -ENOENT, "removed non-existant element, error %d not %d", + if (WARN(err != -ENOENT, "removed non-existent element, error %d not %d", err, -ENOENT)) continue; } @@ -505,7 +500,7 @@ static unsigned int __init print_ht(struct rhltable *rhlt) struct rhash_head *pos, *next; struct test_obj_rhl *p; - pos = rht_dereference(tbl->buckets[i], ht); + pos = rht_ptr_exclusive(tbl->buckets + i); next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL; if (!rht_is_a_nulls(pos)) { diff --git a/lib/test_stackinit.c b/lib/test_stackinit.c new file mode 100644 index 000000000000..13115b6f2b88 --- /dev/null +++ b/lib/test_stackinit.c @@ -0,0 +1,378 @@ +// SPDX-Licenses: GPLv2 +/* + * Test cases for compiler-based stack variable zeroing via future + * compiler flags or CONFIG_GCC_PLUGIN_STRUCTLEAK*. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> + +/* Exfiltration buffer. */ +#define MAX_VAR_SIZE 128 +static char check_buf[MAX_VAR_SIZE]; + +/* Character array to trigger stack protector in all functions. */ +#define VAR_BUFFER 32 + +/* Volatile mask to convince compiler to copy memory with 0xff. */ +static volatile u8 forced_mask = 0xff; + +/* Location and size tracking to validate fill and test are colocated. */ +static void *fill_start, *target_start; +static size_t fill_size, target_size; + +static bool range_contains(char *haystack_start, size_t haystack_size, + char *needle_start, size_t needle_size) +{ + if (needle_start >= haystack_start && + needle_start + needle_size <= haystack_start + haystack_size) + return true; + return false; +} + +#define DO_NOTHING_TYPE_SCALAR(var_type) var_type +#define DO_NOTHING_TYPE_STRING(var_type) void +#define DO_NOTHING_TYPE_STRUCT(var_type) void + +#define DO_NOTHING_RETURN_SCALAR(ptr) *(ptr) +#define DO_NOTHING_RETURN_STRING(ptr) /**/ +#define DO_NOTHING_RETURN_STRUCT(ptr) /**/ + +#define DO_NOTHING_CALL_SCALAR(var, name) \ + (var) = do_nothing_ ## name(&(var)) +#define DO_NOTHING_CALL_STRING(var, name) \ + do_nothing_ ## name(var) +#define DO_NOTHING_CALL_STRUCT(var, name) \ + do_nothing_ ## name(&(var)) + +#define FETCH_ARG_SCALAR(var) &var +#define FETCH_ARG_STRING(var) var +#define FETCH_ARG_STRUCT(var) &var + +#define FILL_SIZE_STRING 16 + +#define INIT_CLONE_SCALAR /**/ +#define INIT_CLONE_STRING [FILL_SIZE_STRING] +#define INIT_CLONE_STRUCT /**/ + +#define INIT_SCALAR_none /**/ +#define INIT_SCALAR_zero = 0 + +#define INIT_STRING_none [FILL_SIZE_STRING] /**/ +#define INIT_STRING_zero [FILL_SIZE_STRING] = { } + +#define INIT_STRUCT_none /**/ +#define INIT_STRUCT_zero = { } +#define INIT_STRUCT_static_partial = { .two = 0, } +#define INIT_STRUCT_static_all = { .one = arg->one, \ + .two = arg->two, \ + .three = arg->three, \ + .four = arg->four, \ + } +#define INIT_STRUCT_dynamic_partial = { .two = arg->two, } +#define INIT_STRUCT_dynamic_all = { .one = arg->one, \ + .two = arg->two, \ + .three = arg->three, \ + .four = arg->four, \ + } +#define INIT_STRUCT_runtime_partial ; \ + var.two = 0 +#define INIT_STRUCT_runtime_all ; \ + var.one = 0; \ + var.two = 0; \ + var.three = 0; \ + memset(&var.four, 0, \ + sizeof(var.four)) + +/* + * @name: unique string name for the test + * @var_type: type to be tested for zeroing initialization + * @which: is this a SCALAR, STRING, or STRUCT type? + * @init_level: what kind of initialization is performed + */ +#define DEFINE_TEST_DRIVER(name, var_type, which) \ +/* Returns 0 on success, 1 on failure. */ \ +static noinline __init int test_ ## name (void) \ +{ \ + var_type zero INIT_CLONE_ ## which; \ + int ignored; \ + u8 sum = 0, i; \ + \ + /* Notice when a new test is larger than expected. */ \ + BUILD_BUG_ON(sizeof(zero) > MAX_VAR_SIZE); \ + \ + /* Fill clone type with zero for per-field init. */ \ + memset(&zero, 0x00, sizeof(zero)); \ + /* Fill stack with 0xFF. */ \ + ignored = leaf_ ##name((unsigned long)&ignored, 1, \ + FETCH_ARG_ ## which(zero)); \ + /* Clear entire check buffer for later bit tests. */ \ + memset(check_buf, 0x00, sizeof(check_buf)); \ + /* Extract stack-defined variable contents. */ \ + ignored = leaf_ ##name((unsigned long)&ignored, 0, \ + FETCH_ARG_ ## which(zero)); \ + \ + /* Validate that compiler lined up fill and target. */ \ + if (!range_contains(fill_start, fill_size, \ + target_start, target_size)) { \ + pr_err(#name ": stack fill missed target!?\n"); \ + pr_err(#name ": fill %zu wide\n", fill_size); \ + pr_err(#name ": target offset by %d\n", \ + (int)((ssize_t)(uintptr_t)fill_start - \ + (ssize_t)(uintptr_t)target_start)); \ + return 1; \ + } \ + \ + /* Look for any set bits in the check region. */ \ + for (i = 0; i < sizeof(check_buf); i++) \ + sum += (check_buf[i] != 0); \ + \ + if (sum == 0) \ + pr_info(#name " ok\n"); \ + else \ + pr_warn(#name " FAIL (uninit bytes: %d)\n", \ + sum); \ + \ + return (sum != 0); \ +} +#define DEFINE_TEST(name, var_type, which, init_level) \ +/* no-op to force compiler into ignoring "uninitialized" vars */\ +static noinline __init DO_NOTHING_TYPE_ ## which(var_type) \ +do_nothing_ ## name(var_type *ptr) \ +{ \ + /* Will always be true, but compiler doesn't know. */ \ + if ((unsigned long)ptr > 0x2) \ + return DO_NOTHING_RETURN_ ## which(ptr); \ + else \ + return DO_NOTHING_RETURN_ ## which(ptr + 1); \ +} \ +static noinline __init int leaf_ ## name(unsigned long sp, \ + bool fill, \ + var_type *arg) \ +{ \ + char buf[VAR_BUFFER]; \ + var_type var INIT_ ## which ## _ ## init_level; \ + \ + target_start = &var; \ + target_size = sizeof(var); \ + /* \ + * Keep this buffer around to make sure we've got a \ + * stack frame of SOME kind... \ + */ \ + memset(buf, (char)(sp && 0xff), sizeof(buf)); \ + /* Fill variable with 0xFF. */ \ + if (fill) { \ + fill_start = &var; \ + fill_size = sizeof(var); \ + memset(fill_start, \ + (char)((sp && 0xff) | forced_mask), \ + fill_size); \ + } \ + \ + /* Silence "never initialized" warnings. */ \ + DO_NOTHING_CALL_ ## which(var, name); \ + \ + /* Exfiltrate "var". */ \ + memcpy(check_buf, target_start, target_size); \ + \ + return (int)buf[0] | (int)buf[sizeof(buf) - 1]; \ +} \ +DEFINE_TEST_DRIVER(name, var_type, which) + +/* Structure with no padding. */ +struct test_packed { + unsigned long one; + unsigned long two; + unsigned long three; + unsigned long four; +}; + +/* Simple structure with padding likely to be covered by compiler. */ +struct test_small_hole { + size_t one; + char two; + /* 3 byte padding hole here. */ + int three; + unsigned long four; +}; + +/* Try to trigger unhandled padding in a structure. */ +struct test_aligned { + u32 internal1; + u64 internal2; +} __aligned(64); + +struct test_big_hole { + u8 one; + u8 two; + u8 three; + /* 61 byte padding hole here. */ + struct test_aligned four; +} __aligned(64); + +struct test_trailing_hole { + char *one; + char *two; + char *three; + char four; + /* "sizeof(unsigned long) - 1" byte padding hole here. */ +}; + +/* Test if STRUCTLEAK is clearing structs with __user fields. */ +struct test_user { + u8 one; + unsigned long two; + char __user *three; + unsigned long four; +}; + +#define DEFINE_SCALAR_TEST(name, init) \ + DEFINE_TEST(name ## _ ## init, name, SCALAR, init) + +#define DEFINE_SCALAR_TESTS(init) \ + DEFINE_SCALAR_TEST(u8, init); \ + DEFINE_SCALAR_TEST(u16, init); \ + DEFINE_SCALAR_TEST(u32, init); \ + DEFINE_SCALAR_TEST(u64, init); \ + DEFINE_TEST(char_array_ ## init, unsigned char, STRING, init) + +#define DEFINE_STRUCT_TEST(name, init) \ + DEFINE_TEST(name ## _ ## init, \ + struct test_ ## name, STRUCT, init) + +#define DEFINE_STRUCT_TESTS(init) \ + DEFINE_STRUCT_TEST(small_hole, init); \ + DEFINE_STRUCT_TEST(big_hole, init); \ + DEFINE_STRUCT_TEST(trailing_hole, init); \ + DEFINE_STRUCT_TEST(packed, init) + +/* These should be fully initialized all the time! */ +DEFINE_SCALAR_TESTS(zero); +DEFINE_STRUCT_TESTS(zero); +/* Static initialization: padding may be left uninitialized. */ +DEFINE_STRUCT_TESTS(static_partial); +DEFINE_STRUCT_TESTS(static_all); +/* Dynamic initialization: padding may be left uninitialized. */ +DEFINE_STRUCT_TESTS(dynamic_partial); +DEFINE_STRUCT_TESTS(dynamic_all); +/* Runtime initialization: padding may be left uninitialized. */ +DEFINE_STRUCT_TESTS(runtime_partial); +DEFINE_STRUCT_TESTS(runtime_all); +/* No initialization without compiler instrumentation. */ +DEFINE_SCALAR_TESTS(none); +DEFINE_STRUCT_TESTS(none); +DEFINE_TEST(user, struct test_user, STRUCT, none); + +/* + * Check two uses through a variable declaration outside either path, + * which was noticed as a special case in porting earlier stack init + * compiler logic. + */ +static int noinline __leaf_switch_none(int path, bool fill) +{ + switch (path) { + uint64_t var; + + case 1: + target_start = &var; + target_size = sizeof(var); + if (fill) { + fill_start = &var; + fill_size = sizeof(var); + + memset(fill_start, forced_mask | 0x55, fill_size); + } + memcpy(check_buf, target_start, target_size); + break; + case 2: + target_start = &var; + target_size = sizeof(var); + if (fill) { + fill_start = &var; + fill_size = sizeof(var); + + memset(fill_start, forced_mask | 0xaa, fill_size); + } + memcpy(check_buf, target_start, target_size); + break; + default: + var = 5; + return var & forced_mask; + } + return 0; +} + +static noinline __init int leaf_switch_1_none(unsigned long sp, bool fill, + uint64_t *arg) +{ + return __leaf_switch_none(1, fill); +} + +static noinline __init int leaf_switch_2_none(unsigned long sp, bool fill, + uint64_t *arg) +{ + return __leaf_switch_none(2, fill); +} + +DEFINE_TEST_DRIVER(switch_1_none, uint64_t, SCALAR); +DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR); + +static int __init test_stackinit_init(void) +{ + unsigned int failures = 0; + +#define test_scalars(init) do { \ + failures += test_u8_ ## init (); \ + failures += test_u16_ ## init (); \ + failures += test_u32_ ## init (); \ + failures += test_u64_ ## init (); \ + failures += test_char_array_ ## init (); \ + } while (0) + +#define test_structs(init) do { \ + failures += test_small_hole_ ## init (); \ + failures += test_big_hole_ ## init (); \ + failures += test_trailing_hole_ ## init (); \ + failures += test_packed_ ## init (); \ + } while (0) + + /* These are explicitly initialized and should always pass. */ + test_scalars(zero); + test_structs(zero); + /* Padding here appears to be accidentally always initialized? */ + test_structs(dynamic_partial); + /* Padding initialization depends on compiler behaviors. */ + test_structs(static_partial); + test_structs(static_all); + test_structs(dynamic_all); + test_structs(runtime_partial); + test_structs(runtime_all); + + /* STRUCTLEAK_BYREF_ALL should cover everything from here down. */ + test_scalars(none); + failures += test_switch_1_none(); + failures += test_switch_2_none(); + + /* STRUCTLEAK_BYREF should cover from here down. */ + test_structs(none); + + /* STRUCTLEAK will only cover this. */ + failures += test_user(); + + if (failures == 0) + pr_info("all tests passed!\n"); + else + pr_err("failures: %u\n", failures); + + return failures ? -EINVAL : 0; +} +module_init(test_stackinit_init); + +static void __exit test_stackinit_exit(void) +{ } +module_exit(test_stackinit_exit); + +MODULE_LICENSE("GPL"); diff --git a/lib/test_strscpy.c b/lib/test_strscpy.c new file mode 100644 index 000000000000..a827f94601f5 --- /dev/null +++ b/lib/test_strscpy.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/string.h> + +#include "../tools/testing/selftests/kselftest_module.h" + +/* + * Kernel module for testing 'strscpy' family of functions. + */ + +KSTM_MODULE_GLOBALS(); + +/* + * tc() - Run a specific test case. + * @src: Source string, argument to strscpy_pad() + * @count: Size of destination buffer, argument to strscpy_pad() + * @expected: Expected return value from call to strscpy_pad() + * @terminator: 1 if there should be a terminating null byte 0 otherwise. + * @chars: Number of characters from the src string expected to be + * written to the dst buffer. + * @pad: Number of pad characters expected (in the tail of dst buffer). + * (@pad does not include the null terminator byte.) + * + * Calls strscpy_pad() and verifies the return value and state of the + * destination buffer after the call returns. + */ +static int __init tc(char *src, int count, int expected, + int chars, int terminator, int pad) +{ + int nr_bytes_poison; + int max_expected; + int max_count; + int written; + char buf[6]; + int index, i; + const char POISON = 'z'; + + total_tests++; + + if (!src) { + pr_err("null source string not supported\n"); + return -1; + } + + memset(buf, POISON, sizeof(buf)); + /* Future proofing test suite, validate args */ + max_count = sizeof(buf) - 2; /* Space for null and to verify overflow */ + max_expected = count - 1; /* Space for the null */ + if (count > max_count) { + pr_err("count (%d) is too big (%d) ... aborting", count, max_count); + return -1; + } + if (expected > max_expected) { + pr_warn("expected (%d) is bigger than can possibly be returned (%d)", + expected, max_expected); + } + + written = strscpy_pad(buf, src, count); + if ((written) != (expected)) { + pr_err("%d != %d (written, expected)\n", written, expected); + goto fail; + } + + if (count && written == -E2BIG) { + if (strncmp(buf, src, count - 1) != 0) { + pr_err("buffer state invalid for -E2BIG\n"); + goto fail; + } + if (buf[count - 1] != '\0') { + pr_err("too big string is not null terminated correctly\n"); + goto fail; + } + } + + for (i = 0; i < chars; i++) { + if (buf[i] != src[i]) { + pr_err("buf[i]==%c != src[i]==%c\n", buf[i], src[i]); + goto fail; + } + } + + if (terminator) { + if (buf[count - 1] != '\0') { + pr_err("string is not null terminated correctly\n"); + goto fail; + } + } + + for (i = 0; i < pad; i++) { + index = chars + terminator + i; + if (buf[index] != '\0') { + pr_err("padding missing at index: %d\n", i); + goto fail; + } + } + + nr_bytes_poison = sizeof(buf) - chars - terminator - pad; + for (i = 0; i < nr_bytes_poison; i++) { + index = sizeof(buf) - 1 - i; /* Check from the end back */ + if (buf[index] != POISON) { + pr_err("poison value missing at index: %d\n", i); + goto fail; + } + } + + return 0; +fail: + failed_tests++; + return -1; +} + +static void __init selftest(void) +{ + /* + * tc() uses a destination buffer of size 6 and needs at + * least 2 characters spare (one for null and one to check for + * overflow). This means we should only call tc() with + * strings up to a maximum of 4 characters long and 'count' + * should not exceed 4. To test with longer strings increase + * the buffer size in tc(). + */ + + /* tc(src, count, expected, chars, terminator, pad) */ + KSTM_CHECK_ZERO(tc("a", 0, -E2BIG, 0, 0, 0)); + KSTM_CHECK_ZERO(tc("", 0, -E2BIG, 0, 0, 0)); + + KSTM_CHECK_ZERO(tc("a", 1, -E2BIG, 0, 1, 0)); + KSTM_CHECK_ZERO(tc("", 1, 0, 0, 1, 0)); + + KSTM_CHECK_ZERO(tc("ab", 2, -E2BIG, 1, 1, 0)); + KSTM_CHECK_ZERO(tc("a", 2, 1, 1, 1, 0)); + KSTM_CHECK_ZERO(tc("", 2, 0, 0, 1, 1)); + + KSTM_CHECK_ZERO(tc("abc", 3, -E2BIG, 2, 1, 0)); + KSTM_CHECK_ZERO(tc("ab", 3, 2, 2, 1, 0)); + KSTM_CHECK_ZERO(tc("a", 3, 1, 1, 1, 1)); + KSTM_CHECK_ZERO(tc("", 3, 0, 0, 1, 2)); + + KSTM_CHECK_ZERO(tc("abcd", 4, -E2BIG, 3, 1, 0)); + KSTM_CHECK_ZERO(tc("abc", 4, 3, 3, 1, 0)); + KSTM_CHECK_ZERO(tc("ab", 4, 2, 2, 1, 1)); + KSTM_CHECK_ZERO(tc("a", 4, 1, 1, 1, 2)); + KSTM_CHECK_ZERO(tc("", 4, 0, 0, 1, 3)); +} + +KSTM_MODULE_LOADERS(test_strscpy); +MODULE_AUTHOR("Tobin C. Harding <tobin@kernel.org>"); +MODULE_LICENSE("GPL"); diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c index 280f4979d00e..9ea10adf7a66 100644 --- a/lib/test_ubsan.c +++ b/lib/test_ubsan.c @@ -42,14 +42,6 @@ static void test_ubsan_divrem_overflow(void) val /= val2; } -static void test_ubsan_vla_bound_not_positive(void) -{ - volatile int size = -1; - char buf[size]; - - (void)buf; -} - static void test_ubsan_shift_out_of_bounds(void) { volatile int val = -1; @@ -61,7 +53,7 @@ static void test_ubsan_shift_out_of_bounds(void) static void test_ubsan_out_of_bounds(void) { volatile int i = 4, j = 5; - volatile int arr[i]; + volatile int arr[4]; arr[j] = i; } @@ -113,7 +105,6 @@ static const test_ubsan_fp test_ubsan_array[] = { test_ubsan_mul_overflow, test_ubsan_negate_overflow, test_ubsan_divrem_overflow, - test_ubsan_vla_bound_not_positive, test_ubsan_shift_out_of_bounds, test_ubsan_out_of_bounds, test_ubsan_load_invalid_value, diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c new file mode 100644 index 000000000000..f832b095afba --- /dev/null +++ b/lib/test_vmalloc.c @@ -0,0 +1,551 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Test module for stress and analyze performance of vmalloc allocator. + * (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com> + */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/random.h> +#include <linux/kthread.h> +#include <linux/moduleparam.h> +#include <linux/completion.h> +#include <linux/delay.h> +#include <linux/rwsem.h> +#include <linux/mm.h> + +#define __param(type, name, init, msg) \ + static type name = init; \ + module_param(name, type, 0444); \ + MODULE_PARM_DESC(name, msg) \ + +__param(bool, single_cpu_test, false, + "Use single first online CPU to run tests"); + +__param(bool, sequential_test_order, false, + "Use sequential stress tests order"); + +__param(int, test_repeat_count, 1, + "Set test repeat counter"); + +__param(int, test_loop_count, 1000000, + "Set test loop counter"); + +__param(int, run_test_mask, INT_MAX, + "Set tests specified in the mask.\n\n" + "\t\tid: 1, name: fix_size_alloc_test\n" + "\t\tid: 2, name: full_fit_alloc_test\n" + "\t\tid: 4, name: long_busy_list_alloc_test\n" + "\t\tid: 8, name: random_size_alloc_test\n" + "\t\tid: 16, name: fix_align_alloc_test\n" + "\t\tid: 32, name: random_size_align_alloc_test\n" + "\t\tid: 64, name: align_shift_alloc_test\n" + "\t\tid: 128, name: pcpu_alloc_test\n" + /* Add a new test case description here. */ +); + +/* + * Depends on single_cpu_test parameter. If it is true, then + * use first online CPU to trigger a test on, otherwise go with + * all online CPUs. + */ +static cpumask_t cpus_run_test_mask = CPU_MASK_NONE; + +/* + * Read write semaphore for synchronization of setup + * phase that is done in main thread and workers. + */ +static DECLARE_RWSEM(prepare_for_test_rwsem); + +/* + * Completion tracking for worker threads. + */ +static DECLARE_COMPLETION(test_all_done_comp); +static atomic_t test_n_undone = ATOMIC_INIT(0); + +static inline void +test_report_one_done(void) +{ + if (atomic_dec_and_test(&test_n_undone)) + complete(&test_all_done_comp); +} + +static int random_size_align_alloc_test(void) +{ + unsigned long size, align, rnd; + void *ptr; + int i; + + for (i = 0; i < test_loop_count; i++) { + get_random_bytes(&rnd, sizeof(rnd)); + + /* + * Maximum 1024 pages, if PAGE_SIZE is 4096. + */ + align = 1 << (rnd % 23); + + /* + * Maximum 10 pages. + */ + size = ((rnd % 10) + 1) * PAGE_SIZE; + + ptr = __vmalloc_node_range(size, align, + VMALLOC_START, VMALLOC_END, + GFP_KERNEL | __GFP_ZERO, + PAGE_KERNEL, + 0, 0, __builtin_return_address(0)); + + if (!ptr) + return -1; + + vfree(ptr); + } + + return 0; +} + +/* + * This test case is supposed to be failed. + */ +static int align_shift_alloc_test(void) +{ + unsigned long align; + void *ptr; + int i; + + for (i = 0; i < BITS_PER_LONG; i++) { + align = ((unsigned long) 1) << i; + + ptr = __vmalloc_node_range(PAGE_SIZE, align, + VMALLOC_START, VMALLOC_END, + GFP_KERNEL | __GFP_ZERO, + PAGE_KERNEL, + 0, 0, __builtin_return_address(0)); + + if (!ptr) + return -1; + + vfree(ptr); + } + + return 0; +} + +static int fix_align_alloc_test(void) +{ + void *ptr; + int i; + + for (i = 0; i < test_loop_count; i++) { + ptr = __vmalloc_node_range(5 * PAGE_SIZE, + THREAD_ALIGN << 1, + VMALLOC_START, VMALLOC_END, + GFP_KERNEL | __GFP_ZERO, + PAGE_KERNEL, + 0, 0, __builtin_return_address(0)); + + if (!ptr) + return -1; + + vfree(ptr); + } + + return 0; +} + +static int random_size_alloc_test(void) +{ + unsigned int n; + void *p; + int i; + + for (i = 0; i < test_loop_count; i++) { + get_random_bytes(&n, sizeof(i)); + n = (n % 100) + 1; + + p = vmalloc(n * PAGE_SIZE); + + if (!p) + return -1; + + *((__u8 *)p) = 1; + vfree(p); + } + + return 0; +} + +static int long_busy_list_alloc_test(void) +{ + void *ptr_1, *ptr_2; + void **ptr; + int rv = -1; + int i; + + ptr = vmalloc(sizeof(void *) * 15000); + if (!ptr) + return rv; + + for (i = 0; i < 15000; i++) + ptr[i] = vmalloc(1 * PAGE_SIZE); + + for (i = 0; i < test_loop_count; i++) { + ptr_1 = vmalloc(100 * PAGE_SIZE); + if (!ptr_1) + goto leave; + + ptr_2 = vmalloc(1 * PAGE_SIZE); + if (!ptr_2) { + vfree(ptr_1); + goto leave; + } + + *((__u8 *)ptr_1) = 0; + *((__u8 *)ptr_2) = 1; + + vfree(ptr_1); + vfree(ptr_2); + } + + /* Success */ + rv = 0; + +leave: + for (i = 0; i < 15000; i++) + vfree(ptr[i]); + + vfree(ptr); + return rv; +} + +static int full_fit_alloc_test(void) +{ + void **ptr, **junk_ptr, *tmp; + int junk_length; + int rv = -1; + int i; + + junk_length = fls(num_online_cpus()); + junk_length *= (32 * 1024 * 1024 / PAGE_SIZE); + + ptr = vmalloc(sizeof(void *) * junk_length); + if (!ptr) + return rv; + + junk_ptr = vmalloc(sizeof(void *) * junk_length); + if (!junk_ptr) { + vfree(ptr); + return rv; + } + + for (i = 0; i < junk_length; i++) { + ptr[i] = vmalloc(1 * PAGE_SIZE); + junk_ptr[i] = vmalloc(1 * PAGE_SIZE); + } + + for (i = 0; i < junk_length; i++) + vfree(junk_ptr[i]); + + for (i = 0; i < test_loop_count; i++) { + tmp = vmalloc(1 * PAGE_SIZE); + + if (!tmp) + goto error; + + *((__u8 *)tmp) = 1; + vfree(tmp); + } + + /* Success */ + rv = 0; + +error: + for (i = 0; i < junk_length; i++) + vfree(ptr[i]); + + vfree(ptr); + vfree(junk_ptr); + + return rv; +} + +static int fix_size_alloc_test(void) +{ + void *ptr; + int i; + + for (i = 0; i < test_loop_count; i++) { + ptr = vmalloc(3 * PAGE_SIZE); + + if (!ptr) + return -1; + + *((__u8 *)ptr) = 0; + + vfree(ptr); + } + + return 0; +} + +static int +pcpu_alloc_test(void) +{ + int rv = 0; +#ifndef CONFIG_NEED_PER_CPU_KM + void __percpu **pcpu; + size_t size, align; + int i; + + pcpu = vmalloc(sizeof(void __percpu *) * 35000); + if (!pcpu) + return -1; + + for (i = 0; i < 35000; i++) { + unsigned int r; + + get_random_bytes(&r, sizeof(i)); + size = (r % (PAGE_SIZE / 4)) + 1; + + /* + * Maximum PAGE_SIZE + */ + get_random_bytes(&r, sizeof(i)); + align = 1 << ((i % 11) + 1); + + pcpu[i] = __alloc_percpu(size, align); + if (!pcpu[i]) + rv = -1; + } + + for (i = 0; i < 35000; i++) + free_percpu(pcpu[i]); + + vfree(pcpu); +#endif + return rv; +} + +struct test_case_desc { + const char *test_name; + int (*test_func)(void); +}; + +static struct test_case_desc test_case_array[] = { + { "fix_size_alloc_test", fix_size_alloc_test }, + { "full_fit_alloc_test", full_fit_alloc_test }, + { "long_busy_list_alloc_test", long_busy_list_alloc_test }, + { "random_size_alloc_test", random_size_alloc_test }, + { "fix_align_alloc_test", fix_align_alloc_test }, + { "random_size_align_alloc_test", random_size_align_alloc_test }, + { "align_shift_alloc_test", align_shift_alloc_test }, + { "pcpu_alloc_test", pcpu_alloc_test }, + /* Add a new test case here. */ +}; + +struct test_case_data { + int test_failed; + int test_passed; + u64 time; +}; + +/* Split it to get rid of: WARNING: line over 80 characters */ +static struct test_case_data + per_cpu_test_data[NR_CPUS][ARRAY_SIZE(test_case_array)]; + +static struct test_driver { + struct task_struct *task; + unsigned long start; + unsigned long stop; + int cpu; +} per_cpu_test_driver[NR_CPUS]; + +static void shuffle_array(int *arr, int n) +{ + unsigned int rnd; + int i, j, x; + + for (i = n - 1; i > 0; i--) { + get_random_bytes(&rnd, sizeof(rnd)); + + /* Cut the range. */ + j = rnd % i; + + /* Swap indexes. */ + x = arr[i]; + arr[i] = arr[j]; + arr[j] = x; + } +} + +static int test_func(void *private) +{ + struct test_driver *t = private; + int random_array[ARRAY_SIZE(test_case_array)]; + int index, i, j, ret; + ktime_t kt; + u64 delta; + + ret = set_cpus_allowed_ptr(current, cpumask_of(t->cpu)); + if (ret < 0) + pr_err("Failed to set affinity to %d CPU\n", t->cpu); + + for (i = 0; i < ARRAY_SIZE(test_case_array); i++) + random_array[i] = i; + + if (!sequential_test_order) + shuffle_array(random_array, ARRAY_SIZE(test_case_array)); + + /* + * Block until initialization is done. + */ + down_read(&prepare_for_test_rwsem); + + t->start = get_cycles(); + for (i = 0; i < ARRAY_SIZE(test_case_array); i++) { + index = random_array[i]; + + /* + * Skip tests if run_test_mask has been specified. + */ + if (!((run_test_mask & (1 << index)) >> index)) + continue; + + kt = ktime_get(); + for (j = 0; j < test_repeat_count; j++) { + ret = test_case_array[index].test_func(); + if (!ret) + per_cpu_test_data[t->cpu][index].test_passed++; + else + per_cpu_test_data[t->cpu][index].test_failed++; + } + + /* + * Take an average time that test took. + */ + delta = (u64) ktime_us_delta(ktime_get(), kt); + do_div(delta, (u32) test_repeat_count); + + per_cpu_test_data[t->cpu][index].time = delta; + } + t->stop = get_cycles(); + + up_read(&prepare_for_test_rwsem); + test_report_one_done(); + + /* + * Wait for the kthread_stop() call. + */ + while (!kthread_should_stop()) + msleep(10); + + return 0; +} + +static void +init_test_configurtion(void) +{ + /* + * Reset all data of all CPUs. + */ + memset(per_cpu_test_data, 0, sizeof(per_cpu_test_data)); + + if (single_cpu_test) + cpumask_set_cpu(cpumask_first(cpu_online_mask), + &cpus_run_test_mask); + else + cpumask_and(&cpus_run_test_mask, cpu_online_mask, + cpu_online_mask); + + if (test_repeat_count <= 0) + test_repeat_count = 1; + + if (test_loop_count <= 0) + test_loop_count = 1; +} + +static void do_concurrent_test(void) +{ + int cpu, ret; + + /* + * Set some basic configurations plus sanity check. + */ + init_test_configurtion(); + + /* + * Put on hold all workers. + */ + down_write(&prepare_for_test_rwsem); + + for_each_cpu(cpu, &cpus_run_test_mask) { + struct test_driver *t = &per_cpu_test_driver[cpu]; + + t->cpu = cpu; + t->task = kthread_run(test_func, t, "vmalloc_test/%d", cpu); + + if (!IS_ERR(t->task)) + /* Success. */ + atomic_inc(&test_n_undone); + else + pr_err("Failed to start kthread for %d CPU\n", cpu); + } + + /* + * Now let the workers do their job. + */ + up_write(&prepare_for_test_rwsem); + + /* + * Sleep quiet until all workers are done with 1 second + * interval. Since the test can take a lot of time we + * can run into a stack trace of the hung task. That is + * why we go with completion_timeout and HZ value. + */ + do { + ret = wait_for_completion_timeout(&test_all_done_comp, HZ); + } while (!ret); + + for_each_cpu(cpu, &cpus_run_test_mask) { + struct test_driver *t = &per_cpu_test_driver[cpu]; + int i; + + if (!IS_ERR(t->task)) + kthread_stop(t->task); + + for (i = 0; i < ARRAY_SIZE(test_case_array); i++) { + if (!((run_test_mask & (1 << i)) >> i)) + continue; + + pr_info( + "Summary: %s passed: %d failed: %d repeat: %d loops: %d avg: %llu usec\n", + test_case_array[i].test_name, + per_cpu_test_data[cpu][i].test_passed, + per_cpu_test_data[cpu][i].test_failed, + test_repeat_count, test_loop_count, + per_cpu_test_data[cpu][i].time); + } + + pr_info("All test took CPU%d=%lu cycles\n", + cpu, t->stop - t->start); + } +} + +static int vmalloc_test_init(void) +{ + do_concurrent_test(); + return -EAGAIN; /* Fail will directly unload the module */ +} + +static void vmalloc_test_exit(void) +{ +} + +module_init(vmalloc_test_init) +module_exit(vmalloc_test_exit) + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Uladzislau Rezki"); +MODULE_DESCRIPTION("vmalloc test module"); diff --git a/lib/test_xarray.c b/lib/test_xarray.c index c596a957f764..5d4bad8bd96a 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -40,9 +40,9 @@ static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp) static void xa_alloc_index(struct xarray *xa, unsigned long index, gfp_t gfp) { - u32 id = 0; + u32 id; - XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(index), + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(index), xa_limit_32b, gfp) != 0); XA_BUG_ON(xa, id != index); } @@ -107,8 +107,11 @@ static noinline void check_xas_retry(struct xarray *xa) XA_BUG_ON(xa, xas.xa_node != XAS_RESTART); XA_BUG_ON(xa, xas_next_entry(&xas, ULONG_MAX) != xa_mk_value(0)); XA_BUG_ON(xa, xas.xa_node != NULL); + rcu_read_unlock(); XA_BUG_ON(xa, xa_store_index(xa, 1, GFP_KERNEL) != NULL); + + rcu_read_lock(); XA_BUG_ON(xa, !xa_is_internal(xas_reload(&xas))); xas.xa_node = XAS_RESTART; XA_BUG_ON(xa, xas_next_entry(&xas, ULONG_MAX) != xa_mk_value(0)); @@ -343,7 +346,7 @@ static noinline void check_cmpxchg(struct xarray *xa) XA_BUG_ON(xa, !xa_empty(xa)); XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_KERNEL) != NULL); - XA_BUG_ON(xa, xa_insert(xa, 12345678, xa, GFP_KERNEL) != -EEXIST); + XA_BUG_ON(xa, xa_insert(xa, 12345678, xa, GFP_KERNEL) != -EBUSY); XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, SIX, FIVE, GFP_KERNEL) != LOTS); XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, LOTS, FIVE, GFP_KERNEL) != LOTS); XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, FIVE, LOTS, GFP_KERNEL) != FIVE); @@ -358,46 +361,65 @@ static noinline void check_reserve(struct xarray *xa) { void *entry; unsigned long index; + int count; /* An array with a reserved entry is not empty */ XA_BUG_ON(xa, !xa_empty(xa)); - xa_reserve(xa, 12345678, GFP_KERNEL); + XA_BUG_ON(xa, xa_reserve(xa, 12345678, GFP_KERNEL) != 0); XA_BUG_ON(xa, xa_empty(xa)); XA_BUG_ON(xa, xa_load(xa, 12345678)); xa_release(xa, 12345678); XA_BUG_ON(xa, !xa_empty(xa)); /* Releasing a used entry does nothing */ - xa_reserve(xa, 12345678, GFP_KERNEL); + XA_BUG_ON(xa, xa_reserve(xa, 12345678, GFP_KERNEL) != 0); XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_NOWAIT) != NULL); xa_release(xa, 12345678); xa_erase_index(xa, 12345678); XA_BUG_ON(xa, !xa_empty(xa)); - /* cmpxchg sees a reserved entry as NULL */ - xa_reserve(xa, 12345678, GFP_KERNEL); - XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, NULL, xa_mk_value(12345678), - GFP_NOWAIT) != NULL); + /* cmpxchg sees a reserved entry as ZERO */ + XA_BUG_ON(xa, xa_reserve(xa, 12345678, GFP_KERNEL) != 0); + XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, XA_ZERO_ENTRY, + xa_mk_value(12345678), GFP_NOWAIT) != NULL); xa_release(xa, 12345678); xa_erase_index(xa, 12345678); XA_BUG_ON(xa, !xa_empty(xa)); - /* But xa_insert does not */ - xa_reserve(xa, 12345678, GFP_KERNEL); + /* xa_insert treats it as busy */ + XA_BUG_ON(xa, xa_reserve(xa, 12345678, GFP_KERNEL) != 0); XA_BUG_ON(xa, xa_insert(xa, 12345678, xa_mk_value(12345678), 0) != - -EEXIST); + -EBUSY); XA_BUG_ON(xa, xa_empty(xa)); XA_BUG_ON(xa, xa_erase(xa, 12345678) != NULL); XA_BUG_ON(xa, !xa_empty(xa)); /* Can iterate through a reserved entry */ xa_store_index(xa, 5, GFP_KERNEL); - xa_reserve(xa, 6, GFP_KERNEL); + XA_BUG_ON(xa, xa_reserve(xa, 6, GFP_KERNEL) != 0); xa_store_index(xa, 7, GFP_KERNEL); + count = 0; xa_for_each(xa, index, entry) { XA_BUG_ON(xa, index != 5 && index != 7); + count++; + } + XA_BUG_ON(xa, count != 2); + + /* If we free a reserved entry, we should be able to allocate it */ + if (xa->xa_flags & XA_FLAGS_ALLOC) { + u32 id; + + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_value(8), + XA_LIMIT(5, 10), GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != 8); + + xa_release(xa, 6); + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_value(6), + XA_LIMIT(5, 10), GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != 6); } + xa_destroy(xa); } @@ -586,64 +608,194 @@ static noinline void check_multi_store(struct xarray *xa) #endif } -static DEFINE_XARRAY_ALLOC(xa0); - -static noinline void check_xa_alloc(void) +static noinline void check_xa_alloc_1(struct xarray *xa, unsigned int base) { int i; u32 id; - /* An empty array should assign 0 to the first alloc */ - xa_alloc_index(&xa0, 0, GFP_KERNEL); + XA_BUG_ON(xa, !xa_empty(xa)); + /* An empty array should assign %base to the first alloc */ + xa_alloc_index(xa, base, GFP_KERNEL); /* Erasing it should make the array empty again */ - xa_erase_index(&xa0, 0); - XA_BUG_ON(&xa0, !xa_empty(&xa0)); + xa_erase_index(xa, base); + XA_BUG_ON(xa, !xa_empty(xa)); + + /* And it should assign %base again */ + xa_alloc_index(xa, base, GFP_KERNEL); + + /* Allocating and then erasing a lot should not lose base */ + for (i = base + 1; i < 2 * XA_CHUNK_SIZE; i++) + xa_alloc_index(xa, i, GFP_KERNEL); + for (i = base; i < 2 * XA_CHUNK_SIZE; i++) + xa_erase_index(xa, i); + xa_alloc_index(xa, base, GFP_KERNEL); - /* And it should assign 0 again */ - xa_alloc_index(&xa0, 0, GFP_KERNEL); + /* Destroying the array should do the same as erasing */ + xa_destroy(xa); + + /* And it should assign %base again */ + xa_alloc_index(xa, base, GFP_KERNEL); - /* The next assigned ID should be 1 */ - xa_alloc_index(&xa0, 1, GFP_KERNEL); - xa_erase_index(&xa0, 1); + /* The next assigned ID should be base+1 */ + xa_alloc_index(xa, base + 1, GFP_KERNEL); + xa_erase_index(xa, base + 1); /* Storing a value should mark it used */ - xa_store_index(&xa0, 1, GFP_KERNEL); - xa_alloc_index(&xa0, 2, GFP_KERNEL); + xa_store_index(xa, base + 1, GFP_KERNEL); + xa_alloc_index(xa, base + 2, GFP_KERNEL); - /* If we then erase 0, it should be free */ - xa_erase_index(&xa0, 0); - xa_alloc_index(&xa0, 0, GFP_KERNEL); + /* If we then erase base, it should be free */ + xa_erase_index(xa, base); + xa_alloc_index(xa, base, GFP_KERNEL); - xa_erase_index(&xa0, 1); - xa_erase_index(&xa0, 2); + xa_erase_index(xa, base + 1); + xa_erase_index(xa, base + 2); for (i = 1; i < 5000; i++) { - xa_alloc_index(&xa0, i, GFP_KERNEL); + xa_alloc_index(xa, base + i, GFP_KERNEL); } - xa_destroy(&xa0); + xa_destroy(xa); - id = 0xfffffffeU; - XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_index(id), + /* Check that we fail properly at the limit of allocation */ + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(UINT_MAX - 1), + XA_LIMIT(UINT_MAX - 1, UINT_MAX), GFP_KERNEL) != 0); - XA_BUG_ON(&xa0, id != 0xfffffffeU); - XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_index(id), + XA_BUG_ON(xa, id != 0xfffffffeU); + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(UINT_MAX), + XA_LIMIT(UINT_MAX - 1, UINT_MAX), GFP_KERNEL) != 0); - XA_BUG_ON(&xa0, id != 0xffffffffU); - XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_index(id), - GFP_KERNEL) != -ENOSPC); - XA_BUG_ON(&xa0, id != 0xffffffffU); - xa_destroy(&xa0); - - id = 10; - XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, 5, xa_mk_index(id), - GFP_KERNEL) != -ENOSPC); - XA_BUG_ON(&xa0, xa_store_index(&xa0, 3, GFP_KERNEL) != 0); - XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, 5, xa_mk_index(id), - GFP_KERNEL) != -ENOSPC); - xa_erase_index(&xa0, 3); - XA_BUG_ON(&xa0, !xa_empty(&xa0)); + XA_BUG_ON(xa, id != 0xffffffffU); + id = 3; + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(0), + XA_LIMIT(UINT_MAX - 1, UINT_MAX), + GFP_KERNEL) != -EBUSY); + XA_BUG_ON(xa, id != 3); + xa_destroy(xa); + + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(10), XA_LIMIT(10, 5), + GFP_KERNEL) != -EBUSY); + XA_BUG_ON(xa, xa_store_index(xa, 3, GFP_KERNEL) != 0); + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(10), XA_LIMIT(10, 5), + GFP_KERNEL) != -EBUSY); + xa_erase_index(xa, 3); + XA_BUG_ON(xa, !xa_empty(xa)); +} + +static noinline void check_xa_alloc_2(struct xarray *xa, unsigned int base) +{ + unsigned int i, id; + unsigned long index; + void *entry; + + /* Allocate and free a NULL and check xa_empty() behaves */ + XA_BUG_ON(xa, !xa_empty(xa)); + XA_BUG_ON(xa, xa_alloc(xa, &id, NULL, xa_limit_32b, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != base); + XA_BUG_ON(xa, xa_empty(xa)); + XA_BUG_ON(xa, xa_erase(xa, id) != NULL); + XA_BUG_ON(xa, !xa_empty(xa)); + + /* Ditto, but check destroy instead of erase */ + XA_BUG_ON(xa, !xa_empty(xa)); + XA_BUG_ON(xa, xa_alloc(xa, &id, NULL, xa_limit_32b, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != base); + XA_BUG_ON(xa, xa_empty(xa)); + xa_destroy(xa); + XA_BUG_ON(xa, !xa_empty(xa)); + + for (i = base; i < base + 10; i++) { + XA_BUG_ON(xa, xa_alloc(xa, &id, NULL, xa_limit_32b, + GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != i); + } + + XA_BUG_ON(xa, xa_store(xa, 3, xa_mk_index(3), GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_store(xa, 4, xa_mk_index(4), GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_store(xa, 4, NULL, GFP_KERNEL) != xa_mk_index(4)); + XA_BUG_ON(xa, xa_erase(xa, 5) != NULL); + XA_BUG_ON(xa, xa_alloc(xa, &id, NULL, xa_limit_32b, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != 5); + + xa_for_each(xa, index, entry) { + xa_erase_index(xa, index); + } + + for (i = base; i < base + 9; i++) { + XA_BUG_ON(xa, xa_erase(xa, i) != NULL); + XA_BUG_ON(xa, xa_empty(xa)); + } + XA_BUG_ON(xa, xa_erase(xa, 8) != NULL); + XA_BUG_ON(xa, xa_empty(xa)); + XA_BUG_ON(xa, xa_erase(xa, base + 9) != NULL); + XA_BUG_ON(xa, !xa_empty(xa)); + + xa_destroy(xa); +} + +static noinline void check_xa_alloc_3(struct xarray *xa, unsigned int base) +{ + struct xa_limit limit = XA_LIMIT(1, 0x3fff); + u32 next = 0; + unsigned int i, id; + unsigned long index; + void *entry; + + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(1), limit, + &next, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != 1); + + next = 0x3ffd; + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(0x3ffd), limit, + &next, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != 0x3ffd); + xa_erase_index(xa, 0x3ffd); + xa_erase_index(xa, 1); + XA_BUG_ON(xa, !xa_empty(xa)); + + for (i = 0x3ffe; i < 0x4003; i++) { + if (i < 0x4000) + entry = xa_mk_index(i); + else + entry = xa_mk_index(i - 0x3fff); + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, entry, limit, + &next, GFP_KERNEL) != (id == 1)); + XA_BUG_ON(xa, xa_mk_index(id) != entry); + } + + /* Check wrap-around is handled correctly */ + if (base != 0) + xa_erase_index(xa, base); + xa_erase_index(xa, base + 1); + next = UINT_MAX; + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(UINT_MAX), + xa_limit_32b, &next, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != UINT_MAX); + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(base), + xa_limit_32b, &next, GFP_KERNEL) != 1); + XA_BUG_ON(xa, id != base); + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(base + 1), + xa_limit_32b, &next, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != base + 1); + + xa_for_each(xa, index, entry) + xa_erase_index(xa, index); + + XA_BUG_ON(xa, !xa_empty(xa)); +} + +static DEFINE_XARRAY_ALLOC(xa0); +static DEFINE_XARRAY_ALLOC1(xa1); + +static noinline void check_xa_alloc(void) +{ + check_xa_alloc_1(&xa0, 0); + check_xa_alloc_1(&xa1, 1); + check_xa_alloc_2(&xa0, 0); + check_xa_alloc_2(&xa1, 1); + check_xa_alloc_3(&xa0, 0); + check_xa_alloc_3(&xa1, 1); } static noinline void __check_store_iter(struct xarray *xa, unsigned long start, @@ -1194,9 +1346,8 @@ static void check_align_1(struct xarray *xa, char *name) void *entry; for (i = 0; i < 8; i++) { - id = 0; - XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, name + i, GFP_KERNEL) - != 0); + XA_BUG_ON(xa, xa_alloc(xa, &id, name + i, xa_limit_32b, + GFP_KERNEL) != 0); XA_BUG_ON(xa, id != i); } xa_for_each(xa, index, entry) @@ -1204,6 +1355,30 @@ static void check_align_1(struct xarray *xa, char *name) xa_destroy(xa); } +/* + * We should always be able to store without allocating memory after + * reserving a slot. + */ +static void check_align_2(struct xarray *xa, char *name) +{ + int i; + + XA_BUG_ON(xa, !xa_empty(xa)); + + for (i = 0; i < 8; i++) { + XA_BUG_ON(xa, xa_store(xa, 0, name + i, GFP_KERNEL) != NULL); + xa_erase(xa, 0); + } + + for (i = 0; i < 8; i++) { + XA_BUG_ON(xa, xa_reserve(xa, 0, GFP_KERNEL) != 0); + XA_BUG_ON(xa, xa_store(xa, 0, name + i, 0) != NULL); + xa_erase(xa, 0); + } + + XA_BUG_ON(xa, !xa_empty(xa)); +} + static noinline void check_align(struct xarray *xa) { char name[] = "Motorola 68000"; @@ -1212,7 +1387,7 @@ static noinline void check_align(struct xarray *xa) check_align_1(xa, name + 1); check_align_1(xa, name + 2); check_align_1(xa, name + 3); -// check_align_2(xa, name); + check_align_2(xa, name); } static LIST_HEAD(shadow_nodes); @@ -1354,6 +1529,7 @@ static int xarray_checks(void) check_xas_erase(&array); check_cmpxchg(&array); check_reserve(&array); + check_reserve(&xa0); check_multi_store(&array); check_xa_alloc(); check_find(&array); diff --git a/lib/ubsan.c b/lib/ubsan.c index e4162f59a81c..ecc179338094 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -17,6 +17,7 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/sched.h> +#include <linux/uaccess.h> #include "ubsan.h" @@ -86,11 +87,13 @@ static bool is_inline_int(struct type_descriptor *type) return bits <= inline_bits; } -static s_max get_signed_val(struct type_descriptor *type, unsigned long val) +static s_max get_signed_val(struct type_descriptor *type, void *val) { if (is_inline_int(type)) { unsigned extra_bits = sizeof(s_max)*8 - type_bit_width(type); - return ((s_max)val) << extra_bits >> extra_bits; + unsigned long ulong_val = (unsigned long)val; + + return ((s_max)ulong_val) << extra_bits >> extra_bits; } if (type_bit_width(type) == 64) @@ -99,15 +102,15 @@ static s_max get_signed_val(struct type_descriptor *type, unsigned long val) return *(s_max *)val; } -static bool val_is_negative(struct type_descriptor *type, unsigned long val) +static bool val_is_negative(struct type_descriptor *type, void *val) { return type_is_signed(type) && get_signed_val(type, val) < 0; } -static u_max get_unsigned_val(struct type_descriptor *type, unsigned long val) +static u_max get_unsigned_val(struct type_descriptor *type, void *val) { if (is_inline_int(type)) - return val; + return (unsigned long)val; if (type_bit_width(type) == 64) return *(u64 *)val; @@ -116,7 +119,7 @@ static u_max get_unsigned_val(struct type_descriptor *type, unsigned long val) } static void val_to_string(char *str, size_t size, struct type_descriptor *type, - unsigned long value) + void *value) { if (type_is_int(type)) { if (type_bit_width(type) == 128) { @@ -163,8 +166,8 @@ static void ubsan_epilogue(unsigned long *flags) current->in_ubsan--; } -static void handle_overflow(struct overflow_data *data, unsigned long lhs, - unsigned long rhs, char op) +static void handle_overflow(struct overflow_data *data, void *lhs, + void *rhs, char op) { struct type_descriptor *type = data->type; @@ -191,8 +194,7 @@ static void handle_overflow(struct overflow_data *data, unsigned long lhs, } void __ubsan_handle_add_overflow(struct overflow_data *data, - unsigned long lhs, - unsigned long rhs) + void *lhs, void *rhs) { handle_overflow(data, lhs, rhs, '+'); @@ -200,23 +202,21 @@ void __ubsan_handle_add_overflow(struct overflow_data *data, EXPORT_SYMBOL(__ubsan_handle_add_overflow); void __ubsan_handle_sub_overflow(struct overflow_data *data, - unsigned long lhs, - unsigned long rhs) + void *lhs, void *rhs) { handle_overflow(data, lhs, rhs, '-'); } EXPORT_SYMBOL(__ubsan_handle_sub_overflow); void __ubsan_handle_mul_overflow(struct overflow_data *data, - unsigned long lhs, - unsigned long rhs) + void *lhs, void *rhs) { handle_overflow(data, lhs, rhs, '*'); } EXPORT_SYMBOL(__ubsan_handle_mul_overflow); void __ubsan_handle_negate_overflow(struct overflow_data *data, - unsigned long old_val) + void *old_val) { unsigned long flags; char old_val_str[VALUE_LENGTH]; @@ -237,8 +237,7 @@ EXPORT_SYMBOL(__ubsan_handle_negate_overflow); void __ubsan_handle_divrem_overflow(struct overflow_data *data, - unsigned long lhs, - unsigned long rhs) + void *lhs, void *rhs) { unsigned long flags; char rhs_val_str[VALUE_LENGTH]; @@ -313,6 +312,7 @@ static void handle_object_size_mismatch(struct type_mismatch_data_common *data, static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data, unsigned long ptr) { + unsigned long flags = user_access_save(); if (!ptr) handle_null_ptr_deref(data); @@ -320,10 +320,12 @@ static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data, handle_misaligned_access(data, ptr); else handle_object_size_mismatch(data, ptr); + + user_access_restore(flags); } void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, - unsigned long ptr) + void *ptr) { struct type_mismatch_data_common common_data = { .location = &data->location, @@ -332,12 +334,12 @@ void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, .type_check_kind = data->type_check_kind }; - ubsan_type_mismatch_common(&common_data, ptr); + ubsan_type_mismatch_common(&common_data, (unsigned long)ptr); } EXPORT_SYMBOL(__ubsan_handle_type_mismatch); void __ubsan_handle_type_mismatch_v1(struct type_mismatch_data_v1 *data, - unsigned long ptr) + void *ptr) { struct type_mismatch_data_common common_data = { @@ -347,30 +349,11 @@ void __ubsan_handle_type_mismatch_v1(struct type_mismatch_data_v1 *data, .type_check_kind = data->type_check_kind }; - ubsan_type_mismatch_common(&common_data, ptr); + ubsan_type_mismatch_common(&common_data, (unsigned long)ptr); } EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1); -void __ubsan_handle_vla_bound_not_positive(struct vla_bound_data *data, - unsigned long bound) -{ - unsigned long flags; - char bound_str[VALUE_LENGTH]; - - if (suppress_report(&data->location)) - return; - - ubsan_prologue(&data->location, &flags); - - val_to_string(bound_str, sizeof(bound_str), data->type, bound); - pr_err("variable length array bound value %s <= 0\n", bound_str); - - ubsan_epilogue(&flags); -} -EXPORT_SYMBOL(__ubsan_handle_vla_bound_not_positive); - -void __ubsan_handle_out_of_bounds(struct out_of_bounds_data *data, - unsigned long index) +void __ubsan_handle_out_of_bounds(struct out_of_bounds_data *data, void *index) { unsigned long flags; char index_str[VALUE_LENGTH]; @@ -388,7 +371,7 @@ void __ubsan_handle_out_of_bounds(struct out_of_bounds_data *data, EXPORT_SYMBOL(__ubsan_handle_out_of_bounds); void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data, - unsigned long lhs, unsigned long rhs) + void *lhs, void *rhs) { unsigned long flags; struct type_descriptor *rhs_type = data->rhs_type; @@ -439,7 +422,7 @@ void __ubsan_handle_builtin_unreachable(struct unreachable_data *data) EXPORT_SYMBOL(__ubsan_handle_builtin_unreachable); void __ubsan_handle_load_invalid_value(struct invalid_value_data *data, - unsigned long val) + void *val) { unsigned long flags; char val_str[VALUE_LENGTH]; diff --git a/lib/ubsan.h b/lib/ubsan.h index f4d8d0bd4016..b8fa83864467 100644 --- a/lib/ubsan.h +++ b/lib/ubsan.h @@ -57,11 +57,6 @@ struct nonnull_arg_data { int arg_index; }; -struct vla_bound_data { - struct source_location location; - struct type_descriptor *type; -}; - struct out_of_bounds_data { struct source_location location; struct type_descriptor *array_type; diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 3add92329bae..2f003cfe340e 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -17,6 +17,7 @@ */ #include <stdarg.h> +#include <linux/build_bug.h> #include <linux/clk.h> #include <linux/clk-provider.h> #include <linux/module.h> /* for KSYM_SYMBOL_LEN */ @@ -405,6 +406,8 @@ struct printf_spec { unsigned int base:8; /* number base, 8, 10 or 16 only */ signed int precision:16; /* # of digits/chars */ } __packed; +static_assert(sizeof(struct printf_spec) == 8); + #define FIELD_WIDTH_MAX ((1 << 23) - 1) #define PRECISION_MAX ((1 << 15) - 1) @@ -422,8 +425,6 @@ char *number(char *buf, char *end, unsigned long long num, int field_width = spec.field_width; int precision = spec.precision; - BUILD_BUG_ON(sizeof(struct printf_spec) != 8); - /* locase = 0 or 0x20. ORing digits or letters with 'locase' * produces same digits or (maybe lowercased) letters */ locase = (spec.flags & SMALL); @@ -592,15 +593,13 @@ char *widen_string(char *buf, int n, char *end, struct printf_spec spec) return buf; } -static noinline_for_stack -char *string(char *buf, char *end, const char *s, struct printf_spec spec) +/* Handle string from a well known address. */ +static char *string_nocheck(char *buf, char *end, const char *s, + struct printf_spec spec) { int len = 0; size_t lim = spec.precision; - if ((unsigned long)s < PAGE_SIZE) - s = "(null)"; - while (lim--) { char c = *s++; if (!c) @@ -613,9 +612,64 @@ char *string(char *buf, char *end, const char *s, struct printf_spec spec) return widen_string(buf, len, end, spec); } +/* Be careful: error messages must fit into the given buffer. */ +static char *error_string(char *buf, char *end, const char *s, + struct printf_spec spec) +{ + /* + * Hard limit to avoid a completely insane messages. It actually + * works pretty well because most error messages are in + * the many pointer format modifiers. + */ + if (spec.precision == -1) + spec.precision = 2 * sizeof(void *); + + return string_nocheck(buf, end, s, spec); +} + +/* + * Do not call any complex external code here. Nested printk()/vsprintf() + * might cause infinite loops. Failures might break printk() and would + * be hard to debug. + */ +static const char *check_pointer_msg(const void *ptr) +{ + if (!ptr) + return "(null)"; + + if ((unsigned long)ptr < PAGE_SIZE || IS_ERR_VALUE(ptr)) + return "(efault)"; + + return NULL; +} + +static int check_pointer(char **buf, char *end, const void *ptr, + struct printf_spec spec) +{ + const char *err_msg; + + err_msg = check_pointer_msg(ptr); + if (err_msg) { + *buf = error_string(*buf, end, err_msg, spec); + return -EFAULT; + } + + return 0; +} + static noinline_for_stack -char *pointer_string(char *buf, char *end, const void *ptr, - struct printf_spec spec) +char *string(char *buf, char *end, const char *s, + struct printf_spec spec) +{ + if (check_pointer(&buf, end, s, spec)) + return buf; + + return string_nocheck(buf, end, s, spec); +} + +static char *pointer_string(char *buf, char *end, + const void *ptr, + struct printf_spec spec) { spec.base = 16; spec.flags |= SMALL; @@ -700,7 +754,7 @@ static char *ptr_to_id(char *buf, char *end, const void *ptr, if (static_branch_unlikely(¬_filled_random_ptr_key)) { spec.field_width = 2 * sizeof(ptr); /* string length must be less than default_width */ - return string(buf, end, str, spec); + return error_string(buf, end, str, spec); } #ifdef CONFIG_64BIT @@ -716,6 +770,55 @@ static char *ptr_to_id(char *buf, char *end, const void *ptr, return pointer_string(buf, end, (const void *)hashval, spec); } +int kptr_restrict __read_mostly; + +static noinline_for_stack +char *restricted_pointer(char *buf, char *end, const void *ptr, + struct printf_spec spec) +{ + switch (kptr_restrict) { + case 0: + /* Handle as %p, hash and do _not_ leak addresses. */ + return ptr_to_id(buf, end, ptr, spec); + case 1: { + const struct cred *cred; + + /* + * kptr_restrict==1 cannot be used in IRQ context + * because its test for CAP_SYSLOG would be meaningless. + */ + if (in_irq() || in_serving_softirq() || in_nmi()) { + if (spec.field_width == -1) + spec.field_width = 2 * sizeof(ptr); + return error_string(buf, end, "pK-error", spec); + } + + /* + * Only print the real pointer value if the current + * process has CAP_SYSLOG and is running with the + * same credentials it started with. This is because + * access to files is checked at open() time, but %pK + * checks permission at read() time. We don't want to + * leak pointer values if a binary opens a file using + * %pK and then elevates privileges before reading it. + */ + cred = current_cred(); + if (!has_capability_noaudit(current, CAP_SYSLOG) || + !uid_eq(cred->euid, cred->uid) || + !gid_eq(cred->egid, cred->gid)) + ptr = NULL; + break; + } + case 2: + default: + /* Always print 0's for %pK */ + ptr = NULL; + break; + } + + return pointer_string(buf, end, ptr, spec); +} + static noinline_for_stack char *dentry_name(char *buf, char *end, const struct dentry *d, struct printf_spec spec, const char *fmt) @@ -735,6 +838,11 @@ char *dentry_name(char *buf, char *end, const struct dentry *d, struct printf_sp rcu_read_lock(); for (i = 0; i < depth; i++, d = p) { + if (check_pointer(&buf, end, d, spec)) { + rcu_read_unlock(); + return buf; + } + p = READ_ONCE(d->d_parent); array[i] = READ_ONCE(d->d_name.name); if (p == d) { @@ -765,8 +873,12 @@ static noinline_for_stack char *bdev_name(char *buf, char *end, struct block_device *bdev, struct printf_spec spec, const char *fmt) { - struct gendisk *hd = bdev->bd_disk; - + struct gendisk *hd; + + if (check_pointer(&buf, end, bdev, spec)) + return buf; + + hd = bdev->bd_disk; buf = string(buf, end, hd->disk_name, spec); if (bdev->bd_part->partno) { if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) { @@ -801,7 +913,7 @@ char *symbol_string(char *buf, char *end, void *ptr, else sprint_symbol_no_offset(sym, value); - return string(buf, end, sym, spec); + return string_nocheck(buf, end, sym, spec); #else return special_hex_number(buf, end, value, sizeof(void *)); #endif @@ -885,29 +997,32 @@ char *resource_string(char *buf, char *end, struct resource *res, int decode = (fmt[0] == 'R') ? 1 : 0; const struct printf_spec *specp; + if (check_pointer(&buf, end, res, spec)) + return buf; + *p++ = '['; if (res->flags & IORESOURCE_IO) { - p = string(p, pend, "io ", str_spec); + p = string_nocheck(p, pend, "io ", str_spec); specp = &io_spec; } else if (res->flags & IORESOURCE_MEM) { - p = string(p, pend, "mem ", str_spec); + p = string_nocheck(p, pend, "mem ", str_spec); specp = &mem_spec; } else if (res->flags & IORESOURCE_IRQ) { - p = string(p, pend, "irq ", str_spec); + p = string_nocheck(p, pend, "irq ", str_spec); specp = &default_dec_spec; } else if (res->flags & IORESOURCE_DMA) { - p = string(p, pend, "dma ", str_spec); + p = string_nocheck(p, pend, "dma ", str_spec); specp = &default_dec_spec; } else if (res->flags & IORESOURCE_BUS) { - p = string(p, pend, "bus ", str_spec); + p = string_nocheck(p, pend, "bus ", str_spec); specp = &bus_spec; } else { - p = string(p, pend, "??? ", str_spec); + p = string_nocheck(p, pend, "??? ", str_spec); specp = &mem_spec; decode = 0; } if (decode && res->flags & IORESOURCE_UNSET) { - p = string(p, pend, "size ", str_spec); + p = string_nocheck(p, pend, "size ", str_spec); p = number(p, pend, resource_size(res), *specp); } else { p = number(p, pend, res->start, *specp); @@ -918,21 +1033,21 @@ char *resource_string(char *buf, char *end, struct resource *res, } if (decode) { if (res->flags & IORESOURCE_MEM_64) - p = string(p, pend, " 64bit", str_spec); + p = string_nocheck(p, pend, " 64bit", str_spec); if (res->flags & IORESOURCE_PREFETCH) - p = string(p, pend, " pref", str_spec); + p = string_nocheck(p, pend, " pref", str_spec); if (res->flags & IORESOURCE_WINDOW) - p = string(p, pend, " window", str_spec); + p = string_nocheck(p, pend, " window", str_spec); if (res->flags & IORESOURCE_DISABLED) - p = string(p, pend, " disabled", str_spec); + p = string_nocheck(p, pend, " disabled", str_spec); } else { - p = string(p, pend, " flags ", str_spec); + p = string_nocheck(p, pend, " flags ", str_spec); p = number(p, pend, res->flags, default_flag_spec); } *p++ = ']'; *p = '\0'; - return string(buf, end, sym, spec); + return string_nocheck(buf, end, sym, spec); } static noinline_for_stack @@ -947,9 +1062,8 @@ char *hex_string(char *buf, char *end, u8 *addr, struct printf_spec spec, /* nothing to print */ return buf; - if (ZERO_OR_NULL_PTR(addr)) - /* NULL pointer */ - return string(buf, end, NULL, spec); + if (check_pointer(&buf, end, addr, spec)) + return buf; switch (fmt[1]) { case 'C': @@ -996,6 +1110,9 @@ char *bitmap_string(char *buf, char *end, unsigned long *bitmap, int i, chunksz; bool first = true; + if (check_pointer(&buf, end, bitmap, spec)) + return buf; + /* reused to print numbers */ spec = (struct printf_spec){ .flags = SMALL | ZEROPAD, .base = 16 }; @@ -1037,6 +1154,9 @@ char *bitmap_list_string(char *buf, char *end, unsigned long *bitmap, int cur, rbot, rtop; bool first = true; + if (check_pointer(&buf, end, bitmap, spec)) + return buf; + rbot = cur = find_first_bit(bitmap, nr_bits); while (cur < nr_bits) { rtop = cur; @@ -1075,6 +1195,9 @@ char *mac_address_string(char *buf, char *end, u8 *addr, char separator; bool reversed = false; + if (check_pointer(&buf, end, addr, spec)) + return buf; + switch (fmt[1]) { case 'F': separator = '-'; @@ -1100,7 +1223,7 @@ char *mac_address_string(char *buf, char *end, u8 *addr, } *p = '\0'; - return string(buf, end, mac_addr, spec); + return string_nocheck(buf, end, mac_addr, spec); } static noinline_for_stack @@ -1263,7 +1386,7 @@ char *ip6_addr_string(char *buf, char *end, const u8 *addr, else ip6_string(ip6_addr, addr, fmt); - return string(buf, end, ip6_addr, spec); + return string_nocheck(buf, end, ip6_addr, spec); } static noinline_for_stack @@ -1274,7 +1397,7 @@ char *ip4_addr_string(char *buf, char *end, const u8 *addr, ip4_string(ip4_addr, addr, fmt); - return string(buf, end, ip4_addr, spec); + return string_nocheck(buf, end, ip4_addr, spec); } static noinline_for_stack @@ -1336,7 +1459,7 @@ char *ip6_addr_string_sa(char *buf, char *end, const struct sockaddr_in6 *sa, } *p = '\0'; - return string(buf, end, ip6_addr, spec); + return string_nocheck(buf, end, ip6_addr, spec); } static noinline_for_stack @@ -1371,7 +1494,42 @@ char *ip4_addr_string_sa(char *buf, char *end, const struct sockaddr_in *sa, } *p = '\0'; - return string(buf, end, ip4_addr, spec); + return string_nocheck(buf, end, ip4_addr, spec); +} + +static noinline_for_stack +char *ip_addr_string(char *buf, char *end, const void *ptr, + struct printf_spec spec, const char *fmt) +{ + char *err_fmt_msg; + + if (check_pointer(&buf, end, ptr, spec)) + return buf; + + switch (fmt[1]) { + case '6': + return ip6_addr_string(buf, end, ptr, spec, fmt); + case '4': + return ip4_addr_string(buf, end, ptr, spec, fmt); + case 'S': { + const union { + struct sockaddr raw; + struct sockaddr_in v4; + struct sockaddr_in6 v6; + } *sa = ptr; + + switch (sa->raw.sa_family) { + case AF_INET: + return ip4_addr_string_sa(buf, end, &sa->v4, spec, fmt); + case AF_INET6: + return ip6_addr_string_sa(buf, end, &sa->v6, spec, fmt); + default: + return error_string(buf, end, "(einval)", spec); + }} + } + + err_fmt_msg = fmt[0] == 'i' ? "(%pi?)" : "(%pI?)"; + return error_string(buf, end, err_fmt_msg, spec); } static noinline_for_stack @@ -1386,9 +1544,8 @@ char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec, if (spec.field_width == 0) return buf; /* nothing to print */ - if (ZERO_OR_NULL_PTR(addr)) - return string(buf, end, NULL, spec); /* NULL pointer */ - + if (check_pointer(&buf, end, addr, spec)) + return buf; do { switch (fmt[count++]) { @@ -1434,6 +1591,21 @@ char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec, return buf; } +static char *va_format(char *buf, char *end, struct va_format *va_fmt, + struct printf_spec spec, const char *fmt) +{ + va_list va; + + if (check_pointer(&buf, end, va_fmt, spec)) + return buf; + + va_copy(va, *va_fmt->va); + buf += vsnprintf(buf, end > buf ? end - buf : 0, va_fmt->fmt, va); + va_end(va); + + return buf; +} + static noinline_for_stack char *uuid_string(char *buf, char *end, const u8 *addr, struct printf_spec spec, const char *fmt) @@ -1444,6 +1616,9 @@ char *uuid_string(char *buf, char *end, const u8 *addr, const u8 *index = uuid_index; bool uc = false; + if (check_pointer(&buf, end, addr, spec)) + return buf; + switch (*(++fmt)) { case 'L': uc = true; /* fall-through */ @@ -1472,56 +1647,7 @@ char *uuid_string(char *buf, char *end, const u8 *addr, *p = 0; - return string(buf, end, uuid, spec); -} - -int kptr_restrict __read_mostly; - -static noinline_for_stack -char *restricted_pointer(char *buf, char *end, const void *ptr, - struct printf_spec spec) -{ - switch (kptr_restrict) { - case 0: - /* Always print %pK values */ - break; - case 1: { - const struct cred *cred; - - /* - * kptr_restrict==1 cannot be used in IRQ context - * because its test for CAP_SYSLOG would be meaningless. - */ - if (in_irq() || in_serving_softirq() || in_nmi()) { - if (spec.field_width == -1) - spec.field_width = 2 * sizeof(ptr); - return string(buf, end, "pK-error", spec); - } - - /* - * Only print the real pointer value if the current - * process has CAP_SYSLOG and is running with the - * same credentials it started with. This is because - * access to files is checked at open() time, but %pK - * checks permission at read() time. We don't want to - * leak pointer values if a binary opens a file using - * %pK and then elevates privileges before reading it. - */ - cred = current_cred(); - if (!has_capability_noaudit(current, CAP_SYSLOG) || - !uid_eq(cred->euid, cred->uid) || - !gid_eq(cred->egid, cred->gid)) - ptr = NULL; - break; - } - case 2: - default: - /* Always print 0's for %pK */ - ptr = NULL; - break; - } - - return pointer_string(buf, end, ptr, spec); + return string_nocheck(buf, end, uuid, spec); } static noinline_for_stack @@ -1531,24 +1657,31 @@ char *netdev_bits(char *buf, char *end, const void *addr, unsigned long long num; int size; + if (check_pointer(&buf, end, addr, spec)) + return buf; + switch (fmt[1]) { case 'F': num = *(const netdev_features_t *)addr; size = sizeof(netdev_features_t); break; default: - return ptr_to_id(buf, end, addr, spec); + return error_string(buf, end, "(%pN?)", spec); } return special_hex_number(buf, end, num, size); } static noinline_for_stack -char *address_val(char *buf, char *end, const void *addr, const char *fmt) +char *address_val(char *buf, char *end, const void *addr, + struct printf_spec spec, const char *fmt) { unsigned long long num; int size; + if (check_pointer(&buf, end, addr, spec)) + return buf; + switch (fmt[1]) { case 'd': num = *(const dma_addr_t *)addr; @@ -1600,12 +1733,16 @@ char *time_str(char *buf, char *end, const struct rtc_time *tm, bool r) } static noinline_for_stack -char *rtc_str(char *buf, char *end, const struct rtc_time *tm, const char *fmt) +char *rtc_str(char *buf, char *end, const struct rtc_time *tm, + struct printf_spec spec, const char *fmt) { bool have_t = true, have_d = true; bool raw = false; int count = 2; + if (check_pointer(&buf, end, tm, spec)) + return buf; + switch (fmt[count]) { case 'd': have_t = false; @@ -1639,9 +1776,9 @@ char *time_and_date(char *buf, char *end, void *ptr, struct printf_spec spec, { switch (fmt[1]) { case 'R': - return rtc_str(buf, end, (const struct rtc_time *)ptr, fmt); + return rtc_str(buf, end, (const struct rtc_time *)ptr, spec, fmt); default: - return ptr_to_id(buf, end, ptr, spec); + return error_string(buf, end, "(%ptR?)", spec); } } @@ -1649,8 +1786,11 @@ static noinline_for_stack char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec, const char *fmt) { - if (!IS_ENABLED(CONFIG_HAVE_CLK) || !clk) - return string(buf, end, NULL, spec); + if (!IS_ENABLED(CONFIG_HAVE_CLK)) + return error_string(buf, end, "(%pC?)", spec); + + if (check_pointer(&buf, end, clk, spec)) + return buf; switch (fmt[1]) { case 'n': @@ -1658,7 +1798,7 @@ char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec, #ifdef CONFIG_COMMON_CLK return string(buf, end, __clk_get_name(clk), spec); #else - return ptr_to_id(buf, end, clk, spec); + return error_string(buf, end, "(%pC?)", spec); #endif } } @@ -1691,11 +1831,15 @@ char *format_flags(char *buf, char *end, unsigned long flags, } static noinline_for_stack -char *flags_string(char *buf, char *end, void *flags_ptr, const char *fmt) +char *flags_string(char *buf, char *end, void *flags_ptr, + struct printf_spec spec, const char *fmt) { unsigned long flags; const struct trace_print_flags *names; + if (check_pointer(&buf, end, flags_ptr, spec)) + return buf; + switch (fmt[1]) { case 'p': flags = *(unsigned long *)flags_ptr; @@ -1712,8 +1856,7 @@ char *flags_string(char *buf, char *end, void *flags_ptr, const char *fmt) names = gfpflag_names; break; default: - WARN_ONCE(1, "Unsupported flags modifier: %c\n", fmt[1]); - return buf; + return error_string(buf, end, "(%pG?)", spec); } return format_flags(buf, end, flags, names); @@ -1735,13 +1878,13 @@ char *device_node_gen_full_name(const struct device_node *np, char *buf, char *e /* special case for root node */ if (!parent) - return string(buf, end, "/", default_str_spec); + return string_nocheck(buf, end, "/", default_str_spec); for (depth = 0; parent->parent; depth++) parent = parent->parent; for ( ; depth >= 0; depth--) { - buf = string(buf, end, "/", default_str_spec); + buf = string_nocheck(buf, end, "/", default_str_spec); buf = string(buf, end, device_node_name_for_depth(np, depth), default_str_spec); } @@ -1769,10 +1912,10 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, str_spec.field_width = -1; if (!IS_ENABLED(CONFIG_OF)) - return string(buf, end, "(!OF)", spec); + return error_string(buf, end, "(%pOF?)", spec); - if ((unsigned long)dn < PAGE_SIZE) - return string(buf, end, "(null)", spec); + if (check_pointer(&buf, end, dn, spec)) + return buf; /* simple case without anything any more format specifiers */ fmt++; @@ -1813,7 +1956,7 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, tbuf[2] = of_node_check_flag(dn, OF_POPULATED) ? 'P' : '-'; tbuf[3] = of_node_check_flag(dn, OF_POPULATED_BUS) ? 'B' : '-'; tbuf[4] = 0; - buf = string(buf, end, tbuf, str_spec); + buf = string_nocheck(buf, end, tbuf, str_spec); break; case 'c': /* major compatible string */ ret = of_property_read_string(dn, "compatible", &p); @@ -1824,10 +1967,10 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, has_mult = false; of_property_for_each_string(dn, "compatible", prop, p) { if (has_mult) - buf = string(buf, end, ",", str_spec); - buf = string(buf, end, "\"", str_spec); + buf = string_nocheck(buf, end, ",", str_spec); + buf = string_nocheck(buf, end, "\"", str_spec); buf = string(buf, end, p, str_spec); - buf = string(buf, end, "\"", str_spec); + buf = string_nocheck(buf, end, "\"", str_spec); has_mult = true; } @@ -1840,6 +1983,17 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, return widen_string(buf, buf - buf_start, end, spec); } +static char *kobject_string(char *buf, char *end, void *ptr, + struct printf_spec spec, const char *fmt) +{ + switch (fmt[1]) { + case 'F': + return device_node_string(buf, end, ptr, spec, fmt + 1); + } + + return error_string(buf, end, "(%pO?)", spec); +} + /* * Show a '%p' thing. A kernel extension is that the '%p' is followed * by an extra set of alphanumeric characters that are extended format @@ -1930,7 +2084,6 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, * (legacy clock framework) of the clock * - 'Cn' For a clock, it prints the name (Common Clock Framework) or address * (legacy clock framework) of the clock - * - 'Cr' For a clock, it prints the current rate of the clock * - 'G' For flags to be printed as a collection of symbolic strings that would * construct the specific value. Supported flags given by option: * p page flags (see struct page) given as pointer to unsigned long @@ -1957,18 +2110,6 @@ static noinline_for_stack char *pointer(const char *fmt, char *buf, char *end, void *ptr, struct printf_spec spec) { - const int default_width = 2 * sizeof(void *); - - if (!ptr && *fmt != 'K' && *fmt != 'x') { - /* - * Print (null) with the same width as a pointer so it makes - * tabular output look nice. - */ - if (spec.field_width == -1) - spec.field_width = default_width; - return string(buf, end, "(null)", spec); - } - switch (*fmt) { case 'F': case 'f': @@ -2004,50 +2145,19 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, * 4: 001.002.003.004 * 6: 000102...0f */ - switch (fmt[1]) { - case '6': - return ip6_addr_string(buf, end, ptr, spec, fmt); - case '4': - return ip4_addr_string(buf, end, ptr, spec, fmt); - case 'S': { - const union { - struct sockaddr raw; - struct sockaddr_in v4; - struct sockaddr_in6 v6; - } *sa = ptr; - - switch (sa->raw.sa_family) { - case AF_INET: - return ip4_addr_string_sa(buf, end, &sa->v4, spec, fmt); - case AF_INET6: - return ip6_addr_string_sa(buf, end, &sa->v6, spec, fmt); - default: - return string(buf, end, "(invalid address)", spec); - }} - } - break; + return ip_addr_string(buf, end, ptr, spec, fmt); case 'E': return escaped_string(buf, end, ptr, spec, fmt); case 'U': return uuid_string(buf, end, ptr, spec, fmt); case 'V': - { - va_list va; - - va_copy(va, *((struct va_format *)ptr)->va); - buf += vsnprintf(buf, end > buf ? end - buf : 0, - ((struct va_format *)ptr)->fmt, va); - va_end(va); - return buf; - } + return va_format(buf, end, ptr, spec, fmt); case 'K': - if (!kptr_restrict) - break; return restricted_pointer(buf, end, ptr, spec); case 'N': return netdev_bits(buf, end, ptr, spec, fmt); case 'a': - return address_val(buf, end, ptr, fmt); + return address_val(buf, end, ptr, spec, fmt); case 'd': return dentry_name(buf, end, ptr, spec, fmt); case 't': @@ -2064,13 +2174,9 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, #endif case 'G': - return flags_string(buf, end, ptr, fmt); + return flags_string(buf, end, ptr, spec, fmt); case 'O': - switch (fmt[1]) { - case 'F': - return device_node_string(buf, end, ptr, spec, fmt + 1); - } - break; + return kobject_string(buf, end, ptr, spec, fmt); case 'x': return pointer_string(buf, end, ptr, spec); } @@ -2685,11 +2791,13 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) case FORMAT_TYPE_STR: { const char *save_str = va_arg(args, char *); + const char *err_msg; size_t len; - if ((unsigned long)save_str > (unsigned long)-PAGE_SIZE - || (unsigned long)save_str < PAGE_SIZE) - save_str = "(null)"; + err_msg = check_pointer_msg(save_str); + if (err_msg) + save_str = err_msg; + len = strlen(save_str) + 1; if (str + len < end) memcpy(str, save_str, len); diff --git a/lib/xarray.c b/lib/xarray.c index 81c3171ddde9..6be3acbb861f 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -57,6 +57,11 @@ static inline bool xa_track_free(const struct xarray *xa) return xa->xa_flags & XA_FLAGS_TRACK_FREE; } +static inline bool xa_zero_busy(const struct xarray *xa) +{ + return xa->xa_flags & XA_FLAGS_ZERO_BUSY; +} + static inline void xa_mark_set(struct xarray *xa, xa_mark_t mark) { if (!(xa->xa_flags & XA_FLAGS_MARK(mark))) @@ -432,6 +437,8 @@ static void xas_shrink(struct xa_state *xas) break; if (!xa_is_node(entry) && node->shift) break; + if (xa_is_zero(entry) && xa_zero_busy(xa)) + entry = NULL; xas->xa_node = XAS_BOUNDS; RCU_INIT_POINTER(xa->xa_head, entry); @@ -628,6 +635,8 @@ static void *xas_create(struct xa_state *xas, bool allow_root) if (xas_top(node)) { entry = xa_head_locked(xa); xas->xa_node = NULL; + if (!entry && xa_zero_busy(xa)) + entry = XA_ZERO_ENTRY; shift = xas_expand(xas, entry); if (shift < 0) return NULL; @@ -758,10 +767,12 @@ void *xas_store(struct xa_state *xas, void *entry) void *first, *next; bool value = xa_is_value(entry); - if (entry) - first = xas_create(xas, !xa_is_node(entry)); - else + if (entry) { + bool allow_root = !xa_is_node(entry) && !xa_is_zero(entry); + first = xas_create(xas, allow_root); + } else { first = xas_load(xas); + } if (xas_invalid(xas)) return first; @@ -791,7 +802,7 @@ void *xas_store(struct xa_state *xas, void *entry) * entry is set to NULL. */ rcu_assign_pointer(*slot, entry); - if (xa_is_node(next)) + if (xa_is_node(next) && (!node || node->shift)) xas_free_nodes(xas, xa_to_node(next)); if (!node) break; @@ -1294,13 +1305,12 @@ static void *xas_result(struct xa_state *xas, void *curr) * @xa: XArray. * @index: Index into array. * - * If the entry at this index is a multi-index entry then all indices will - * be erased, and the entry will no longer be a multi-index entry. - * This function expects the xa_lock to be held on entry. + * After this function returns, loading from @index will return %NULL. + * If the index is part of a multi-index entry, all indices will be erased + * and none of the entries will be part of a multi-index entry. * - * Context: Any context. Expects xa_lock to be held on entry. May - * release and reacquire xa_lock if @gfp flags permit. - * Return: The old entry at this index. + * Context: Any context. Expects xa_lock to be held on entry. + * Return: The entry which used to be at this index. */ void *__xa_erase(struct xarray *xa, unsigned long index) { @@ -1314,9 +1324,9 @@ EXPORT_SYMBOL(__xa_erase); * @xa: XArray. * @index: Index of entry. * - * This function is the equivalent of calling xa_store() with %NULL as - * the third argument. The XArray does not need to allocate memory, so - * the user does not need to provide GFP flags. + * After this function returns, loading from @index will return %NULL. + * If the index is part of a multi-index entry, all indices will be erased + * and none of the entries will be part of a multi-index entry. * * Context: Any context. Takes and releases the xa_lock. * Return: The entry which used to be at this index. @@ -1421,16 +1431,12 @@ void *__xa_cmpxchg(struct xarray *xa, unsigned long index, if (WARN_ON_ONCE(xa_is_advanced(entry))) return XA_ERROR(-EINVAL); - if (xa_track_free(xa) && !entry) - entry = XA_ZERO_ENTRY; do { curr = xas_load(&xas); - if (curr == XA_ZERO_ENTRY) - curr = NULL; if (curr == old) { xas_store(&xas, entry); - if (xa_track_free(xa)) + if (xa_track_free(xa) && entry && !curr) xas_clear_mark(&xas, XA_FREE_MARK); } } while (__xas_nomem(&xas, gfp)); @@ -1452,7 +1458,7 @@ EXPORT_SYMBOL(__xa_cmpxchg); * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. - * Return: 0 if the store succeeded. -EEXIST if another entry was present. + * Return: 0 if the store succeeded. -EBUSY if another entry was present. * -ENOMEM if memory could not be allocated. */ int __xa_insert(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) @@ -1472,7 +1478,7 @@ int __xa_insert(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) if (xa_track_free(xa)) xas_clear_mark(&xas, XA_FREE_MARK); } else { - xas_set_err(&xas, -EEXIST); + xas_set_err(&xas, -EBUSY); } } while (__xas_nomem(&xas, gfp)); @@ -1480,42 +1486,6 @@ int __xa_insert(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) } EXPORT_SYMBOL(__xa_insert); -/** - * __xa_reserve() - Reserve this index in the XArray. - * @xa: XArray. - * @index: Index into array. - * @gfp: Memory allocation flags. - * - * Ensures there is somewhere to store an entry at @index in the array. - * If there is already something stored at @index, this function does - * nothing. If there was nothing there, the entry is marked as reserved. - * Loading from a reserved entry returns a %NULL pointer. - * - * If you do not use the entry that you have reserved, call xa_release() - * or xa_erase() to free any unnecessary memory. - * - * Context: Any context. Expects the xa_lock to be held on entry. May - * release the lock, sleep and reacquire the lock if the @gfp flags permit. - * Return: 0 if the reservation succeeded or -ENOMEM if it failed. - */ -int __xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp) -{ - XA_STATE(xas, xa, index); - void *curr; - - do { - curr = xas_load(&xas); - if (!curr) { - xas_store(&xas, XA_ZERO_ENTRY); - if (xa_track_free(xa)) - xas_clear_mark(&xas, XA_FREE_MARK); - } - } while (__xas_nomem(&xas, gfp)); - - return xas_error(&xas); -} -EXPORT_SYMBOL(__xa_reserve); - #ifdef CONFIG_XARRAY_MULTI static void xas_set_range(struct xa_state *xas, unsigned long first, unsigned long last) @@ -1607,23 +1577,23 @@ EXPORT_SYMBOL(xa_store_range); * __xa_alloc() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. - * @max: Maximum ID to allocate (inclusive). + * @limit: Range for allocated ID. * @entry: New entry. * @gfp: Memory allocation flags. * - * Allocates an unused ID in the range specified by @id and @max. - * Updates the @id pointer with the index, then stores the entry at that - * index. A concurrent lookup will not see an uninitialised @id. + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. - * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if - * there is no more space in the XArray. + * Return: 0 on success, -ENOMEM if memory could not be allocated or + * -EBUSY if there are no free entries in @limit. */ -int __xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, gfp_t gfp) +int __xa_alloc(struct xarray *xa, u32 *id, void *entry, + struct xa_limit limit, gfp_t gfp) { XA_STATE(xas, xa, 0); - int err; if (WARN_ON_ONCE(xa_is_advanced(entry))) return -EINVAL; @@ -1634,22 +1604,71 @@ int __xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, gfp_t gfp) entry = XA_ZERO_ENTRY; do { - xas.xa_index = *id; - xas_find_marked(&xas, max, XA_FREE_MARK); + xas.xa_index = limit.min; + xas_find_marked(&xas, limit.max, XA_FREE_MARK); if (xas.xa_node == XAS_RESTART) - xas_set_err(&xas, -ENOSPC); + xas_set_err(&xas, -EBUSY); + else + *id = xas.xa_index; xas_store(&xas, entry); xas_clear_mark(&xas, XA_FREE_MARK); } while (__xas_nomem(&xas, gfp)); - err = xas_error(&xas); - if (!err) - *id = xas.xa_index; - return err; + return xas_error(&xas); } EXPORT_SYMBOL(__xa_alloc); /** + * __xa_alloc_cyclic() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @entry: New entry. + * @limit: Range of allocated ID. + * @next: Pointer to next ID to allocate. + * @gfp: Memory allocation flags. + * + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. + * The search for an empty entry will start at @next and will wrap + * around if necessary. + * + * Context: Any context. Expects xa_lock to be held on entry. May + * release and reacquire xa_lock if @gfp flags permit. + * Return: 0 if the allocation succeeded without wrapping. 1 if the + * allocation succeeded after wrapping, -ENOMEM if memory could not be + * allocated or -EBUSY if there are no free entries in @limit. + */ +int __xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, + struct xa_limit limit, u32 *next, gfp_t gfp) +{ + u32 min = limit.min; + int ret; + + limit.min = max(min, *next); + ret = __xa_alloc(xa, id, entry, limit, gfp); + if ((xa->xa_flags & XA_FLAGS_ALLOC_WRAPPED) && ret == 0) { + xa->xa_flags &= ~XA_FLAGS_ALLOC_WRAPPED; + ret = 1; + } + + if (ret < 0 && limit.min > min) { + limit.min = min; + ret = __xa_alloc(xa, id, entry, limit, gfp); + if (ret == 0) + ret = 1; + } + + if (ret >= 0) { + *next = *id + 1; + if (*next == 0) + xa->xa_flags |= XA_FLAGS_ALLOC_WRAPPED; + } + return ret; +} +EXPORT_SYMBOL(__xa_alloc_cyclic); + +/** * __xa_set_mark() - Set this mark on this entry while locked. * @xa: XArray. * @index: Index of entry. @@ -1943,6 +1962,8 @@ void xa_destroy(struct xarray *xa) entry = xa_head_locked(xa); RCU_INIT_POINTER(xa->xa_head, NULL); xas_init_marks(&xas); + if (xa_zero_busy(xa)) + xa_mark_clear(xa, XA_FREE_MARK); /* lockdep checks we're still holding the lock in xas_free_nodes() */ if (xa_is_node(entry)) xas_free_nodes(&xas, xa_to_node(entry)); diff --git a/lib/zstd/bitstream.h b/lib/zstd/bitstream.h index a826b99e1d63..3a49784d5c61 100644 --- a/lib/zstd/bitstream.h +++ b/lib/zstd/bitstream.h @@ -259,10 +259,15 @@ ZSTD_STATIC size_t BIT_initDStream(BIT_DStream_t *bitD, const void *srcBuffer, s bitD->bitContainer = *(const BYTE *)(bitD->start); switch (srcSize) { case 7: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[6]) << (sizeof(bitD->bitContainer) * 8 - 16); + /* fall through */ case 6: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[5]) << (sizeof(bitD->bitContainer) * 8 - 24); + /* fall through */ case 5: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[4]) << (sizeof(bitD->bitContainer) * 8 - 32); + /* fall through */ case 4: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[3]) << 24; + /* fall through */ case 3: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[2]) << 16; + /* fall through */ case 2: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[1]) << 8; default:; } diff --git a/lib/zstd/compress.c b/lib/zstd/compress.c index f9166cf4f7a9..5e0b67003e55 100644 --- a/lib/zstd/compress.c +++ b/lib/zstd/compress.c @@ -3182,6 +3182,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream *zcs, void *dst, size_t * zcs->outBuffFlushedSize = 0; zcs->stage = zcss_flush; /* pass-through to flush stage */ } + /* fall through */ case zcss_flush: { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; diff --git a/lib/zstd/decompress.c b/lib/zstd/decompress.c index b17846725ca0..269ee9a796c1 100644 --- a/lib/zstd/decompress.c +++ b/lib/zstd/decompress.c @@ -1768,6 +1768,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, c return 0; } dctx->expected = 0; /* not necessary to copy more */ + /* fall through */ case ZSTDds_decodeFrameHeader: memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected); @@ -2375,7 +2376,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inB } zds->stage = zdss_read; } - /* pass-through */ + /* fall through */ case zdss_read: { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx); @@ -2404,6 +2405,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inB zds->stage = zdss_load; /* pass-through */ } + /* fall through */ case zdss_load: { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx); @@ -2436,6 +2438,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inB /* pass-through */ } } + /* fall through */ case zdss_flush: { size_t const toFlushSize = zds->outEnd - zds->outStart; diff --git a/lib/zstd/huf_compress.c b/lib/zstd/huf_compress.c index 40055a7016e6..e727812d12aa 100644 --- a/lib/zstd/huf_compress.c +++ b/lib/zstd/huf_compress.c @@ -556,7 +556,9 @@ size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, si n = srcSize & ~3; /* join to mod 4 */ switch (srcSize & 3) { case 3: HUF_encodeSymbol(&bitC, ip[n + 2], CTable); HUF_FLUSHBITS_2(&bitC); + /* fall through */ case 2: HUF_encodeSymbol(&bitC, ip[n + 1], CTable); HUF_FLUSHBITS_1(&bitC); + /* fall through */ case 1: HUF_encodeSymbol(&bitC, ip[n + 0], CTable); HUF_FLUSHBITS(&bitC); case 0: default:; |