From 0b6ec8c0a3708f0a54b75ee1200772ec5226ec49 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 27 Jan 2016 15:37:58 +0100 Subject: debugobjects: Allow bigger number of early boot objects On my bigger s390 systems I always get "Out of memory. ODEBUG disabled". Since the number of objects is needed at compile time, we can not change the size dynamically before the caches etc are available. Doubling the size seems to do the trick. Since it is init data it will be freed anyway, this should be ok. Signed-off-by: Christian Borntraeger Link: http://lkml.kernel.org/r/1453905478-13409-1-git-send-email-borntraeger@de.ibm.com Signed-off-by: Thomas Gleixner --- lib/debugobjects.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 547f7f923dbc..519b5a10fd70 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -21,7 +21,7 @@ #define ODEBUG_HASH_BITS 14 #define ODEBUG_HASH_SIZE (1 << ODEBUG_HASH_BITS) -#define ODEBUG_POOL_SIZE 512 +#define ODEBUG_POOL_SIZE 1024 #define ODEBUG_POOL_MIN_LEVEL 256 #define ODEBUG_CHUNK_SHIFT PAGE_SHIFT -- cgit From cd4d09ec6f6c12a2cc3db5b7d8876a325a53545b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 26 Jan 2016 22:12:04 +0100 Subject: x86/cpufeature: Carve out X86_FEATURE_* Move them to a separate header and have the following dependency: x86/cpufeatures.h <- x86/processor.h <- x86/cpufeature.h This makes it easier to use the header in asm code and not include the whole cpufeature.h and add guards for asm. Suggested-by: H. Peter Anvin Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1453842730-28463-5-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- lib/atomic64_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index d62de8bf022d..123481814320 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -17,7 +17,7 @@ #include #ifdef CONFIG_X86 -#include /* for boot_cpu_has below */ +#include /* for boot_cpu_has below */ #endif #define TEST(bit, op, c_op, val) \ -- cgit From 72676bb53f33fd0ef3a1484fc1ecfd306dc6ff40 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 2 Feb 2016 16:57:18 -0800 Subject: lib/test-string_helpers.c: fix and improve string_get_size() tests Recently added commit 564b026fbd0d ("string_helpers: fix precision loss for some inputs") fixed precision issues for string_get_size() and broke tests. Fix and improve them: test both STRING_UNITS_2 and STRING_UNITS_10 at a time, better failure reporting, test small an huge values. Fixes: 564b026fbd0d28e9 ("string_helpers: fix precision loss for some inputs") Signed-off-by: Vitaly Kuznetsov Cc: Andy Shevchenko Cc: Rasmus Villemoes Cc: James Bottomley Cc: James Bottomley Cc: "James E.J. 
Bottomley" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test-string_helpers.c | 67 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c index 98866a770770..25b5cbfb7615 100644 --- a/lib/test-string_helpers.c +++ b/lib/test-string_helpers.c @@ -327,36 +327,67 @@ out: } #define string_get_size_maxbuf 16 -#define test_string_get_size_one(size, blk_size, units, exp_result) \ +#define test_string_get_size_one(size, blk_size, exp_result10, exp_result2) \ do { \ - BUILD_BUG_ON(sizeof(exp_result) >= string_get_size_maxbuf); \ - __test_string_get_size((size), (blk_size), (units), \ - (exp_result)); \ + BUILD_BUG_ON(sizeof(exp_result10) >= string_get_size_maxbuf); \ + BUILD_BUG_ON(sizeof(exp_result2) >= string_get_size_maxbuf); \ + __test_string_get_size((size), (blk_size), (exp_result10), \ + (exp_result2)); \ } while (0) -static __init void __test_string_get_size(const u64 size, const u64 blk_size, - const enum string_size_units units, - const char *exp_result) +static __init void test_string_get_size_check(const char *units, + const char *exp, + char *res, + const u64 size, + const u64 blk_size) { - char buf[string_get_size_maxbuf]; - - string_get_size(size, blk_size, units, buf, sizeof(buf)); - if (!memcmp(buf, exp_result, strlen(exp_result) + 1)) + if (!memcmp(res, exp, strlen(exp) + 1)) return; - buf[sizeof(buf) - 1] = '\0'; - pr_warn("Test 'test_string_get_size_one' failed!\n"); - pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %d\n", + res[string_get_size_maxbuf - 1] = '\0'; + + pr_warn("Test 'test_string_get_size' failed!\n"); + pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %s)\n", size, blk_size, units); - pr_warn("expected: '%s', got '%s'\n", exp_result, buf); + pr_warn("expected: '%s', got '%s'\n", exp, res); +} + +static __init void __test_string_get_size(const u64 size, const u64 blk_size, + const char *exp_result10, + const char *exp_result2) +{ + char buf10[string_get_size_maxbuf]; + char buf2[string_get_size_maxbuf]; + + string_get_size(size, blk_size, STRING_UNITS_10, buf10, sizeof(buf10)); + string_get_size(size, blk_size, STRING_UNITS_2, buf2, sizeof(buf2)); + + test_string_get_size_check("STRING_UNITS_10", exp_result10, buf10, + size, blk_size); + + test_string_get_size_check("STRING_UNITS_2", exp_result2, buf2, + size, blk_size); } static __init void test_string_get_size(void) { - test_string_get_size_one(16384, 512, STRING_UNITS_2, "8.00 MiB"); - test_string_get_size_one(8192, 4096, STRING_UNITS_10, "32.7 MB"); - test_string_get_size_one(1, 512, STRING_UNITS_10, "512 B"); + /* small values */ + test_string_get_size_one(0, 512, "0 B", "0 B"); + test_string_get_size_one(1, 512, "512 B", "512 B"); + test_string_get_size_one(1100, 1, "1.10 kB", "1.07 KiB"); + + /* normal values */ + test_string_get_size_one(16384, 512, "8.39 MB", "8.00 MiB"); + test_string_get_size_one(500118192, 512, "256 GB", "238 GiB"); + test_string_get_size_one(8192, 4096, "33.6 MB", "32.0 MiB"); + + /* weird block sizes */ + test_string_get_size_one(3000, 1900, "5.70 MB", "5.44 MiB"); + + /* huge values */ + test_string_get_size_one(U64_MAX, 4096, "75.6 ZB", "64.0 ZiB"); + test_string_get_size_one(4096, U64_MAX, "75.6 ZB", "64.0 ZiB"); } static int __init test_string_helpers_init(void) -- cgit From 46437f9a554fbe3e110580ca08ab703b59f2f95a Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 2 Feb 2016 16:57:52 -0800 
Subject: radix-tree: fix race in gang lookup If the indirect_ptr bit is set on a slot, that indicates we need to redo the lookup. Introduce a new function radix_tree_iter_retry() which forces the loop to retry the lookup by setting 'slot' to NULL and turning the iterator back to point at the problematic entry. This is a pretty rare problem to hit at the moment; the lookup has to race with a grow of the radix tree from a height of 0. The consequences of hitting this race are that gang lookup could return a pointer to a radix_tree_node instead of a pointer to whatever the user had inserted in the tree. Fixes: cebbd29e1c2f ("radix-tree: rewrite gang lookup using iterator") Signed-off-by: Matthew Wilcox Cc: Hugh Dickins Cc: Ohad Ben-Cohen Cc: Konstantin Khlebnikov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index fcf5d98574ce..6b79e9026e24 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1019,9 +1019,13 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, return 0; radix_tree_for_each_slot(slot, root, &iter, first_index) { - results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot)); + results[ret] = rcu_dereference_raw(*slot); if (!results[ret]) continue; + if (radix_tree_is_indirect_ptr(results[ret])) { + slot = radix_tree_iter_retry(&iter); + continue; + } if (++ret == max_items) break; } @@ -1098,9 +1102,13 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, return 0; radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { - results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot)); + results[ret] = rcu_dereference_raw(*slot); if (!results[ret]) continue; + if (radix_tree_is_indirect_ptr(results[ret])) { + slot = radix_tree_iter_retry(&iter); + continue; + } if (++ret == max_items) break; } -- cgit From d7ce36924344ace0dbdc855b1206cacc46b36d45 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 5 Feb 2016 15:36:16 -0800 Subject: dump_stack: avoid potential deadlocks Some servers experienced fatal deadlocks because of a combination of bugs, leading to multiple cpus calling dump_stack(). The checksumming bug was fixed in commit 34ae6a1aa054 ("ipv6: update skb->csum when CE mark is propagated"). The second problem is a faulty locking in dump_stack() CPU1 runs in process context and calls dump_stack(), grabs dump_lock. CPU2 receives a TCP packet under softirq, grabs socket spinlock, and call dump_stack() from netdev_rx_csum_fault(). dump_stack() spins on atomic_cmpxchg(&dump_lock, -1, 2), since dump_lock is owned by CPU1 While dumping its stack, CPU1 is interrupted by a softirq, and happens to process a packet for the TCP socket locked by CPU2. CPU1 spins forever in spin_lock() : deadlock Stack trace on CPU1 looked like : NMI backtrace for cpu 1 RIP: _raw_spin_lock+0x25/0x30 ... 
Call Trace: tcp_v6_rcv+0x243/0x620 ip6_input_finish+0x11f/0x330 ip6_input+0x38/0x40 ip6_rcv_finish+0x3c/0x90 ipv6_rcv+0x2a9/0x500 process_backlog+0x461/0xaa0 net_rx_action+0x147/0x430 __do_softirq+0x167/0x2d0 call_softirq+0x1c/0x30 do_softirq+0x3f/0x80 irq_exit+0x6e/0xc0 smp_call_function_single_interrupt+0x35/0x40 call_function_single_interrupt+0x6a/0x70 printk+0x4d/0x4f printk_address+0x31/0x33 print_trace_address+0x33/0x3c print_context_stack+0x7f/0x119 dump_trace+0x26b/0x28e show_trace_log_lvl+0x4f/0x5c show_stack_log_lvl+0x104/0x113 show_stack+0x42/0x44 dump_stack+0x46/0x58 netdev_rx_csum_fault+0x38/0x3c __skb_checksum_complete_head+0x6e/0x80 __skb_checksum_complete+0x11/0x20 tcp_rcv_established+0x2bd5/0x2fd0 tcp_v6_do_rcv+0x13c/0x620 sk_backlog_rcv+0x15/0x30 release_sock+0xd2/0x150 tcp_recvmsg+0x1c1/0xfc0 inet_recvmsg+0x7d/0x90 sock_recvmsg+0xaf/0xe0 ___sys_recvmsg+0x111/0x3b0 SyS_recvmsg+0x5c/0xb0 system_call_fastpath+0x16/0x1b Fixes: b58d977432c8 ("dump_stack: serialize the output from dump_stack()") Signed-off-by: Eric Dumazet Cc: Alex Thorlton Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dump_stack.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/dump_stack.c b/lib/dump_stack.c index 6745c6230db3..c30d07e99dba 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -25,6 +25,7 @@ static atomic_t dump_lock = ATOMIC_INIT(-1); asmlinkage __visible void dump_stack(void) { + unsigned long flags; int was_locked; int old; int cpu; @@ -33,9 +34,8 @@ asmlinkage __visible void dump_stack(void) * Permit this cpu to perform nested stack dumps while serialising * against other CPUs */ - preempt_disable(); - retry: + local_irq_save(flags); cpu = smp_processor_id(); old = atomic_cmpxchg(&dump_lock, -1, cpu); if (old == -1) { @@ -43,6 +43,7 @@ retry: } else if (old == cpu) { was_locked = 1; } else { + local_irq_restore(flags); cpu_relax(); goto retry; } @@ -52,7 +53,7 @@ retry: if (!was_locked) atomic_set(&dump_lock, -1); - preempt_enable(); + local_irq_restore(flags); } #else asmlinkage __visible void dump_stack(void) -- cgit From 00cd29b799e3449f0c68b1cc77cd4a5f95b42d17 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 13 Jan 2016 08:10:31 -0800 Subject: klist: fix starting point removed bug in klist iterators The starting node for a klist iteration is often passed in from somewhere way above the klist infrastructure, meaning there's no guarantee the node is still on the list. We've seen this in SCSI where we use bus_find_device() to iterate through a list of devices. In the face of heavy hotplug activity, the last device returned by bus_find_device() can be removed before the next call. This leads to Dec 3 13:22:02 localhost kernel: WARNING: CPU: 2 PID: 28073 at include/linux/kref.h:47 klist_iter_init_node+0x3d/0x50() Dec 3 13:22:02 localhost kernel: Modules linked in: scsi_debug x86_pkg_temp_thermal kvm_intel kvm irqbypass crc32c_intel joydev iTCO_wdt dcdbas ipmi_devintf acpi_power_meter iTCO_vendor_support ipmi_si imsghandler pcspkr wmi acpi_cpufreq tpm_tis tpm shpchp lpc_ich mfd_core nfsd nfs_acl lockd grace sunrpc tg3 ptp pps_core Dec 3 13:22:02 localhost kernel: CPU: 2 PID: 28073 Comm: cat Not tainted 4.4.0-rc1+ #2 Dec 3 13:22:02 localhost kernel: Hardware name: Dell Inc. 
PowerEdge R320/08VT7V, BIOS 2.0.22 11/19/2013 Dec 3 13:22:02 localhost kernel: ffffffff81a20e77 ffff880613acfd18 ffffffff81321eef 0000000000000000 Dec 3 13:22:02 localhost kernel: ffff880613acfd50 ffffffff8107ca52 ffff88061176b198 0000000000000000 Dec 3 13:22:02 localhost kernel: ffffffff814542b0 ffff880610cfb100 ffff88061176b198 ffff880613acfd60 Dec 3 13:22:02 localhost kernel: Call Trace: Dec 3 13:22:02 localhost kernel: [] dump_stack+0x44/0x55 Dec 3 13:22:02 localhost kernel: [] warn_slowpath_common+0x82/0xc0 Dec 3 13:22:02 localhost kernel: [] ? proc_scsi_show+0x20/0x20 Dec 3 13:22:02 localhost kernel: [] warn_slowpath_null+0x1a/0x20 Dec 3 13:22:02 localhost kernel: [] klist_iter_init_node+0x3d/0x50 Dec 3 13:22:02 localhost kernel: [] bus_find_device+0x51/0xb0 Dec 3 13:22:02 localhost kernel: [] scsi_seq_next+0x2d/0x40 [...] And an eventual crash. It can actually occur in any hotplug system which has a device finder and a starting device. We can fix this globally by making sure the starting node for klist_iter_init_node() is actually a member of the list before using it (and by starting from the beginning if it isn't). Reported-by: Ewan D. Milne Tested-by: Ewan D. Milne Cc: stable@vger.kernel.org Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- lib/klist.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/klist.c b/lib/klist.c index d74cf7a29afd..0507fa5d84c5 100644 --- a/lib/klist.c +++ b/lib/klist.c @@ -282,9 +282,9 @@ void klist_iter_init_node(struct klist *k, struct klist_iter *i, struct klist_node *n) { i->i_klist = k; - i->i_cur = n; - if (n) - kref_get(&n->n_ref); + i->i_cur = NULL; + if (n && kref_get_unless_zero(&n->n_ref)) + i->i_cur = n; } EXPORT_SYMBOL_GPL(klist_iter_init_node); -- cgit From 20af74ef140f0fbc477f90817c58241107782e10 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sun, 27 Dec 2015 18:46:05 +0800 Subject: devres: use to_pci_dev() Use to_pci_dev() instead of open-coding it. Signed-off-by: Geliang Tang Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- lib/devres.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/devres.c b/lib/devres.c index 8c85672639d3..cb1464c411a2 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -236,7 +236,7 @@ struct pcim_iomap_devres { static void pcim_iomap_release(struct device *gendev, void *res) { - struct pci_dev *dev = container_of(gendev, struct pci_dev, dev); + struct pci_dev *dev = to_pci_dev(gendev); struct pcim_iomap_devres *this = res; int i; -- cgit From 4ba6a2b28f111e4c9621487612056d10f3f4a6ca Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 8 Feb 2016 16:09:08 +0900 Subject: scatterlist: fix a typo in comment block of sg_miter_stop() Fix the doubled "started" and tidy up the following sentences. Signed-off-by: Masahiro Yamada Signed-off-by: Linus Torvalds --- lib/scatterlist.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/scatterlist.c b/lib/scatterlist.c index bafa9933fa76..004fc70fc56a 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -598,9 +598,9 @@ EXPORT_SYMBOL(sg_miter_next); * * Description: * Stops mapping iterator @miter. @miter should have been started - * started using sg_miter_start(). A stopped iteration can be - * resumed by calling sg_miter_next() on it. This is useful when - * resources (kmap) need to be released during iteration. + * using sg_miter_start(). A stopped iteration can be resumed by + * calling sg_miter_next() on it. 
This is useful when resources (kmap) + * need to be released during iteration. * * Context: * Preemption disabled if the SG_MITER_ATOMIC is set. Don't care -- cgit From 975db45e9cc561bf8a7eddfa0705d3a078ec184f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Feb 2016 15:36:52 +0100 Subject: locking/static_keys: Avoid nested functions clang does not support nested functions inside of an array definition: lib/test_static_keys.c:105:16: error: function definition is not allowed here .test_key = test_key_func(&old_true_key, static_key_true), lib/test_static_keys.c:50:20: note: expanded from macro 'test_key_func' ({bool func(void) { return branch(key); } func; }) That code appears to be a little too clever, so this simplifies it a bit by defining functions outside of the array. Signed-off-by: Arnd Bergmann Acked-by: Jason Baron Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1454942223-2781480-1-git-send-email-arnd@arndb.de Signed-off-by: Ingo Molnar --- lib/test_static_keys.c | 62 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/test_static_keys.c b/lib/test_static_keys.c index c61b299e367f..915d75df2086 100644 --- a/lib/test_static_keys.c +++ b/lib/test_static_keys.c @@ -46,8 +46,11 @@ struct test_key { bool (*test_key)(void); }; -#define test_key_func(key, branch) \ - ({bool func(void) { return branch(key); } func; }) +#define test_key_func(key, branch) \ +static bool key ## _ ## branch(void) \ +{ \ + return branch(&key); \ +} static void invert_key(struct static_key *key) { @@ -92,6 +95,25 @@ static int verify_keys(struct test_key *keys, int size, bool invert) return 0; } +test_key_func(old_true_key, static_key_true) +test_key_func(old_false_key, static_key_false) +test_key_func(true_key, static_branch_likely) +test_key_func(true_key, static_branch_unlikely) +test_key_func(false_key, static_branch_likely) +test_key_func(false_key, static_branch_unlikely) +test_key_func(base_old_true_key, static_key_true) +test_key_func(base_inv_old_true_key, static_key_true) +test_key_func(base_old_false_key, static_key_false) +test_key_func(base_inv_old_false_key, static_key_false) +test_key_func(base_true_key, static_branch_likely) +test_key_func(base_true_key, static_branch_unlikely) +test_key_func(base_inv_true_key, static_branch_likely) +test_key_func(base_inv_true_key, static_branch_unlikely) +test_key_func(base_false_key, static_branch_likely) +test_key_func(base_false_key, static_branch_unlikely) +test_key_func(base_inv_false_key, static_branch_likely) +test_key_func(base_inv_false_key, static_branch_unlikely) + static int __init test_static_key_init(void) { int ret; @@ -102,95 +124,95 @@ static int __init test_static_key_init(void) { .init_state = true, .key = &old_true_key, - .test_key = test_key_func(&old_true_key, static_key_true), + .test_key = &old_true_key_static_key_true, }, { .init_state = false, .key = &old_false_key, - .test_key = test_key_func(&old_false_key, static_key_false), + .test_key = &old_false_key_static_key_false, }, /* internal keys - new keys */ { .init_state = true, .key = &true_key.key, - .test_key = test_key_func(&true_key, static_branch_likely), + .test_key = &true_key_static_branch_likely, }, { .init_state = true, .key = &true_key.key, - .test_key = test_key_func(&true_key, static_branch_unlikely), + .test_key = &true_key_static_branch_unlikely, }, { .init_state = false, .key = &false_key.key, - 
.test_key = test_key_func(&false_key, static_branch_likely), + .test_key = &false_key_static_branch_likely, }, { .init_state = false, .key = &false_key.key, - .test_key = test_key_func(&false_key, static_branch_unlikely), + .test_key = &false_key_static_branch_unlikely, }, /* external keys - old keys */ { .init_state = true, .key = &base_old_true_key, - .test_key = test_key_func(&base_old_true_key, static_key_true), + .test_key = &base_old_true_key_static_key_true, }, { .init_state = false, .key = &base_inv_old_true_key, - .test_key = test_key_func(&base_inv_old_true_key, static_key_true), + .test_key = &base_inv_old_true_key_static_key_true, }, { .init_state = false, .key = &base_old_false_key, - .test_key = test_key_func(&base_old_false_key, static_key_false), + .test_key = &base_old_false_key_static_key_false, }, { .init_state = true, .key = &base_inv_old_false_key, - .test_key = test_key_func(&base_inv_old_false_key, static_key_false), + .test_key = &base_inv_old_false_key_static_key_false, }, /* external keys - new keys */ { .init_state = true, .key = &base_true_key.key, - .test_key = test_key_func(&base_true_key, static_branch_likely), + .test_key = &base_true_key_static_branch_likely, }, { .init_state = true, .key = &base_true_key.key, - .test_key = test_key_func(&base_true_key, static_branch_unlikely), + .test_key = &base_true_key_static_branch_unlikely, }, { .init_state = false, .key = &base_inv_true_key.key, - .test_key = test_key_func(&base_inv_true_key, static_branch_likely), + .test_key = &base_inv_true_key_static_branch_likely, }, { .init_state = false, .key = &base_inv_true_key.key, - .test_key = test_key_func(&base_inv_true_key, static_branch_unlikely), + .test_key = &base_inv_true_key_static_branch_unlikely, }, { .init_state = false, .key = &base_false_key.key, - .test_key = test_key_func(&base_false_key, static_branch_likely), + .test_key = &base_false_key_static_branch_likely, }, { .init_state = false, .key = &base_false_key.key, - .test_key = test_key_func(&base_false_key, static_branch_unlikely), + .test_key = &base_false_key_static_branch_unlikely, }, { .init_state = true, .key = &base_inv_false_key.key, - .test_key = test_key_func(&base_inv_false_key, static_branch_likely), + .test_key = &base_inv_false_key_static_branch_likely, }, { .init_state = true, .key = &base_inv_false_key.key, - .test_key = test_key_func(&base_inv_false_key, static_branch_unlikely), + .test_key = &base_inv_false_key_static_branch_unlikely, }, }; -- cgit From f303fccb82928790ec58eea82722bd5c54d300b3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 9 Feb 2016 17:59:38 -0500 Subject: workqueue: implement "workqueue.debug_force_rr_cpu" debug feature Workqueue used to guarantee local execution for work items queued without explicit target CPU. The guarantee is gone now which can break some usages in subtle ways. To flush out those cases, this patch implements a debug feature which forces round-robin CPU selection for all such work items. The debug feature defaults to off and can be enabled with a kernel parameter. The default can be flipped with a debug config option. If you hit this commit during bisection, please refer to 041bd12e272c ("Revert "workqueue: make sure delayed work run in local cpu"") for more information and ping me. 
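For illustration, forced round-robin placement of unbound work items can be sketched as below. This is only a hedged approximation of the idea, not the actual kernel/workqueue.c change; the names force_rr, rr_cpu_last and select_unbound_cpu are invented for the example.

	/* Sketch only: rotate unbound work across the online CPUs when the
	 * debug knob is set, instead of preferring the local CPU. */
	static bool force_rr;	/* set from workqueue.debug_force_rr_cpu */
	static DEFINE_PER_CPU(int, rr_cpu_last);

	static int select_unbound_cpu(int local_cpu)
	{
		int cpu;

		if (likely(!force_rr))
			return local_cpu;	/* local CPU still preferred */

		cpu = cpumask_next(__this_cpu_read(rr_cpu_last), cpu_online_mask);
		if (cpu >= nr_cpu_ids)
			cpu = cpumask_first(cpu_online_mask);
		__this_cpu_write(rr_cpu_last, cpu);
		return cpu;
	}

Work queued for an explicit CPU (e.g. via queue_work_on()) is unaffected; only WORK_CPU_UNBOUND submissions would go through a path like this.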
Signed-off-by: Tejun Heo --- lib/Kconfig.debug | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ecb9e75614bf..8bfd1aca7a3d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1400,6 +1400,21 @@ config RCU_EQS_DEBUG endmenu # "RCU Debugging" +config DEBUG_WQ_FORCE_RR_CPU + bool "Force round-robin CPU selection for unbound work items" + depends on DEBUG_KERNEL + default n + help + Workqueue used to implicitly guarantee that work items queued + without explicit CPU specified are put on the local CPU. This + guarantee is no longer true and while local CPU is still + preferred work items may be put on foreign CPUs. Kernel + parameter "workqueue.debug_force_rr_cpu" is added to force + round-robin CPU selection to flush out usages which depend on the + now broken guarantee. This config option enables the debug + feature by default. When enabled, memory and cache locality will + be impacted. + config DEBUG_BLOCK_EXT_DEVT bool "Force extended block device numbers and spread them" depends on DEBUG_KERNEL -- cgit From 2fe829aca9d7bed5fd6b49c6a1452e5e486b6cc3 Mon Sep 17 00:00:00 2001 From: Gabriel Somlo Date: Thu, 28 Jan 2016 09:23:12 -0500 Subject: kobject: export kset_find_obj() for module use Signed-off-by: Gabriel Somlo Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index 7cbccd2b4c72..445dcaeb0f56 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -861,6 +861,7 @@ struct kobject *kset_find_obj(struct kset *kset, const char *name) spin_unlock(&kset->list_lock); return ret; } +EXPORT_SYMBOL_GPL(kset_find_obj); static void kset_release(struct kobject *kobj) { -- cgit From 73500267c930baadadb0d02284909731baf151f7 Mon Sep 17 00:00:00 2001 From: Peter Jones Date: Mon, 8 Feb 2016 14:48:11 -0500 Subject: lib/ucs2_string: Add ucs2 -> utf8 helper functions This adds ucs2_utf8size(), which tells us how big our ucs2 string is in bytes, and ucs2_as_utf8, which translates from ucs2 to utf8.. Signed-off-by: Peter Jones Tested-by: Lee, Chun-Yi Acked-by: Matthew Garrett Signed-off-by: Matt Fleming --- lib/ucs2_string.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'lib') diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c index 6f500ef2301d..17dd74e21ef9 100644 --- a/lib/ucs2_string.c +++ b/lib/ucs2_string.c @@ -49,3 +49,65 @@ ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len) } } EXPORT_SYMBOL(ucs2_strncmp); + +unsigned long +ucs2_utf8size(const ucs2_char_t *src) +{ + unsigned long i; + unsigned long j = 0; + + for (i = 0; i < ucs2_strlen(src); i++) { + u16 c = src[i]; + + if (c > 0x800) + j += 3; + else if (c > 0x80) + j += 2; + else + j += 1; + } + + return j; +} +EXPORT_SYMBOL(ucs2_utf8size); + +/* + * copy at most maxlength bytes of whole utf8 characters to dest from the + * ucs2 string src. + * + * The return value is the number of characters copied, not including the + * final NUL character. 
+ */ +unsigned long +ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength) +{ + unsigned int i; + unsigned long j = 0; + unsigned long limit = ucs2_strnlen(src, maxlength); + + for (i = 0; maxlength && i < limit; i++) { + u16 c = src[i]; + + if (c > 0x800) { + if (maxlength < 3) + break; + maxlength -= 3; + dest[j++] = 0xe0 | (c & 0xf000) >> 12; + dest[j++] = 0x80 | (c & 0x0fc0) >> 8; + dest[j++] = 0x80 | (c & 0x003f); + } else if (c > 0x80) { + if (maxlength < 2) + break; + maxlength -= 2; + dest[j++] = 0xc0 | (c & 0xfe0) >> 5; + dest[j++] = 0x80 | (c & 0x01f); + } else { + maxlength -= 1; + dest[j++] = c & 0x7f; + } + } + if (maxlength) + dest[j] = '\0'; + return j; +} +EXPORT_SYMBOL(ucs2_as_utf8); -- cgit From 7707535ab95e2231b6d7f2bfb4f27558e83c4dc2 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 11 Feb 2016 16:12:55 -0800 Subject: ubsan: cosmetic fix to Kconfig text When enabling UBSAN_SANITIZE_ALL, the kernel image size gets increased significantly (~3x). So, it sounds better to have some note in Kconfig. And, fixed a typo. Signed-off-by: Yang Shi Acked-by: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.ubsan | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index 49518fb48cab..e07c1ba9ba13 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -18,6 +18,8 @@ config UBSAN_SANITIZE_ALL This option activates instrumentation for the entire kernel. If you don't enable this option, you have to explicitly specify UBSAN_SANITIZE := y for the files/directories you want to check for UB. + Enabling this option will get kernel image size increased + significantly. config UBSAN_ALIGNMENT bool "Enable checking of pointers alignment" @@ -25,5 +27,5 @@ config UBSAN_ALIGNMENT default y if !HAVE_EFFICIENT_UNALIGNED_ACCESS help This option enables detection of unaligned memory accesses. - Enabling this option on architectures that support unalligned + Enabling this option on architectures that support unaligned accesses may produce a lot of false positives. -- cgit From 7eb391299419a03cbe0fa5ab0e6b0932e42c7a36 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 11 Feb 2016 16:13:00 -0800 Subject: vsprintf: kptr_restrict is okay in IRQ when 2 The kptr_restrict flag, when set to 1, only prints the kernel address when the user has CAP_SYSLOG. When it is set to 2, the kernel address is always printed as zero. When set to 1, this needs to check whether or not we're in IRQ. However, when set to 2, this check is unnecessary, and produces confusing results in dmesg. Thus, only make sure we're not in IRQ when mode 1 is used, but not mode 2. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Jason A. Donenfeld Cc: Rasmus Villemoes Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 48ff9c36644d..f44e178e6ede 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1590,22 +1590,23 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return buf; } case 'K': - /* - * %pK cannot be used in IRQ context because its test - * for CAP_SYSLOG would be meaningless. 
- */ - if (kptr_restrict && (in_irq() || in_serving_softirq() || - in_nmi())) { - if (spec.field_width == -1) - spec.field_width = default_width; - return string(buf, end, "pK-error", spec); - } - switch (kptr_restrict) { case 0: /* Always print %pK values */ break; case 1: { + const struct cred *cred; + + /* + * kptr_restrict==1 cannot be used in IRQ context + * because its test for CAP_SYSLOG would be meaningless. + */ + if (in_irq() || in_serving_softirq() || in_nmi()) { + if (spec.field_width == -1) + spec.field_width = default_width; + return string(buf, end, "pK-error", spec); + } + /* * Only print the real pointer value if the current * process has CAP_SYSLOG and is running with the @@ -1615,8 +1616,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, * leak pointer values if a binary opens a file using * %pK and then elevates privileges before reading it. */ - const struct cred *cred = current_cred(); - + cred = current_cred(); if (!has_capability_noaudit(current, CAP_SYSLOG) || !uid_eq(cred->euid, cred->uid) || !gid_eq(cred->egid, cred->gid)) -- cgit From fc4fa6e112c0f999fab022a4eb7f6614bb47c7ab Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Sun, 13 Dec 2015 15:26:11 +0900 Subject: treewide: Fix typo in printk This patch fix spelling typos found in printk and Kconfig. Signed-off-by: Masanari Iida Acked-by: Randy Dunlap Signed-off-by: Jiri Kosina --- lib/842/842_decompress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/842/842_decompress.c b/lib/842/842_decompress.c index 8881dad2a6a0..a2a941f8112d 100644 --- a/lib/842/842_decompress.c +++ b/lib/842/842_decompress.c @@ -250,7 +250,7 @@ static int do_op(struct sw842_param *p, u8 o) case OP_ACTION_NOOP: break; default: - pr_err("Interal error, invalid op %x\n", op); + pr_err("Internal error, invalid op %x\n", op); return -EINVAL; } -- cgit From bdb428c82aab5d8aba6fbb29dde2cf678eadc432 Mon Sep 17 00:00:00 2001 From: Bogdan Sikora Date: Sun, 27 Dec 2015 14:58:23 +0100 Subject: lib+mm: fix few spelling mistakes All are in comments. Signed-off-by: Bogdan Sikora Cc: Cc: Rafael Aquini Cc: Kent Overstreet Cc: Jan Kara [jkosina@suse.cz: more fixup] Acked-by: Rafael Aquini Signed-off-by: Jiri Slaby Signed-off-by: Jiri Kosina --- lib/flex_proportions.c | 2 +- lib/percpu-refcount.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index 8f25652f40d4..a71cf1bdd4c9 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -17,7 +17,7 @@ * * \Sum_{j} p_{j} = 1, * - * This formula can be straightforwardly computed by maintaing denominator + * This formula can be straightforwardly computed by maintaining denominator * (let's call it 'd') and for each event type its numerator (let's call it * 'n_j'). 
When an event of type 'j' happens, we simply need to do: * n_j++; d++; diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 6111bcb28376..27fe74948882 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -12,7 +12,7 @@ * particular cpu can (and will) wrap - this is fine, when we go to shutdown the * percpu counters will all sum to the correct value * - * (More precisely: because moduler arithmatic is commutative the sum of all the + * (More precisely: because modular arithmetic is commutative the sum of all the * percpu_count vars will be equal to what it would have been if all the gets * and puts were done to a single integer, even if some of the percpu integers * overflow or underflow). -- cgit From a68075908a37850918ad96b056acc9ac4ce1bd90 Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Fri, 12 Feb 2016 23:13:33 +0000 Subject: lib/ucs2_string: Correct ucs2 -> utf8 conversion The comparisons should be >= since 0x800 and 0x80 require an additional bit to store. For the 3 byte case, the existing shift would drop off 2 more bits than intended. For the 2 byte case, there should be 5 bits bits in byte 1, and 6 bits in byte 2. Signed-off-by: Jason Andryuk Reviewed-by: Laszlo Ersek Cc: Peter Jones Cc: Matthew Garrett Cc: "Lee, Chun-Yi" Signed-off-by: Matt Fleming --- lib/ucs2_string.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c index 17dd74e21ef9..f0b323abb4c6 100644 --- a/lib/ucs2_string.c +++ b/lib/ucs2_string.c @@ -59,9 +59,9 @@ ucs2_utf8size(const ucs2_char_t *src) for (i = 0; i < ucs2_strlen(src); i++) { u16 c = src[i]; - if (c > 0x800) + if (c >= 0x800) j += 3; - else if (c > 0x80) + else if (c >= 0x80) j += 2; else j += 1; @@ -88,19 +88,19 @@ ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength) for (i = 0; maxlength && i < limit; i++) { u16 c = src[i]; - if (c > 0x800) { + if (c >= 0x800) { if (maxlength < 3) break; maxlength -= 3; dest[j++] = 0xe0 | (c & 0xf000) >> 12; - dest[j++] = 0x80 | (c & 0x0fc0) >> 8; + dest[j++] = 0x80 | (c & 0x0fc0) >> 6; dest[j++] = 0x80 | (c & 0x003f); - } else if (c > 0x80) { + } else if (c >= 0x80) { if (maxlength < 2) break; maxlength -= 2; - dest[j++] = 0xc0 | (c & 0xfe0) >> 5; - dest[j++] = 0x80 | (c & 0x01f); + dest[j++] = 0xc0 | (c & 0x7c0) >> 6; + dest[j++] = 0x80 | (c & 0x03f); } else { maxlength -= 1; dest[j++] = c & 0x7f; -- cgit From e52bc7c28ac9f54db6f86b19ed65c599def18c98 Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Fri, 19 Feb 2016 09:23:59 -0500 Subject: lib/bitmap.c: conversion routines to/from u32 array Aimed at transferring bitmaps to/from user-space in a 32/64-bit agnostic way. Tested: unit tests (next patch) on qemu i386, x86_64, ppc, ppc64 BE and LE, ARM. Signed-off-by: David Decotigny Reviewed-by: Ben Hutchings Signed-off-by: David S. 
Miller --- lib/bitmap.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) (limited to 'lib') diff --git a/lib/bitmap.c b/lib/bitmap.c index 814814397cce..c66da508cbf7 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include @@ -1059,6 +1061,93 @@ int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) } EXPORT_SYMBOL(bitmap_allocate_region); +/** + * bitmap_from_u32array - copy the contents of a u32 array of bits to bitmap + * @bitmap: array of unsigned longs, the destination bitmap, non NULL + * @nbits: number of bits in @bitmap + * @buf: array of u32 (in host byte order), the source bitmap, non NULL + * @nwords: number of u32 words in @buf + * + * copy min(nbits, 32*nwords) bits from @buf to @bitmap, remaining + * bits between nword and nbits in @bitmap (if any) are cleared. In + * last word of @bitmap, the bits beyond nbits (if any) are kept + * unchanged. + * + * Return the number of bits effectively copied. + */ +unsigned int +bitmap_from_u32array(unsigned long *bitmap, unsigned int nbits, + const u32 *buf, unsigned int nwords) +{ + unsigned int dst_idx, src_idx; + + for (src_idx = dst_idx = 0; dst_idx < BITS_TO_LONGS(nbits); ++dst_idx) { + unsigned long part = 0; + + if (src_idx < nwords) + part = buf[src_idx++]; + +#if BITS_PER_LONG == 64 + if (src_idx < nwords) + part |= ((unsigned long) buf[src_idx++]) << 32; +#endif + + if (dst_idx < nbits/BITS_PER_LONG) + bitmap[dst_idx] = part; + else { + unsigned long mask = BITMAP_LAST_WORD_MASK(nbits); + + bitmap[dst_idx] = (bitmap[dst_idx] & ~mask) + | (part & mask); + } + } + + return min_t(unsigned int, nbits, 32*nwords); +} +EXPORT_SYMBOL(bitmap_from_u32array); + +/** + * bitmap_to_u32array - copy the contents of bitmap to a u32 array of bits + * @buf: array of u32 (in host byte order), the dest bitmap, non NULL + * @nwords: number of u32 words in @buf + * @bitmap: array of unsigned longs, the source bitmap, non NULL + * @nbits: number of bits in @bitmap + * + * copy min(nbits, 32*nwords) bits from @bitmap to @buf. Remaining + * bits after nbits in @buf (if any) are cleared. + * + * Return the number of bits effectively copied. + */ +unsigned int +bitmap_to_u32array(u32 *buf, unsigned int nwords, + const unsigned long *bitmap, unsigned int nbits) +{ + unsigned int dst_idx = 0, src_idx = 0; + + while (dst_idx < nwords) { + unsigned long part = 0; + + if (src_idx < BITS_TO_LONGS(nbits)) { + part = bitmap[src_idx]; + if (src_idx >= nbits/BITS_PER_LONG) + part &= BITMAP_LAST_WORD_MASK(nbits); + src_idx++; + } + + buf[dst_idx++] = part & 0xffffffffUL; + +#if BITS_PER_LONG == 64 + if (dst_idx < nwords) { + part >>= 32; + buf[dst_idx++] = part & 0xffffffffUL; + } +#endif + } + + return min_t(unsigned int, nbits, 32*nwords); +} +EXPORT_SYMBOL(bitmap_to_u32array); + /** * bitmap_copy_le - copy a bitmap, putting the bits into little-endian order. * @dst: destination buffer -- cgit From 5fd003f56c2c584b62a0486ad25bbd4be02b8b6c Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Fri, 19 Feb 2016 09:24:00 -0500 Subject: test_bitmap: unit tests for lib/bitmap.c This is mainly testing bitmap construction and conversion to/from u32[] for now. Tested: qemu i386, x86_64, ppc, ppc64 BE and LE, ARM. Signed-off-by: David Decotigny Signed-off-by: David S. 
Miller --- lib/Kconfig.debug | 8 ++ lib/Makefile | 1 + lib/test_bitmap.c | 358 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 367 insertions(+) create mode 100644 lib/test_bitmap.c (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ecb9e75614bf..f890ee5e1385 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1738,6 +1738,14 @@ config TEST_KSTRTOX config TEST_PRINTF tristate "Test printf() family of functions at runtime" +config TEST_BITMAP + tristate "Test bitmap_*() family of functions at runtime" + default n + help + Enable this option to test the bitmap functions at boot. + + If unsure, say N. + config TEST_RHASHTABLE tristate "Perform selftest on resizable hash table" default n diff --git a/lib/Makefile b/lib/Makefile index a7c26a41a738..dda4039588b1 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -43,6 +43,7 @@ obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o obj-$(CONFIG_TEST_PRINTF) += test_printf.o +obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c new file mode 100644 index 000000000000..e2cbd43d193c --- /dev/null +++ b/lib/test_bitmap.c @@ -0,0 +1,358 @@ +/* + * Test cases for printf facility. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +static unsigned total_tests __initdata; +static unsigned failed_tests __initdata; + +static char pbl_buffer[PAGE_SIZE] __initdata; + + +static bool __init +__check_eq_uint(const char *srcfile, unsigned int line, + const unsigned int exp_uint, unsigned int x) +{ + if (exp_uint != x) { + pr_warn("[%s:%u] expected %u, got %u\n", + srcfile, line, exp_uint, x); + return false; + } + return true; +} + + +static bool __init +__check_eq_bitmap(const char *srcfile, unsigned int line, + const unsigned long *exp_bmap, unsigned int exp_nbits, + const unsigned long *bmap, unsigned int nbits) +{ + if (exp_nbits != nbits) { + pr_warn("[%s:%u] bitmap length mismatch: expected %u, got %u\n", + srcfile, line, exp_nbits, nbits); + return false; + } + + if (!bitmap_equal(exp_bmap, bmap, nbits)) { + pr_warn("[%s:%u] bitmaps contents differ: expected \"%*pbl\", got \"%*pbl\"\n", + srcfile, line, + exp_nbits, exp_bmap, nbits, bmap); + return false; + } + return true; +} + +static bool __init +__check_eq_pbl(const char *srcfile, unsigned int line, + const char *expected_pbl, + const unsigned long *bitmap, unsigned int nbits) +{ + snprintf(pbl_buffer, sizeof(pbl_buffer), "%*pbl", nbits, bitmap); + if (strcmp(expected_pbl, pbl_buffer)) { + pr_warn("[%s:%u] expected \"%s\", got \"%s\"\n", + srcfile, line, + expected_pbl, pbl_buffer); + return false; + } + return true; +} + +static bool __init +__check_eq_u32_array(const char *srcfile, unsigned int line, + const u32 *exp_arr, unsigned int exp_len, + const u32 *arr, unsigned int len) +{ + if (exp_len != len) { + pr_warn("[%s:%u] array length differ: expected %u, got %u\n", + srcfile, line, + exp_len, len); + return false; + } + + if (memcmp(exp_arr, arr, len*sizeof(*arr))) { + pr_warn("[%s:%u] array contents differ\n", srcfile, line); + print_hex_dump(KERN_WARNING, " exp: ", DUMP_PREFIX_OFFSET, + 32, 4, exp_arr, exp_len*sizeof(*exp_arr), false); + print_hex_dump(KERN_WARNING, " got: ", DUMP_PREFIX_OFFSET, + 32, 4, arr, len*sizeof(*arr), false); + return false; + } + + return 
true; +} + +#define __expect_eq(suffix, ...) \ + ({ \ + int result = 0; \ + total_tests++; \ + if (!__check_eq_ ## suffix(__FILE__, __LINE__, \ + ##__VA_ARGS__)) { \ + failed_tests++; \ + result = 1; \ + } \ + result; \ + }) + +#define expect_eq_uint(...) __expect_eq(uint, ##__VA_ARGS__) +#define expect_eq_bitmap(...) __expect_eq(bitmap, ##__VA_ARGS__) +#define expect_eq_pbl(...) __expect_eq(pbl, ##__VA_ARGS__) +#define expect_eq_u32_array(...) __expect_eq(u32_array, ##__VA_ARGS__) + +static void __init test_zero_fill_copy(void) +{ + DECLARE_BITMAP(bmap1, 1024); + DECLARE_BITMAP(bmap2, 1024); + + bitmap_zero(bmap1, 1024); + bitmap_zero(bmap2, 1024); + + /* single-word bitmaps */ + expect_eq_pbl("", bmap1, 23); + + bitmap_fill(bmap1, 19); + expect_eq_pbl("0-18", bmap1, 1024); + + bitmap_copy(bmap2, bmap1, 23); + expect_eq_pbl("0-18", bmap2, 1024); + + bitmap_fill(bmap2, 23); + expect_eq_pbl("0-22", bmap2, 1024); + + bitmap_copy(bmap2, bmap1, 23); + expect_eq_pbl("0-18", bmap2, 1024); + + bitmap_zero(bmap1, 23); + expect_eq_pbl("", bmap1, 1024); + + /* multi-word bitmaps */ + bitmap_zero(bmap1, 1024); + expect_eq_pbl("", bmap1, 1024); + + bitmap_fill(bmap1, 109); + expect_eq_pbl("0-108", bmap1, 1024); + + bitmap_copy(bmap2, bmap1, 1024); + expect_eq_pbl("0-108", bmap2, 1024); + + bitmap_fill(bmap2, 1024); + expect_eq_pbl("0-1023", bmap2, 1024); + + bitmap_copy(bmap2, bmap1, 1024); + expect_eq_pbl("0-108", bmap2, 1024); + + /* the following tests assume a 32- or 64-bit arch (even 128b + * if we care) + */ + + bitmap_fill(bmap2, 1024); + bitmap_copy(bmap2, bmap1, 109); /* ... but 0-padded til word length */ + expect_eq_pbl("0-108,128-1023", bmap2, 1024); + + bitmap_fill(bmap2, 1024); + bitmap_copy(bmap2, bmap1, 97); /* ... but aligned on word length */ + expect_eq_pbl("0-108,128-1023", bmap2, 1024); + + bitmap_zero(bmap2, 97); /* ... 
but 0-padded til word length */ + expect_eq_pbl("128-1023", bmap2, 1024); +} + +static void __init test_bitmap_u32_array_conversions(void) +{ + DECLARE_BITMAP(bmap1, 1024); + DECLARE_BITMAP(bmap2, 1024); + u32 exp_arr[32], arr[32]; + unsigned nbits; + + for (nbits = 0 ; nbits < 257 ; ++nbits) { + const unsigned int used_u32s = DIV_ROUND_UP(nbits, 32); + unsigned int i, rv; + + bitmap_zero(bmap1, nbits); + bitmap_set(bmap1, nbits, 1024 - nbits); /* garbage */ + + memset(arr, 0xff, sizeof(arr)); + rv = bitmap_to_u32array(arr, used_u32s, bmap1, nbits); + expect_eq_uint(nbits, rv); + + memset(exp_arr, 0xff, sizeof(exp_arr)); + memset(exp_arr, 0, used_u32s*sizeof(*exp_arr)); + expect_eq_u32_array(exp_arr, 32, arr, 32); + + bitmap_fill(bmap2, 1024); + rv = bitmap_from_u32array(bmap2, nbits, arr, used_u32s); + expect_eq_uint(nbits, rv); + expect_eq_bitmap(bmap1, 1024, bmap2, 1024); + + for (i = 0 ; i < nbits ; ++i) { + /* + * test conversion bitmap -> u32[] + */ + + bitmap_zero(bmap1, 1024); + __set_bit(i, bmap1); + bitmap_set(bmap1, nbits, 1024 - nbits); /* garbage */ + + memset(arr, 0xff, sizeof(arr)); + rv = bitmap_to_u32array(arr, used_u32s, bmap1, nbits); + expect_eq_uint(nbits, rv); + + /* 1st used u32 words contain expected bit set, the + * remaining words are left unchanged (0xff) + */ + memset(exp_arr, 0xff, sizeof(exp_arr)); + memset(exp_arr, 0, used_u32s*sizeof(*exp_arr)); + exp_arr[i/32] = (1U<<(i%32)); + expect_eq_u32_array(exp_arr, 32, arr, 32); + + + /* same, with longer array to fill + */ + memset(arr, 0xff, sizeof(arr)); + rv = bitmap_to_u32array(arr, 32, bmap1, nbits); + expect_eq_uint(nbits, rv); + + /* 1st used u32 words contain expected bit set, the + * remaining words are all 0s + */ + memset(exp_arr, 0, sizeof(exp_arr)); + exp_arr[i/32] = (1U<<(i%32)); + expect_eq_u32_array(exp_arr, 32, arr, 32); + + /* + * test conversion u32[] -> bitmap + */ + + /* the 1st nbits of bmap2 are identical to + * bmap1, the remaining bits of bmap2 are left + * unchanged (all 1s) + */ + bitmap_fill(bmap2, 1024); + rv = bitmap_from_u32array(bmap2, nbits, + exp_arr, used_u32s); + expect_eq_uint(nbits, rv); + + expect_eq_bitmap(bmap1, 1024, bmap2, 1024); + + /* same, with more bits to fill + */ + memset(arr, 0xff, sizeof(arr)); /* garbage */ + memset(arr, 0, used_u32s*sizeof(u32)); + arr[i/32] = (1U<<(i%32)); + + bitmap_fill(bmap2, 1024); + rv = bitmap_from_u32array(bmap2, 1024, arr, used_u32s); + expect_eq_uint(used_u32s*32, rv); + + /* the 1st nbits of bmap2 are identical to + * bmap1, the remaining bits of bmap2 are cleared + */ + bitmap_zero(bmap1, 1024); + __set_bit(i, bmap1); + expect_eq_bitmap(bmap1, 1024, bmap2, 1024); + + + /* + * test short conversion bitmap -> u32[] (1 + * word too short) + */ + if (used_u32s > 1) { + bitmap_zero(bmap1, 1024); + __set_bit(i, bmap1); + bitmap_set(bmap1, nbits, + 1024 - nbits); /* garbage */ + memset(arr, 0xff, sizeof(arr)); + + rv = bitmap_to_u32array(arr, used_u32s - 1, + bmap1, nbits); + expect_eq_uint((used_u32s - 1)*32, rv); + + /* 1st used u32 words contain expected + * bit set, the remaining words are + * left unchanged (0xff) + */ + memset(exp_arr, 0xff, sizeof(exp_arr)); + memset(exp_arr, 0, + (used_u32s-1)*sizeof(*exp_arr)); + if ((i/32) < (used_u32s - 1)) + exp_arr[i/32] = (1U<<(i%32)); + expect_eq_u32_array(exp_arr, 32, arr, 32); + } + + /* + * test short conversion u32[] -> bitmap (3 + * bits too short) + */ + if (nbits > 3) { + memset(arr, 0xff, sizeof(arr)); /* garbage */ + memset(arr, 0, used_u32s*sizeof(*arr)); + arr[i/32] = (1U<<(i%32)); 
+ + bitmap_zero(bmap1, 1024); + rv = bitmap_from_u32array(bmap1, nbits - 3, + arr, used_u32s); + expect_eq_uint(nbits - 3, rv); + + /* we are expecting the bit < nbits - + * 3 (none otherwise), and the rest of + * bmap1 unchanged (0-filled) + */ + bitmap_zero(bmap2, 1024); + if (i < nbits - 3) + __set_bit(i, bmap2); + expect_eq_bitmap(bmap2, 1024, bmap1, 1024); + + /* do the same with bmap1 initially + * 1-filled + */ + + bitmap_fill(bmap1, 1024); + rv = bitmap_from_u32array(bmap1, nbits - 3, + arr, used_u32s); + expect_eq_uint(nbits - 3, rv); + + /* we are expecting the bit < nbits - + * 3 (none otherwise), and the rest of + * bmap1 unchanged (1-filled) + */ + bitmap_zero(bmap2, 1024); + if (i < nbits - 3) + __set_bit(i, bmap2); + bitmap_set(bmap2, nbits-3, 1024 - nbits + 3); + expect_eq_bitmap(bmap2, 1024, bmap1, 1024); + } + } + } +} + +static int __init test_bitmap_init(void) +{ + test_zero_fill_copy(); + test_bitmap_u32_array_conversions(); + + if (failed_tests == 0) + pr_info("all %u tests passed\n", total_tests); + else + pr_warn("failed %u out of %u tests\n", + failed_tests, total_tests); + + return failed_tests ? -EINVAL : 0; +} + +static void __exit test_bitmap_cleanup(void) +{ +} + +module_init(test_bitmap_init); +module_exit(test_bitmap_cleanup); + +MODULE_AUTHOR("david decotigny "); +MODULE_LICENSE("GPL"); -- cgit From a272858a3c1ecd4a935ba23c66668f81214bd110 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 1 Jan 2016 12:39:09 +0100 Subject: extable: add support for relative extables to search and sort routines This adds support to the generic search_extable() and sort_extable() implementations for dealing with exception table entries whose fields contain relative offsets rather than absolute addresses. Acked-by: Helge Deller Acked-by: Heiko Carstens Acked-by: H. Peter Anvin Acked-by: Tony Luck Acked-by: Will Deacon Acked-by: Richard Henderson Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- lib/extable.c | 50 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/extable.c b/lib/extable.c index 4cac81ec225e..0be02ad561e9 100644 --- a/lib/extable.c +++ b/lib/extable.c @@ -14,7 +14,37 @@ #include #include +#ifndef ARCH_HAS_RELATIVE_EXTABLE +#define ex_to_insn(x) ((x)->insn) +#else +static inline unsigned long ex_to_insn(const struct exception_table_entry *x) +{ + return (unsigned long)&x->insn + x->insn; +} +#endif + #ifndef ARCH_HAS_SORT_EXTABLE +#ifndef ARCH_HAS_RELATIVE_EXTABLE +#define swap_ex NULL +#else +static void swap_ex(void *a, void *b, int size) +{ + struct exception_table_entry *x = a, *y = b, tmp; + int delta = b - a; + + tmp = *x; + x->insn = y->insn + delta; + y->insn = tmp.insn - delta; + +#ifdef swap_ex_entry_fixup + swap_ex_entry_fixup(x, y, tmp, delta); +#else + x->fixup = y->fixup + delta; + y->fixup = tmp.fixup - delta; +#endif +} +#endif /* ARCH_HAS_RELATIVE_EXTABLE */ + /* * The exception table needs to be sorted so that the binary * search that we use to find entries in it works properly. 
@@ -26,9 +56,9 @@ static int cmp_ex(const void *a, const void *b) const struct exception_table_entry *x = a, *y = b; /* avoid overflow */ - if (x->insn > y->insn) + if (ex_to_insn(x) > ex_to_insn(y)) return 1; - if (x->insn < y->insn) + if (ex_to_insn(x) < ex_to_insn(y)) return -1; return 0; } @@ -37,7 +67,7 @@ void sort_extable(struct exception_table_entry *start, struct exception_table_entry *finish) { sort(start, finish - start, sizeof(struct exception_table_entry), - cmp_ex, NULL); + cmp_ex, swap_ex); } #ifdef CONFIG_MODULES @@ -48,13 +78,15 @@ void sort_extable(struct exception_table_entry *start, void trim_init_extable(struct module *m) { /*trim the beginning*/ - while (m->num_exentries && within_module_init(m->extable[0].insn, m)) { + while (m->num_exentries && + within_module_init(ex_to_insn(&m->extable[0]), m)) { m->extable++; m->num_exentries--; } /*trim the end*/ while (m->num_exentries && - within_module_init(m->extable[m->num_exentries-1].insn, m)) + within_module_init(ex_to_insn(&m->extable[m->num_exentries - 1]), + m)) m->num_exentries--; } #endif /* CONFIG_MODULES */ @@ -81,13 +113,13 @@ search_extable(const struct exception_table_entry *first, * careful, the distance between value and insn * can be larger than MAX_LONG: */ - if (mid->insn < value) + if (ex_to_insn(mid) < value) first = mid + 1; - else if (mid->insn > value) + else if (ex_to_insn(mid) > value) last = mid - 1; else return mid; - } - return NULL; + } + return NULL; } #endif -- cgit From 3ee0cb5fb5eea2110db1b5cb7f67029b7be8a376 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Wed, 17 Feb 2016 14:46:59 +0100 Subject: lib/mpi: Endianness fix The limbs are integers in the host endianness, so we can't simply iterate over the individual bytes. The current code happens to work on little-endian, because the order of the limbs in the MPI array is the same as the order of the bytes in each limb, but it breaks on big-endian. 
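To make the breakage concrete, assume 64-bit limbs and the value 0x1ff held in a single limb d[0]. Little-endian memory holds that limb as ff 01 00 00 00 00 00 00, so the old backwards byte scan correctly counts six leading zero bytes; big-endian memory holds it as 00 00 00 00 00 00 01 ff, so the same scan hits 0xff immediately and wrongly reports zero. Counting on the limb value itself is endian-neutral, e.g. (hedged demo, assuming 64-bit mpi_limb_t):

	mpi_limb_t d[1] = { 0x1ff };
	/*
	 * count_leading_zeros(d[0]) == 55 for a 64-bit limb, and 55 / 8 == 6
	 * leading zero bytes -- the same answer on either endianness, since
	 * only the value is inspected, never its in-memory byte order.
	 */
	int lzeros = count_leading_zeros(d[0]) / 8;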
Fixes: 0f74fbf77d45 ("MPI: Fix mpi_read_buffer") Signed-off-by: Michal Marek Signed-off-by: Herbert Xu --- lib/mpi/mpicoder.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index ec533a6c77b5..eb15e7dc7b65 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -128,6 +128,23 @@ leave: } EXPORT_SYMBOL_GPL(mpi_read_from_buffer); +static int count_lzeros(MPI a) +{ + mpi_limb_t alimb; + int i, lzeros = 0; + + for (i = a->nlimbs - 1; i >= 0; i--) { + alimb = a->d[i]; + if (alimb == 0) { + lzeros += sizeof(mpi_limb_t); + } else { + lzeros += count_leading_zeros(alimb) / 8; + break; + } + } + return lzeros; +} + /** * mpi_read_buffer() - read MPI to a bufer provided by user (msb first) * @@ -148,7 +165,7 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, uint8_t *p; mpi_limb_t alimb; unsigned int n = mpi_get_size(a); - int i, lzeros = 0; + int i, lzeros; if (!buf || !nbytes) return -EINVAL; @@ -156,14 +173,7 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, if (sign) *sign = a->sign; - p = (void *)&a->d[a->nlimbs] - 1; - - for (i = a->nlimbs * sizeof(alimb) - 1; i >= 0; i--, p--) { - if (!*p) - lzeros++; - else - break; - } + lzeros = count_lzeros(a); if (buf_len < n - lzeros) { *nbytes = n - lzeros; @@ -351,7 +361,7 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes, u8 *p, *p2; mpi_limb_t alimb, alimb2; unsigned int n = mpi_get_size(a); - int i, x, y = 0, lzeros = 0, buf_len; + int i, x, y = 0, lzeros, buf_len; if (!nbytes) return -EINVAL; @@ -359,14 +369,7 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes, if (sign) *sign = a->sign; - p = (void *)&a->d[a->nlimbs] - 1; - - for (i = a->nlimbs * sizeof(alimb) - 1; i >= 0; i--, p--) { - if (!*p) - lzeros++; - else - break; - } + lzeros = count_lzeros(a); if (*nbytes < n - lzeros) { *nbytes = n - lzeros; -- cgit From c5d552487b9eb116b61032239ffb2f8d192f19b8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 Feb 2016 13:46:26 +0100 Subject: lib/mpi: avoid assembler warning A wrapper around the umull assembly instruction might reuse the input register as an output, which is undefined on some ARM machines, as pointed out by this assembler warning: CC lib/mpi/generic_mpih-mul1.o /tmp/ccxJuxIy.s: Assembler messages: /tmp/ccxJuxIy.s:53: rdhi, rdlo and rm must all be different CC lib/mpi/generic_mpih-mul2.o /tmp/ccI0scAD.s: Assembler messages: /tmp/ccI0scAD.s:53: rdhi, rdlo and rm must all be different CC lib/mpi/generic_mpih-mul3.o /tmp/ccMvVQcp.s: Assembler messages: /tmp/ccMvVQcp.s:53: rdhi, rdlo and rm must all be different This changes the constraints to force different registers to be used as output. 
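The "&" earlyclobber modifier is what forces the separation: it tells GCC that the output is written before all inputs have been consumed, so the output must not share a register with any input. A minimal ARM-only sketch of the same pattern (not the kernel macro itself):

	static inline void umul32x32(unsigned int a, unsigned int b,
				     unsigned int *hi, unsigned int *lo)
	{
		unsigned int h, l;

		/* umull rdlo, rdhi, rm, rs: rdlo/rdhi must not alias rm */
		asm("umull %0, %1, %2, %3"
		    : "=&r" (l), "=&r" (h)	/* "&" = earlyclobber */
		    : "r" (a), "r" (b));
		*hi = h;
		*lo = l;
	}

With plain "=r" constraints the compiler may legally reuse an input register for an output, which is exactly what provoked the assembler warnings above.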
Signed-off-by: Arnd Bergmann Signed-off-by: Herbert Xu --- lib/mpi/longlong.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h index b90e255c2a68..93336502af08 100644 --- a/lib/mpi/longlong.h +++ b/lib/mpi/longlong.h @@ -216,7 +216,7 @@ extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype); __asm__ ("%@ Inlined umul_ppmm\n" \ "umull %r1, %r0, %r2, %r3" \ : "=&r" ((USItype)(xh)), \ - "=r" ((USItype)(xl)) \ + "=&r" ((USItype)(xl)) \ : "r" ((USItype)(a)), \ "r" ((USItype)(b)) \ : "r0", "r1") -- cgit From 9c6bd0c2f103f4748cb4abcaf141f7d11aabfe9f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 Feb 2016 13:46:27 +0100 Subject: lib/mpi: use "static inline" instead of "extern inline" When we use CONFIG_PROFILE_ALL_BRANCHES, every 'if()' introduces a static variable, but that is not allowed in 'extern inline' functions: mpi-inline.h:116:204: warning: '______f' is static but declared in inline function 'mpihelp_sub' which is not static mpi-inline.h:113:184: warning: '______f' is static but declared in inline function 'mpihelp_sub' which is not static mpi-inline.h:70:184: warning: '______f' is static but declared in inline function 'mpihelp_add' which is not static mpi-inline.h:56:204: warning: '______f' is static but declared in inline function 'mpihelp_add_1' which is not static This changes the MPI code to use 'static inline' instead, to get rid of hundreds of warnings. Signed-off-by: Arnd Bergmann Signed-off-by: Herbert Xu --- lib/mpi/mpi-inline.h | 2 +- lib/mpi/mpi-internal.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/mpi/mpi-inline.h b/lib/mpi/mpi-inline.h index e2b39852b30a..c245ea31f785 100644 --- a/lib/mpi/mpi-inline.h +++ b/lib/mpi/mpi-inline.h @@ -30,7 +30,7 @@ #define G10_MPI_INLINE_H #ifndef G10_MPI_INLINE_DECL -#define G10_MPI_INLINE_DECL extern inline +#define G10_MPI_INLINE_DECL static inline #endif G10_MPI_INLINE_DECL mpi_limb_t diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h index c65dd1bff45a..7eceeddb3fb8 100644 --- a/lib/mpi/mpi-internal.h +++ b/lib/mpi/mpi-internal.h @@ -168,19 +168,19 @@ void mpi_rshift_limbs(MPI a, unsigned int count); int mpi_lshift_limbs(MPI a, unsigned int count); /*-- mpihelp-add.c --*/ -mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, +static inline mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb); mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_ptr_t s2_ptr, mpi_size_t size); -mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, +static inline mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_ptr_t s2_ptr, mpi_size_t s2_size); /*-- mpihelp-sub.c --*/ -mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, +static inline mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb); mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_ptr_t s2_ptr, mpi_size_t size); -mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, +static inline mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_ptr_t s2_ptr, mpi_size_t s2_size); /*-- mpihelp-cmp.c --*/ -- cgit From b9ab5ebb14ec389bd80f66613f1fe3f8f65f2521 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sun, 28 Feb 2016 22:22:42 -0600 Subject: objtool: Add CONFIG_STACK_VALIDATION option 
Add a CONFIG_STACK_VALIDATION option which will run "objtool check" for each .o file to ensure the validity of its stack metadata. Signed-off-by: Josh Poimboeuf Cc: Andrew Morton Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Bernd Petrovitsch Cc: Borislav Petkov Cc: Chris J Arges Cc: Jiri Slaby Cc: Linus Torvalds Cc: Michal Marek Cc: Namhyung Kim Cc: Pedro Alves Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: live-patching@vger.kernel.org Link: http://lkml.kernel.org/r/92baab69a6bf9bc7043af0bfca9fb964a1d45546.1456719558.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- lib/Kconfig.debug | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8bfd1aca7a3d..855265621863 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -342,6 +342,18 @@ config FRAME_POINTER larger and slower, but it gives very useful debugging information in case of kernel bugs. (precise oopses/stacktraces/warnings) +config STACK_VALIDATION + bool "Compile-time stack metadata validation" + depends on HAVE_STACK_VALIDATION + default n + help + Add compile-time checks to validate stack metadata, including frame + pointers (if CONFIG_FRAME_POINTER is enabled). This helps ensure + that runtime stack traces are more reliable. + + For more information, see + tools/objtool/Documentation/stack-validation.txt. + config DEBUG_FORCE_WEAK_PER_CPU bool "Force weak per-cpu definitions" depends on DEBUG_KERNEL -- cgit From 3712bba1a260ad851f3aa8ddea9cb7326f6aa0b3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Feb 2016 22:19:18 +0000 Subject: cpumask: Export cpumask_any_but() Almost every cpumask function is exported, just not the one I need to make the Intel uncore driver modular. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Cc: Andi Kleen Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: David S. Miller Cc: Harish Chegondi Cc: Jacob Pan Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rusty Russell Cc: Stephane Eranian Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/20160222221011.878299859@linutronix.de Signed-off-by: Ingo Molnar --- lib/cpumask.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/cpumask.c b/lib/cpumask.c index 5a70f6196f57..81dedaab36cc 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -41,6 +41,7 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) break; return i; } +EXPORT_SYMBOL(cpumask_any_but); /* These are not inline because of header tangles. */ #ifdef CONFIG_CPUMASK_OFFSTACK -- cgit From b07edbe1cf3dae9ba81f24888e2f2a9dbe778918 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 16 Feb 2016 17:24:08 +0100 Subject: netfilter: meta: add PRANDOM support Can be used to randomly match packets e.g. for statistic traffic sampling. See commit 3ad0040573b0c00f8848 ("bpf: split state from prandom_u32() and consolidate {c, e}BPF prngs") for more info why this doesn't use prandom_u32 directly. Unlike bpf nft_meta can be built as a module, so add an EXPORT_SYMBOL for prandom_seed_full_state too. 
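The export enables the following pattern -- a hedged sketch of how a module such as nft_meta can keep its own per-cpu state instead of touching the shared prandom_u32() state (all names here are illustrative):

	#include <linux/percpu.h>
	#include <linux/random.h>

	static DEFINE_PER_CPU(struct rnd_state, demo_prandom_state);

	static int __init demo_init(void)
	{
		/* seed every CPU's state once, at module load */
		prandom_seed_full_state(&demo_prandom_state);
		return 0;
	}

	/* e.g. from a packet hook, where preemption is already disabled */
	static u32 demo_sample(void)
	{
		struct rnd_state *state = this_cpu_ptr(&demo_prandom_state);

		return prandom_u32_state(state);
	}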
Cc: Daniel Borkmann Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- lib/random32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/random32.c b/lib/random32.c index 12111910ccd0..510d1ce7d4d2 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -255,6 +255,7 @@ void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state) prandom_warmup(state); } } +EXPORT_SYMBOL(prandom_seed_full_state); /* * Generate better values after random number generator -- cgit From 757c989b9994f51b42d6be1bd33c7c12d16a3ac7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 26 Feb 2016 18:43:32 +0000 Subject: cpu/hotplug: Make target state writeable Make it possible to write a target state to the per cpu state file, so we can switch between states. Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Rik van Riel Cc: Rafael Wysocki Cc: "Srivatsa S. Bhat" Cc: Peter Zijlstra Cc: Arjan van de Ven Cc: Sebastian Siewior Cc: Rusty Russell Cc: Steven Rostedt Cc: Oleg Nesterov Cc: Tejun Heo Cc: Andrew Morton Cc: Paul McKenney Cc: Linus Torvalds Cc: Paul Turner Link: http://lkml.kernel.org/r/20160226182341.022814799@linutronix.de Signed-off-by: Thomas Gleixner --- lib/Kconfig.debug | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8bfd1aca7a3d..f28f7fad452f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1442,6 +1442,19 @@ config DEBUG_BLOCK_EXT_DEVT Say N if you are unsure. +config CPU_HOTPLUG_STATE_CONTROL + bool "Enable CPU hotplug state control" + depends on DEBUG_KERNEL + depends on HOTPLUG_CPU + default n + help + Allows to write steps between "offline" and "online" to the CPUs + sysfs target file so states can be stepped granular. This is a debug + option for now as the hotplug machinery cannot be stopped and + restarted at arbitrary points yet. + + Say N if your are unsure. + config NOTIFIER_ERROR_INJECTION tristate "Notifier error injection" depends on DEBUG_KERNEL -- cgit From a8463d4b0e47d1f37af684d97884ffcf35de043b Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 2 Feb 2016 21:46:32 -0800 Subject: dma: Provide simple noop dma ops We are going to require dma_ops for several common drivers, even for systems that do have an identity mapping. Lets provide some minimal no-op dma_ops that can be used for that purpose. Signed-off-by: Christian Borntraeger Reviewed-by: Joerg Roedel Signed-off-by: Andy Lutomirski Signed-off-by: Michael S. 
Tsirkin --- lib/Makefile | 1 + lib/dma-noop.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 lib/dma-noop.c (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index a7c26a41a738..a572b86a1b1d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -18,6 +18,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o +lib-$(CONFIG_HAS_DMA) += dma-noop.o lib-y += kobject.o klist.o obj-y += lockref.o diff --git a/lib/dma-noop.c b/lib/dma-noop.c new file mode 100644 index 000000000000..72145646857e --- /dev/null +++ b/lib/dma-noop.c @@ -0,0 +1,75 @@ +/* + * lib/dma-noop.c + * + * Simple DMA noop-ops that map 1:1 with memory + */ +#include +#include +#include +#include + +static void *dma_noop_alloc(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, + struct dma_attrs *attrs) +{ + void *ret; + + ret = (void *)__get_free_pages(gfp, get_order(size)); + if (ret) + *dma_handle = virt_to_phys(ret); + return ret; +} + +static void dma_noop_free(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_addr, + struct dma_attrs *attrs) +{ + free_pages((unsigned long)cpu_addr, get_order(size)); +} + +static dma_addr_t dma_noop_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + return page_to_phys(page) + offset; +} + +static int dma_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + int i; + struct scatterlist *sg; + + for_each_sg(sgl, sg, nents, i) { + void *va; + + BUG_ON(!sg_page(sg)); + va = sg_virt(sg); + sg_dma_address(sg) = (dma_addr_t)virt_to_phys(va); + sg_dma_len(sg) = sg->length; + } + + return nents; +} + +static int dma_noop_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return 0; +} + +static int dma_noop_supported(struct device *dev, u64 mask) +{ + return 1; +} + +struct dma_map_ops dma_noop_ops = { + .alloc = dma_noop_alloc, + .free = dma_noop_free, + .map_page = dma_noop_map_page, + .map_sg = dma_noop_map_sg, + .mapping_error = dma_noop_mapping_error, + .dma_supported = dma_noop_supported, +}; + +EXPORT_SYMBOL(dma_noop_ops); -- cgit From d77a117e6871ff78a06def46583d23752593de60 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 9 Mar 2016 14:08:10 -0800 Subject: list: kill list_force_poison() Given we have uninitialized list_heads being passed to list_add() it will always be the case that those uninitialized values randomly trigger the poison value. Especially since a list_add() operation will seed the stack with the poison value for later stack allocations to trip over. For example, see these two false positive reports: list_add attempted on force-poisoned entry WARNING: at lib/list_debug.c:34 [..] NIP [c00000000043c390] __list_add+0xb0/0x150 LR [c00000000043c38c] __list_add+0xac/0x150 Call Trace: __list_add+0xac/0x150 (unreliable) __down+0x4c/0xf8 down+0x68/0x70 xfs_buf_lock+0x4c/0x150 [xfs] list_add attempted on force-poisoned entry(0000000000000500), new->next == d0000000059ecdb0, new->prev == 0000000000000500 WARNING: at lib/list_debug.c:33 [..] 
NIP [c00000000042db78] __list_add+0xa8/0x140 LR [c00000000042db74] __list_add+0xa4/0x140 Call Trace: __list_add+0xa4/0x140 (unreliable) rwsem_down_read_failed+0x6c/0x1a0 down_read+0x58/0x60 xfs_log_commit_cil+0x7c/0x600 [xfs] Fixes: commit 5c2c2587b132 ("mm, dax, pmem: introduce {get|put}_dev_pagemap() for dax-gup") Signed-off-by: Dan Williams Reported-by: Eryu Guan Tested-by: Eryu Guan Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/list_debug.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'lib') diff --git a/lib/list_debug.c b/lib/list_debug.c index 3345a089ef7b..3859bf63561c 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -12,13 +12,6 @@ #include #include -static struct list_head force_poison; -void list_force_poison(struct list_head *entry) -{ - entry->next = &force_poison; - entry->prev = &force_poison; -} - /* * Insert a new entry between two known consecutive entries. * @@ -30,8 +23,6 @@ void __list_add(struct list_head *new, struct list_head *prev, struct list_head *next) { - WARN(new->next == &force_poison || new->prev == &force_poison, - "list_add attempted on force-poisoned entry\n"); WARN(next->prev != prev, "list_add corruption. next->prev should be " "prev (%p), but was %p. (next=%p).\n", -- cgit From 01cfbad79a5e2b835abf6a8154a341d75a6fc8cd Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 11 Mar 2016 14:05:34 -0800 Subject: ipv4: Update parameters for csum_tcpudp_magic to their original types This patch updates all instances of csum_tcpudp_magic and csum_tcpudp_nofold to reflect the types that are usually used as the source inputs. For example the protocol field is populated based on nexthdr which is actually an unsigned 8 bit value. The length is usually populated based on skb->len which is an unsigned integer. This addresses an issue in which the IPv6 function csum_ipv6_magic was generating a checksum using the full 32b of skb->len while csum_tcpudp_magic was only using the lower 16 bits. As a result we could run into issues when attempting to adjust the checksum as there was no protocol agnostic way to update it. With this change the value is still truncated as many architectures use "(len + proto) << 8", however this truncation only occurs for values greater than 16776960 in length and as such is unlikely to occur as we stop the inner headers at ~64K in size. I did have to make a few minor changes in the arm, mn10300, nios2, and score versions of the function in order to support these changes as they were either using things such as an OR to combine the protocol and length, or were using ntohs to convert the length which would have truncated the value. I also updated a few spots in terms of whitespace and type differences for the addresses. Most of this was just to make sure all of the definitions were in sync going forward. Signed-off-by: Alexander Duyck Signed-off-by: David S. 
Miller --- lib/checksum.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/checksum.c b/lib/checksum.c index 8b39e86dbab5..d3ec93f9e5f3 100644 --- a/lib/checksum.c +++ b/lib/checksum.c @@ -191,9 +191,7 @@ static inline u32 from64to32(u64 x) } __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, - __wsum sum) + __u32 len, __u8 proto, __wsum sum) { unsigned long long s = (__force u32)sum; -- cgit From edf14cdbf9a0e5ab52698ca66d07a76ade0d5c46 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Mar 2016 14:55:56 -0700 Subject: mm, printk: introduce new format string for flags In mm we use several kinds of flags bitfields that are sometimes printed for debugging purposes, or exported to userspace via sysfs. To make them easier to interpret independently on kernel version and config, we want to dump also the symbolic flag names. So far this has been done with repeated calls to pr_cont(), which is unreliable on SMP, and not usable for e.g. sysfs export. To get a more reliable and universal solution, this patch extends printk() format string for pointers to handle the page flags (%pGp), gfp_flags (%pGg) and vma flags (%pGv). Existing users of dump_flag_names() are converted and simplified. It would be possible to pass flags by value instead of pointer, but the %p format string for pointers already has extensions for various kernel structures, so it's a good fit, and the extra indirection in a non-critical path is negligible. [linux@rasmusvillemoes.dk: lots of good implementation suggestions] Signed-off-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: Rasmus Villemoes Cc: Joonsoo Kim Cc: Minchan Kim Cc: Sasha Levin Cc: "Kirill A. 
Shutemov" Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_printf.c | 53 +++++++++++++++++++++++++++++++++++++++ lib/vsprintf.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+) (limited to 'lib') diff --git a/lib/test_printf.c b/lib/test_printf.c index 4f6ae60433bc..563f10e6876a 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -17,6 +17,9 @@ #include #include +#include +#include + #define BUF_SIZE 256 #define PAD_SIZE 16 #define FILL_CHAR '$' @@ -410,6 +413,55 @@ netdev_features(void) { } +static void __init +flags(void) +{ + unsigned long flags; + gfp_t gfp; + char *cmp_buffer; + + flags = 0; + test("", "%pGp", &flags); + + /* Page flags should filter the zone id */ + flags = 1UL << NR_PAGEFLAGS; + test("", "%pGp", &flags); + + flags |= 1UL << PG_uptodate | 1UL << PG_dirty | 1UL << PG_lru + | 1UL << PG_active | 1UL << PG_swapbacked; + test("uptodate|dirty|lru|active|swapbacked", "%pGp", &flags); + + + flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC + | VM_DENYWRITE; + test("read|exec|mayread|maywrite|mayexec|denywrite", "%pGv", &flags); + + gfp = GFP_TRANSHUGE; + test("GFP_TRANSHUGE", "%pGg", &gfp); + + gfp = GFP_ATOMIC|__GFP_DMA; + test("GFP_ATOMIC|GFP_DMA", "%pGg", &gfp); + + gfp = __GFP_ATOMIC; + test("__GFP_ATOMIC", "%pGg", &gfp); + + cmp_buffer = kmalloc(BUF_SIZE, GFP_KERNEL); + if (!cmp_buffer) + return; + + /* Any flags not translated by the table should remain numeric */ + gfp = ~__GFP_BITS_MASK; + snprintf(cmp_buffer, BUF_SIZE, "%#lx", (unsigned long) gfp); + test(cmp_buffer, "%pGg", &gfp); + + snprintf(cmp_buffer, BUF_SIZE, "__GFP_ATOMIC|%#lx", + (unsigned long) gfp); + gfp |= __GFP_ATOMIC; + test(cmp_buffer, "%pGg", &gfp); + + kfree(cmp_buffer); +} + static void __init test_pointer(void) { @@ -428,6 +480,7 @@ test_pointer(void) struct_clk(); bitmap(); netdev_features(); + flags(); } static int __init diff --git a/lib/vsprintf.c b/lib/vsprintf.c index f44e178e6ede..525c8e19bda2 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -35,6 +35,8 @@ #include #endif +#include "../mm/internal.h" /* For the trace_print_flags arrays */ + #include /* for PAGE_SIZE */ #include /* for dereference_function_descriptor() */ #include /* cpu_to_le16 */ @@ -1407,6 +1409,72 @@ char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec, } } +static +char *format_flags(char *buf, char *end, unsigned long flags, + const struct trace_print_flags *names) +{ + unsigned long mask; + const struct printf_spec strspec = { + .field_width = -1, + .precision = -1, + }; + const struct printf_spec numspec = { + .flags = SPECIAL|SMALL, + .field_width = -1, + .precision = -1, + .base = 16, + }; + + for ( ; flags && names->name; names++) { + mask = names->mask; + if ((flags & mask) != mask) + continue; + + buf = string(buf, end, names->name, strspec); + + flags &= ~mask; + if (flags) { + if (buf < end) + *buf = '|'; + buf++; + } + } + + if (flags) + buf = number(buf, end, flags, numspec); + + return buf; +} + +static noinline_for_stack +char *flags_string(char *buf, char *end, void *flags_ptr, const char *fmt) +{ + unsigned long flags; + const struct trace_print_flags *names; + + switch (fmt[1]) { + case 'p': + flags = *(unsigned long *)flags_ptr; + /* Remove zone id */ + flags &= (1UL << NR_PAGEFLAGS) - 1; + names = pageflag_names; + break; + case 'v': + flags = *(unsigned long *)flags_ptr; + names = vmaflag_names; + break; + case 'g': + flags = *(gfp_t *)flags_ptr; + names = 
gfpflag_names; + break; + default: + WARN_ONCE(1, "Unsupported flags modifier: %c\n", fmt[1]); + return buf; + } + + return format_flags(buf, end, flags, names); +} + int kptr_restrict __read_mostly; /* @@ -1495,6 +1563,11 @@ int kptr_restrict __read_mostly; * - 'Cn' For a clock, it prints the name (Common Clock Framework) or address * (legacy clock framework) of the clock * - 'Cr' For a clock, it prints the current rate of the clock + * - 'G' For flags to be printed as a collection of symbolic strings that would + * construct the specific value. Supported flags given by option: + * p page flags (see struct page) given as pointer to unsigned long + * g gfp flags (GFP_* and __GFP_*) given as pointer to gfp_t + * v vma flags (VM_*) given as pointer to unsigned long * * ** Please update also Documentation/printk-formats.txt when making changes ** * @@ -1648,6 +1721,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return bdev_name(buf, end, ptr, spec, fmt); #endif + case 'G': + return flags_string(buf, end, ptr, fmt); } spec.flags |= SMALL; if (spec.field_width == -1) { -- cgit From 58e698af4c6347c726090d5480b2e51d1d07edf9 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 17 Mar 2016 14:18:36 -0700 Subject: radix-tree: account radix_tree_node to memory cgroup Allocation of radix_tree_node objects can be easily triggered from userspace, so we should account them to memory cgroup. Besides, we need them accounted for making shadow node shrinker per memcg (see mm/workingset.c). A tricky thing about accounting radix_tree_node objects is that they are mostly allocated through radix_tree_preload(), so we can't just set SLAB_ACCOUNT for radix_tree_node_cachep - that would likely result in a lot of unrelated cgroups using objects from each other's caches. One way to overcome this would be making radix tree preloads per memcg, but that would probably look cumbersome and overcomplicated. Instead, we make radix_tree_node_alloc() first try to allocate from the cache with __GFP_ACCOUNT, no matter if the caller has preloaded or not, and only if it fails fall back on using per cpu preloads. This should make most allocations accounted. Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 6b79e9026e24..224b369f5a5e 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -191,6 +191,15 @@ radix_tree_node_alloc(struct radix_tree_root *root) if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) { struct radix_tree_preload *rtp; + /* + * Even if the caller has preloaded, try to allocate from the + * cache first for the new node to get accounted. + */ + ret = kmem_cache_alloc(radix_tree_node_cachep, + gfp_mask | __GFP_ACCOUNT | __GFP_NOWARN); + if (ret) + goto out; + /* * Provided the caller has preloaded here, we will always * succeed in getting a node here (and never reach @@ -208,10 +217,11 @@ radix_tree_node_alloc(struct radix_tree_root *root) * for debugging. 
*/ kmemleak_update_trace(ret); + goto out; } - if (ret == NULL) - ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); - + ret = kmem_cache_alloc(radix_tree_node_cachep, + gfp_mask | __GFP_ACCOUNT); +out: BUG_ON(radix_tree_is_indirect_ptr(ret)); return ret; } -- cgit From d7b85cab74edef84a9330c476478ba8cd732b6a9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 17 Mar 2016 14:21:38 -0700 Subject: lib/bug.c: make panic_on_warn available for all architectures Christian Borntraeger reported that panic_on_warn doesn't have any effect on s390. The panic_on_warn feature was introduced with 9e3961a09798 ("kernel: add panic_on_warn"). However it did care only for the case when WANT_WARN_ON_SLOWPATH is defined. This is turn is only the case for architectures which do not have an own __WARN_TAINT defined. Other architectures which do have __WARN_TAINT defined call report_bug() for warnings within lib/bug.c which does not call panic() in case panic_on_warn is set. Let's simply enable the panic_on_warn feature by adding the same code like it was added to warn_slowpath_common() in panic.c. This enables panic_on_warn also for arm64, parisc, powerpc, s390 and sh. Signed-off-by: Heiko Carstens Reported-by: Christian Borntraeger Tested-by: Christian Borntraeger Acked-by: Prarit Bhargava Cc: Catalin Marinas Cc: Will Deacon Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Tested-by: Michael Ellerman (powerpc) Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bug.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'lib') diff --git a/lib/bug.c b/lib/bug.c index cff145f032a5..6cde380f09de 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -175,6 +175,17 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) pr_warn("WARNING: at %p [verbose debug info unavailable]\n", (void *)bugaddr); + if (panic_on_warn) { + /* + * This thread may hit another WARN() in the panic path. + * Resetting this prevents additional WARN() from + * panicking the system on this thread. Other threads + * are blocked by the panic_mutex in panic(). + */ + panic_on_warn = 0; + panic("panic_on_warn set ...\n"); + } + print_modules(); show_regs(regs); print_oops_end_marker(); -- cgit From 339e6353046dd4f675304d696a88aefdd727298e Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 17 Mar 2016 14:21:48 -0700 Subject: radix_tree: tag all internal tree nodes as indirect pointers Set the 'indirect_ptr' bit on all the pointers to internal nodes, not just on the root node. This enables the following patches to support multi-order entries in the radix tree. This patch is split out for ease of bisection. Signed-off-by: Matthew Wilcox Cc: Johannes Weiner Cc: Matthew Wilcox Cc: "Kirill A. 
Shutemov" Cc: Ross Zwisler Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 224b369f5a5e..ff91792346f6 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -368,9 +368,10 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) node->count = 1; node->parent = NULL; slot = root->rnode; - if (newheight > 1) { + if (radix_tree_is_indirect_ptr(slot) && newheight > 1) { slot = indirect_to_ptr(slot); slot->parent = node; + slot = ptr_to_indirect(slot); } node->slots[0] = slot; node = ptr_to_indirect(node); @@ -425,17 +426,20 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, slot->path = height; slot->parent = node; if (node) { - rcu_assign_pointer(node->slots[offset], slot); + rcu_assign_pointer(node->slots[offset], + ptr_to_indirect(slot)); node->count++; slot->path |= offset << RADIX_TREE_HEIGHT_SHIFT; } else - rcu_assign_pointer(root->rnode, ptr_to_indirect(slot)); + rcu_assign_pointer(root->rnode, + ptr_to_indirect(slot)); } /* Go a level down */ offset = (index >> shift) & RADIX_TREE_MAP_MASK; node = slot; slot = node->slots[offset]; + slot = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; height--; } @@ -533,6 +537,7 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, node = rcu_dereference_raw(*slot); if (node == NULL) return NULL; + node = indirect_to_ptr(node); shift -= RADIX_TREE_MAP_SHIFT; height--; @@ -619,6 +624,7 @@ void *radix_tree_tag_set(struct radix_tree_root *root, tag_set(slot, tag, offset); slot = slot->slots[offset]; BUG_ON(slot == NULL); + slot = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; height--; } @@ -658,11 +664,12 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, goto out; shift = height * RADIX_TREE_MAP_SHIFT; - slot = indirect_to_ptr(root->rnode); + slot = root->rnode; while (shift) { if (slot == NULL) goto out; + slot = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; @@ -738,6 +745,7 @@ int radix_tree_tag_get(struct radix_tree_root *root, if (node == NULL) return 0; + node = indirect_to_ptr(node); offset = (index >> shift) & RADIX_TREE_MAP_MASK; if (!tag_get(node, tag, offset)) @@ -838,6 +846,7 @@ restart: node = rcu_dereference_raw(node->slots[offset]); if (node == NULL) goto restart; + node = indirect_to_ptr(node); shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; } @@ -939,6 +948,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, shift -= RADIX_TREE_MAP_SHIFT; node = slot; slot = slot->slots[offset]; + slot = indirect_to_ptr(slot); continue; } @@ -1195,6 +1205,7 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item, slot = rcu_dereference_raw(slot->slots[i]); if (slot == NULL) goto out; + slot = indirect_to_ptr(slot); } /* Bottom level: check items */ @@ -1278,7 +1289,8 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) */ if (to_free->count != 1) break; - if (!to_free->slots[0]) + slot = to_free->slots[0]; + if (!slot) break; /* @@ -1288,8 +1300,8 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) * (to_free->slots[0]), it will be safe to dereference the new * one (root->rnode) as far as dependent read barriers go. 
*/ - slot = to_free->slots[0]; if (root->height > 1) { + slot = indirect_to_ptr(slot); slot->parent = NULL; slot = ptr_to_indirect(slot); } -- cgit From 0070e28d97e72aeac2a85f538d8a452400dfe1c7 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 17 Mar 2016 14:21:51 -0700 Subject: radix_tree: loop based on shift count, not height When we introduce entries that can cover multiple indices, we will need to stop in __radix_tree_create based on the shift, not the height. Split out for ease of bisect. Signed-off-by: Matthew Wilcox Cc: Johannes Weiner Cc: Matthew Wilcox Cc: "Kirill A. Shutemov" Cc: Ross Zwisler Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index ff91792346f6..9bb440f317c9 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -415,10 +415,10 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, slot = indirect_to_ptr(root->rnode); height = root->height; - shift = (height-1) * RADIX_TREE_MAP_SHIFT; + shift = height * RADIX_TREE_MAP_SHIFT; offset = 0; /* uninitialised var warning */ - while (height > 0) { + while (shift > 0) { if (slot == NULL) { /* Have to add a child node. */ if (!(slot = radix_tree_node_alloc(root))) @@ -436,11 +436,11 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, } /* Go a level down */ + shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; node = slot; slot = node->slots[offset]; slot = indirect_to_ptr(slot); - shift -= RADIX_TREE_MAP_SHIFT; height--; } -- cgit From e61452365372570253b2b1de84bab0cdb2e62c64 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 17 Mar 2016 14:21:54 -0700 Subject: radix_tree: add support for multi-order entries With huge pages, it is convenient to have the radix tree be able to return an entry that covers multiple indices. Previous attempts to deal with the problem have involved inserting N duplicate entries, which is a waste of memory and leads to problems trying to handle aliased tags, or probing the tree multiple times to find alternative entries which might cover the requested index. This approach inserts one canonical entry into the tree for a given range of indices, and may also insert other entries in order to ensure that lookups find the canonical entry. This solution only tolerates inserting powers of two that are greater than the fanout of the tree. If we wish to expand the radix tree's abilities to support large-ish pages that is less than the fanout at the penultimate level of the tree, then we would need to add one more step in lookup to ensure that any sibling nodes in the final level of the tree are dereferenced and we return the canonical entry that they reference. Signed-off-by: Matthew Wilcox Cc: Johannes Weiner Cc: Matthew Wilcox Cc: "Kirill A. Shutemov" Cc: Ross Zwisler Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 109 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 83 insertions(+), 26 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 9bb440f317c9..d907dca302d5 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -333,7 +333,8 @@ static inline unsigned long radix_tree_maxindex(unsigned int height) /* * Extend a radix tree so it can store key @index. 
*/ -static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) +static int radix_tree_extend(struct radix_tree_root *root, + unsigned long index, unsigned order) { struct radix_tree_node *node; struct radix_tree_node *slot; @@ -345,7 +346,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) while (index > radix_tree_maxindex(height)) height++; - if (root->rnode == NULL) { + if ((root->rnode == NULL) && (order == 0)) { root->height = height; goto out; } @@ -386,6 +387,7 @@ out: * __radix_tree_create - create a slot in a radix tree * @root: radix tree root * @index: index key + * @order: index occupies 2^order aligned slots * @nodep: returns node * @slotp: returns slot * @@ -399,26 +401,29 @@ out: * Returns -ENOMEM, or 0 for success. */ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, - struct radix_tree_node **nodep, void ***slotp) + unsigned order, struct radix_tree_node **nodep, + void ***slotp) { struct radix_tree_node *node = NULL, *slot; unsigned int height, shift, offset; int error; + BUG_ON((0 < order) && (order < RADIX_TREE_MAP_SHIFT)); + /* Make sure the tree is high enough. */ if (index > radix_tree_maxindex(root->height)) { - error = radix_tree_extend(root, index); + error = radix_tree_extend(root, index, order); if (error) return error; } - slot = indirect_to_ptr(root->rnode); + slot = root->rnode; height = root->height; shift = height * RADIX_TREE_MAP_SHIFT; offset = 0; /* uninitialised var warning */ - while (shift > 0) { + while (shift > order) { if (slot == NULL) { /* Have to add a child node. */ if (!(slot = radix_tree_node_alloc(root))) @@ -433,15 +438,31 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, } else rcu_assign_pointer(root->rnode, ptr_to_indirect(slot)); - } + } else if (!radix_tree_is_indirect_ptr(slot)) + break; /* Go a level down */ + height--; shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - node = slot; + node = indirect_to_ptr(slot); slot = node->slots[offset]; - slot = indirect_to_ptr(slot); - height--; + } + + /* Insert pointers to the canonical entry */ + if ((shift - order) > 0) { + int i, n = 1 << (shift - order); + offset = offset & ~(n - 1); + slot = ptr_to_indirect(&node->slots[offset]); + for (i = 0; i < n; i++) { + if (node->slots[offset + i]) + return -EEXIST; + } + + for (i = 1; i < n; i++) { + rcu_assign_pointer(node->slots[offset + i], slot); + node->count++; + } } if (nodep) @@ -452,15 +473,16 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, } /** - * radix_tree_insert - insert into a radix tree + * __radix_tree_insert - insert into a radix tree * @root: radix tree root * @index: index key + * @order: key covers the 2^order indices around index * @item: item to insert * * Insert an item into the radix tree at position @index. 
*/ -int radix_tree_insert(struct radix_tree_root *root, - unsigned long index, void *item) +int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, + unsigned order, void *item) { struct radix_tree_node *node; void **slot; @@ -468,7 +490,7 @@ int radix_tree_insert(struct radix_tree_root *root, BUG_ON(radix_tree_is_indirect_ptr(item)); - error = __radix_tree_create(root, index, &node, &slot); + error = __radix_tree_create(root, index, order, &node, &slot); if (error) return error; if (*slot != NULL) @@ -486,7 +508,7 @@ int radix_tree_insert(struct radix_tree_root *root, return 0; } -EXPORT_SYMBOL(radix_tree_insert); +EXPORT_SYMBOL(__radix_tree_insert); /** * __radix_tree_lookup - lookup an item in a radix tree @@ -537,6 +559,8 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, node = rcu_dereference_raw(*slot); if (node == NULL) return NULL; + if (!radix_tree_is_indirect_ptr(node)) + break; node = indirect_to_ptr(node); shift -= RADIX_TREE_MAP_SHIFT; @@ -624,6 +648,8 @@ void *radix_tree_tag_set(struct radix_tree_root *root, tag_set(slot, tag, offset); slot = slot->slots[offset]; BUG_ON(slot == NULL); + if (!radix_tree_is_indirect_ptr(slot)) + break; slot = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; height--; @@ -669,6 +695,8 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, while (shift) { if (slot == NULL) goto out; + if (!radix_tree_is_indirect_ptr(slot)) + break; slot = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; @@ -753,6 +781,8 @@ int radix_tree_tag_get(struct radix_tree_root *root, if (height == 1) return 1; node = rcu_dereference_raw(node->slots[offset]); + if (!radix_tree_is_indirect_ptr(node)) + return 1; shift -= RADIX_TREE_MAP_SHIFT; height--; } @@ -813,6 +843,7 @@ restart: node = rnode; while (1) { + struct radix_tree_node *slot; if ((flags & RADIX_TREE_ITER_TAGGED) ? 
!test_bit(offset, node->tags[tag]) : !node->slots[offset]) { @@ -843,10 +874,12 @@ restart: if (!shift) break; - node = rcu_dereference_raw(node->slots[offset]); - if (node == NULL) + slot = rcu_dereference_raw(node->slots[offset]); + if (slot == NULL) goto restart; - node = indirect_to_ptr(node); + if (!radix_tree_is_indirect_ptr(slot)) + break; + node = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; } @@ -944,16 +977,20 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, if (!tag_get(slot, iftag, offset)) goto next; if (shift) { - /* Go down one level */ - shift -= RADIX_TREE_MAP_SHIFT; node = slot; slot = slot->slots[offset]; - slot = indirect_to_ptr(slot); - continue; + if (radix_tree_is_indirect_ptr(slot)) { + slot = indirect_to_ptr(slot); + shift -= RADIX_TREE_MAP_SHIFT; + continue; + } else { + slot = node; + node = node->parent; + } } /* tag the leaf */ - tagged++; + tagged += 1 << shift; tag_set(slot, settag, offset); /* walk back up the path tagging interior nodes */ @@ -1201,11 +1238,20 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item, goto out; } - shift -= RADIX_TREE_MAP_SHIFT; slot = rcu_dereference_raw(slot->slots[i]); if (slot == NULL) goto out; + if (!radix_tree_is_indirect_ptr(slot)) { + if (slot == item) { + *found_index = index + i; + index = 0; + } else { + index += shift; + } + goto out; + } slot = indirect_to_ptr(slot); + shift -= RADIX_TREE_MAP_SHIFT; } /* Bottom level: check items */ @@ -1285,7 +1331,8 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) /* * The candidate node has more than one child, or its child - * is not at the leftmost slot, we cannot shrink. + * is not at the leftmost slot, or it is a multiorder entry, + * we cannot shrink. */ if (to_free->count != 1) break; @@ -1301,6 +1348,9 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) * one (root->rnode) as far as dependent read barriers go. */ if (root->height > 1) { + if (!radix_tree_is_indirect_ptr(slot)) + break; + slot = indirect_to_ptr(slot); slot->parent = NULL; slot = ptr_to_indirect(slot); @@ -1399,7 +1449,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root, unsigned long index, void *item) { struct radix_tree_node *node; - unsigned int offset; + unsigned int offset, i; void **slot; void *entry; int tag; @@ -1428,6 +1478,13 @@ void *radix_tree_delete_item(struct radix_tree_root *root, radix_tree_tag_clear(root, index, tag); } + /* Delete any sibling slots pointing to this slot */ + for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { + if (node->slots[offset + i] != ptr_to_indirect(slot)) + break; + node->slots[offset + i] = NULL; + node->count--; + } node->slots[offset] = NULL; node->count--; -- cgit From 7cf19af4debc804e408b059d58df7c9c226ca6fb Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 17 Mar 2016 14:21:57 -0700 Subject: radix_tree: add radix_tree_dump This is debug code which is #if 0 out. Signed-off-by: Matthew Wilcox Cc: Johannes Weiner Cc: Matthew Wilcox Cc: "Kirill A. 
Shutemov" Cc: Ross Zwisler Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index d907dca302d5..1624c4117961 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -173,6 +173,41 @@ radix_tree_find_next_bit(const unsigned long *addr, return size; } +#if 0 +static void dump_node(void *slot, int height, int offset) +{ + struct radix_tree_node *node; + int i; + + if (!slot) + return; + + if (height == 0) { + pr_debug("radix entry %p offset %d\n", slot, offset); + return; + } + + node = indirect_to_ptr(slot); + pr_debug("radix node: %p offset %d tags %lx %lx %lx path %x count %d parent %p\n", + slot, offset, node->tags[0][0], node->tags[1][0], + node->tags[2][0], node->path, node->count, node->parent); + + for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) + dump_node(node->slots[i], height - 1, i); +} + +/* For debug */ +static void radix_tree_dump(struct radix_tree_root *root) +{ + pr_debug("radix root: %p height %d rnode %p tags %x\n", + root, root->height, root->rnode, + root->gfp_mask >> __GFP_BITS_SHIFT); + if (!radix_tree_is_indirect_ptr(root->rnode)) + return; + dump_node(root->rnode, root->height, 0); +} +#endif + /* * This assumes that the caller has performed appropriate preallocation, and * that the caller has pinned this thread of control to the current CPU. -- cgit From 56b060814e2d87d6646a85a2f4609c73587399ca Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 17 Mar 2016 14:22:14 -0700 Subject: lib/string: introduce match_string() helper Occasionally we have to search for an occurrence of a string in an array of strings. Make a simple helper for that purpose. Signed-off-by: Andy Shevchenko Cc: "David S. Miller" Cc: Bartlomiej Zolnierkiewicz Cc: David Airlie Cc: David Woodhouse Cc: Dmitry Eremin-Solenikov Cc: Greg Kroah-Hartman Cc: Heikki Krogerus Cc: Linus Walleij Cc: Mika Westerberg Cc: Rafael J. Wysocki Cc: Sebastian Reichel Cc: Tejun Heo Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/string.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'lib') diff --git a/lib/string.c b/lib/string.c index 0323c0d5629a..e9c9db161d2c 100644 --- a/lib/string.c +++ b/lib/string.c @@ -630,6 +630,32 @@ bool sysfs_streq(const char *s1, const char *s2) } EXPORT_SYMBOL(sysfs_streq); +/** + * match_string - matches given string in an array + * @array: array of strings + * @n: number of strings in the array or -1 for NULL terminated arrays + * @string: string to match with + * + * Return: + * index of a @string in the @array if matches, or %-EINVAL otherwise. + */ +int match_string(const char * const *array, size_t n, const char *string) +{ + int index; + const char *item; + + for (index = 0; index < n; index++) { + item = array[index]; + if (!item) + break; + if (!strcmp(item, string)) + return index; + } + + return -EINVAL; +} +EXPORT_SYMBOL(match_string); + /** * strtobool - convert common user inputs into boolean values * @s: input string -- cgit From ef951599074ba4fad2d0efa0a977129b41e6d203 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Mar 2016 14:22:50 -0700 Subject: lib: move strtobool() to kstrtobool() Create the kstrtobool_from_user() helper and move strtobool() logic into the new kstrtobool() (matching all the other kstrto* functions). Provides an inline wrapper for existing strtobool() callers. 
Signed-off-by: Kees Cook Cc: Joe Perches Cc: Andy Shevchenko Cc: Rasmus Villemoes Cc: Daniel Borkmann Cc: Amitkumar Karwar Cc: Nishant Sarmukadam Cc: Kalle Valo Cc: Steve French Cc: Michael Ellerman Cc: Heiko Carstens Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/kstrtox.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ lib/string.c | 29 ----------------------------- 2 files changed, 50 insertions(+), 29 deletions(-) (limited to 'lib') diff --git a/lib/kstrtox.c b/lib/kstrtox.c index 94be244e8441..e8ba4a013e82 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -321,6 +321,56 @@ int kstrtos8(const char *s, unsigned int base, s8 *res) } EXPORT_SYMBOL(kstrtos8); +/** + * kstrtobool - convert common user inputs into boolean values + * @s: input string + * @res: result + * + * This routine returns 0 iff the first character is one of 'Yy1Nn0'. + * Otherwise it will return -EINVAL. Value pointed to by res is + * updated upon finding a match. + */ +int kstrtobool(const char *s, bool *res) +{ + if (!s) + return -EINVAL; + + switch (s[0]) { + case 'y': + case 'Y': + case '1': + *res = true; + return 0; + case 'n': + case 'N': + case '0': + *res = false; + return 0; + default: + break; + } + + return -EINVAL; +} +EXPORT_SYMBOL(kstrtobool); + +/* + * Since "base" would be a nonsense argument, this open-codes the + * _from_user helper instead of using the helper macro below. + */ +int kstrtobool_from_user(const char __user *s, size_t count, bool *res) +{ + /* Longest string needed to differentiate, newline, terminator */ + char buf[4]; + + count = min(count, sizeof(buf) - 1); + if (copy_from_user(buf, s, count)) + return -EFAULT; + buf[count] = '\0'; + return kstrtobool(buf, res); +} +EXPORT_SYMBOL(kstrtobool_from_user); + #define kstrto_from_user(f, g, type) \ int f(const char __user *s, size_t count, unsigned int base, type *res) \ { \ diff --git a/lib/string.c b/lib/string.c index e9c9db161d2c..ed83562a53ae 100644 --- a/lib/string.c +++ b/lib/string.c @@ -656,35 +656,6 @@ int match_string(const char * const *array, size_t n, const char *string) } EXPORT_SYMBOL(match_string); -/** - * strtobool - convert common user inputs into boolean values - * @s: input string - * @res: result - * - * This routine returns 0 iff the first character is one of 'Yy1Nn0'. - * Otherwise it will return -EINVAL. Value pointed to by res is - * updated upon finding a match. - */ -int strtobool(const char *s, bool *res) -{ - switch (s[0]) { - case 'y': - case 'Y': - case '1': - *res = true; - break; - case 'n': - case 'N': - case '0': - *res = false; - break; - default: - return -EINVAL; - } - return 0; -} -EXPORT_SYMBOL(strtobool); - #ifndef __HAVE_ARCH_MEMSET /** * memset - Fill a region of memory with the given value -- cgit From a81a5a17d44b26521fb1199f8ccf27f4af337a67 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Mar 2016 14:22:57 -0700 Subject: lib: add "on"/"off" support to kstrtobool Add support for "on" and "off" when converting to boolean. 
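Based on the parsing in the hunk below, the accepted inputs after this change behave as follows (illustrative):

	bool v;

	kstrtobool("on", &v);   /* returns 0, v == true  */
	kstrtobool("Off", &v);  /* returns 0, v == false */
	kstrtobool("yes", &v);  /* returns 0, v == true: only s[0] is checked */
	kstrtobool("ox", &v);   /* returns -EINVAL: 'o' needs [nN] or [fF] next */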
Signed-off-by: Kees Cook Cc: Amitkumar Karwar Cc: Andy Shevchenko Cc: Daniel Borkmann Cc: Heiko Carstens Cc: Joe Perches Cc: Kalle Valo Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Nishant Sarmukadam Cc: Rasmus Villemoes Cc: Steve French Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/kstrtox.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/kstrtox.c b/lib/kstrtox.c index e8ba4a013e82..d8a5cf66c316 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -326,9 +326,9 @@ EXPORT_SYMBOL(kstrtos8); * @s: input string * @res: result * - * This routine returns 0 iff the first character is one of 'Yy1Nn0'. - * Otherwise it will return -EINVAL. Value pointed to by res is - * updated upon finding a match. + * This routine returns 0 iff the first character is one of 'Yy1Nn0', or + * [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL. Value + * pointed to by res is updated upon finding a match. */ int kstrtobool(const char *s, bool *res) { @@ -346,6 +346,20 @@ int kstrtobool(const char *s, bool *res) case '0': *res = false; return 0; + case 'o': + case 'O': + switch (s[1]) { + case 'n': + case 'N': + *res = true; + return 0; + case 'f': + case 'F': + *res = false; + return 0; + default: + break; + } default: break; } -- cgit From 2553b67a1fbe7bf202e4e8070ab0b00d3d3a06a2 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 17 Mar 2016 14:23:04 -0700 Subject: lib/bug.c: use common WARN helper The traceoff_on_warning option doesn't have any effect on s390, powerpc, arm64, parisc, and sh because there are two different types of WARN implementations: 1) The above mentioned architectures treat WARN() as a special case of a BUG() exception. They handle warnings in report_bug() in lib/bug.c. 2) All other architectures just call warn_slowpath_*() directly. Their warnings are handled in warn_slowpath_common() in kernel/panic.c. Support traceoff_on_warning on all architectures and prevent any future divergence by using a single common function to emit the warning. Also remove the '()' from '%pS()', because the parentheses look funky: [ 45.607629] WARNING: at /root/warn_mod/warn_mod.c:17 .init_dummy+0x20/0x40 [warn_mod]() Reported-by: Chunyu Hu Signed-off-by: Josh Poimboeuf Acked-by: Heiko Carstens Tested-by: Prarit Bhargava Acked-by: Prarit Bhargava Acked-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bug.c | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/bug.c b/lib/bug.c index 6cde380f09de..bc3656e944d2 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -167,30 +167,8 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) if (warning) { /* this is a WARN_ON rather than BUG/BUG_ON */ - pr_warn("------------[ cut here ]------------\n"); - - if (file) - pr_warn("WARNING: at %s:%u\n", file, line); - else - pr_warn("WARNING: at %p [verbose debug info unavailable]\n", - (void *)bugaddr); - - if (panic_on_warn) { - /* - * This thread may hit another WARN() in the panic path. - * Resetting this prevents additional WARN() from - * panicking the system on this thread. Other threads - * are blocked by the panic_mutex in panic(). - */ - panic_on_warn = 0; - panic("panic_on_warn set ...\n"); - } - - print_modules(); - show_regs(regs); - print_oops_end_marker(); - /* Just a warning, don't kill lockdep. 
*/ - add_taint(BUG_GET_TAINT(bug), LOCKDEP_STILL_OK); + __warn(file, line, (void *)bugaddr, BUG_GET_TAINT(bug), regs, + NULL); return BUG_TRAP_TYPE_WARN; } -- cgit From f9310b2f9a19b7f16c7b1c1558f8b649b9b933c1 Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Thu, 17 Mar 2016 14:23:07 -0700 Subject: sscanf: implement basic character sets Implement basic character sets for the '%[' conversion specifier. The '%[' conversion specifier matches a nonempty sequence of characters from the specified set of accepted (or with '^', rejected) characters between the brackets. The substring matched is to be made up of characters in (or not in) the set. This is useful for matching substrings that are delimited by something other than spaces. This implementation differs from its glibc counterpart in the following ways: (1) No support for character ranges (e.g., 'a-z' or '0-9') (2) The hyphen '-' is not a special character (3) The closing bracket ']' cannot be matched (4) No support (yet) for discarding matching input ('%*[') The bitmap code is largely based upon sample code which was provided by Rasmus. The motivation for adding character set support to sscanf originally stemmed from the kernel livepatching project. An ongoing patchset utilizes new livepatch Elf symbol and section names to store important metadata livepatch needs to properly apply its patches. Such metadata is stored in these section and symbol names as substrings delimited by periods '.' and commas ','. For example, a livepatch symbol name might look like this: .klp.sym.vmlinux.printk,0 However, sscanf currently can only extract "substrings" delimited by whitespace using the "%s" specifier. Thus for the above symbol name, one cannot not use sscanf() to extract substrings "vmlinux" or "printk", for example. A number of discussions on the livepatch mailing list dealing with string parsing code for extracting these '.' and ',' delimited substrings eventually led to the conclusion that such code would be completely unnecessary if the kernel sscanf() supported character sets. Thus only a single sscanf() call would be necessary to extract these substrings. In addition, such an addition to sscanf() could benefit other areas of the kernel that might have a similar need in the future. [akpm@linux-foundation.org: 80-col tweaks] Signed-off-by: Jessica Yu Signed-off-by: Rasmus Villemoes Cc: Andy Shevchenko Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 525c8e19bda2..ccb664b54280 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2640,8 +2640,12 @@ int vsscanf(const char *buf, const char *fmt, va_list args) if (*fmt == '*') { if (!*str) break; - while (!isspace(*fmt) && *fmt != '%' && *fmt) + while (!isspace(*fmt) && *fmt != '%' && *fmt) { + /* '%*[' not yet supported, invalid format */ + if (*fmt == '[') + return num; fmt++; + } while (!isspace(*str) && *str) str++; continue; @@ -2714,6 +2718,59 @@ int vsscanf(const char *buf, const char *fmt, va_list args) num++; } continue; + /* + * Warning: This implementation of the '[' conversion specifier + * deviates from its glibc counterpart in the following ways: + * (1) It does NOT support ranges i.e. 
'-' is NOT a special + * character + * (2) It cannot match the closing bracket ']' itself + * (3) A field width is required + * (4) '%*[' (discard matching input) is currently not supported + * + * Example usage: + * ret = sscanf("00:0a:95","%2[^:]:%2[^:]:%2[^:]", + * buf1, buf2, buf3); + * if (ret < 3) + * // etc.. + */ + case '[': + { + char *s = (char *)va_arg(args, char *); + DECLARE_BITMAP(set, 256) = {0}; + unsigned int len = 0; + bool negate = (*fmt == '^'); + + /* field width is required */ + if (field_width == -1) + return num; + + if (negate) + ++fmt; + + for ( ; *fmt && *fmt != ']'; ++fmt, ++len) + set_bit((u8)*fmt, set); + + /* no ']' or no character set found */ + if (!*fmt || !len) + return num; + ++fmt; + + if (negate) { + bitmap_complement(set, set, 256); + /* exclude null '\0' byte */ + clear_bit(0, set); + } + + /* match must be non-empty */ + if (!test_bit((u8)*str, set)) + return num; + + while (test_bit((u8)*str, set) && field_width--) + *s++ = *str++; + *s = '\0'; + ++num; + } + continue; case 'o': base = 8; break; -- cgit From 5c9a8750a6409c63a0f01d51a9024861022f6593 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 22 Mar 2016 14:27:30 -0700 Subject: kernel: add kcov code coverage kcov provides code coverage collection for coverage-guided fuzzing (randomized testing). Coverage-guided fuzzing is a testing technique that uses coverage feedback to determine new interesting inputs to a system. A notable user-space example is AFL (http://lcamtuf.coredump.cx/afl/). However, this technique is not widely used for kernel testing due to missing compiler and kernel support. kcov does not aim to collect as much coverage as possible. It aims to collect more or less stable coverage that is function of syscall inputs. To achieve this goal it does not collect coverage in soft/hard interrupts and instrumentation of some inherently non-deterministic or non-interesting parts of kernel is disbled (e.g. scheduler, locking). Currently there is a single coverage collection mode (tracing), but the API anticipates additional collection modes. Initially I also implemented a second mode which exposes coverage in a fixed-size hash table of counters (what Quentin used in his original patch). I've dropped the second mode for simplicity. This patch adds the necessary support on kernel side. The complimentary compiler support was added in gcc revision 231296. We've used this support to build syzkaller system call fuzzer, which has found 90 kernel bugs in just 2 months: https://github.com/google/syzkaller/wiki/Found-Bugs We've also found 30+ bugs in our internal systems with syzkaller. Another (yet unexplored) direction where kcov coverage would greatly help is more traditional "blob mutation". For example, mounting a random blob as a filesystem, or receiving a random blob over wire. Why not gcov. Typical fuzzing loop looks as follows: (1) reset coverage, (2) execute a bit of code, (3) collect coverage, repeat. A typical coverage can be just a dozen of basic blocks (e.g. an invalid input). In such context gcov becomes prohibitively expensive as reset/collect coverage steps depend on total number of basic blocks/edges in program (in case of kernel it is about 2M). Cost of kcov depends only on number of executed basic blocks/edges. On top of that, kernel requires per-thread coverage because there are always background threads and unrelated processes that also produce coverage. With inlined gcov instrumentation per-thread coverage is not possible. 
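To make that loop concrete, here is a hedged userspace sketch of the trace mode this patch adds, following the KCOV_INIT_TRACE/KCOV_ENABLE/KCOV_DISABLE ioctl interface it introduces (see Documentation/kcov.txt in the patch for the authoritative example):

	#include <linux/kcov.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <unistd.h>

	#define COVER_SIZE (64 << 10)	/* trace buffer size, in entries */

	int main(void)
	{
		unsigned long *cover, n, i;
		int fd = open("/sys/kernel/debug/kcov", O_RDWR);

		if (fd == -1)
			return 1;
		if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE))	/* size the buffer */
			return 1;
		cover = mmap(NULL, COVER_SIZE * sizeof(unsigned long),
			     PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
		if (cover == MAP_FAILED)
			return 1;
		if (ioctl(fd, KCOV_ENABLE, 0))	/* start collecting for this thread */
			return 1;
		cover[0] = 0;			/* (1) reset coverage */
		read(-1, NULL, 0);		/* (2) execute the code under test */
		n = cover[0];			/* (3) collect: number of PCs recorded */
		for (i = 0; i < n; i++)
			printf("0x%lx\n", cover[i + 1]);
		ioctl(fd, KCOV_DISABLE, 0);
		return 0;
	}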
kcov exposes kernel PCs and control flow to user-space which is insecure. But debugfs should not be mapped as user accessible. Based on a patch by Quentin Casasnovas. [akpm@linux-foundation.org: make task_struct.kcov_mode have type `enum kcov_mode'] [akpm@linux-foundation.org: unbreak allmodconfig] [akpm@linux-foundation.org: follow x86 Makefile layout standards] Signed-off-by: Dmitry Vyukov Reviewed-by: Kees Cook Cc: syzkaller Cc: Vegard Nossum Cc: Catalin Marinas Cc: Tavis Ormandy Cc: Will Deacon Cc: Quentin Casasnovas Cc: Kostya Serebryany Cc: Eric Dumazet Cc: Alexander Potapenko Cc: Kees Cook Cc: Bjorn Helgaas Cc: Sasha Levin Cc: David Drysdale Cc: Ard Biesheuvel Cc: Andrey Ryabinin Cc: Kirill A. Shutemov Cc: Jiri Slaby Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 21 +++++++++++++++++++++ lib/Makefile | 12 ++++++++++++ 2 files changed, 33 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 5a60f45cd9bb..532d4d52d1df 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -696,6 +696,27 @@ source "lib/Kconfig.kasan" endmenu # "Memory Debugging" +config ARCH_HAS_KCOV + bool + help + KCOV does not have any arch-specific code, but currently it is enabled + only for x86_64. KCOV requires testing on other archs, and most likely + disabling of instrumentation for some early boot code. + +config KCOV + bool "Code coverage for fuzzing" + depends on ARCH_HAS_KCOV + select DEBUG_FS + help + KCOV exposes kernel code coverage information in a form suitable + for coverage-guided fuzzing (randomized testing). + + If RANDOMIZE_BASE is enabled, PC values will not be stable across + different machines and across reboots. If you need stable PC values, + disable RANDOMIZE_BASE. + + For more details, see Documentation/kcov.txt. + config DEBUG_SHIRQ bool "Debug shared IRQ handlers" depends on DEBUG_KERNEL diff --git a/lib/Makefile b/lib/Makefile index 4962d14c450f..a1de5b61ff40 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -7,6 +7,18 @@ ORIG_CFLAGS := $(KBUILD_CFLAGS) KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS)) endif +# These files are disabled because they produce lots of non-interesting and/or +# flaky coverage that is not a function of syscall inputs. For example, +# rbtree can be global and individual rotations don't correlate with inputs. +KCOV_INSTRUMENT_string.o := n +KCOV_INSTRUMENT_rbtree.o := n +KCOV_INSTRUMENT_list_debug.o := n +KCOV_INSTRUMENT_debugobjects.o := n +KCOV_INSTRUMENT_dynamic_debug.o := n +# Kernel does not boot if we instrument this file as it uses custom calling +# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS). +KCOV_INSTRUMENT_hweight.o := n + lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o timerqueue.o\ idr.o int_sqrt.o extable.o \ -- cgit From dde5cf39d4d2cce71f2997c37210dd624d0e4bf6 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 22 Mar 2016 14:27:45 -0700 Subject: ubsan: fix tree-wide -Wmaybe-uninitialized false positives -fsanitize=* options makes GCC less smart than usual and increase number of 'maybe-uninitialized' false-positives. So this patch does two things: * Add -Wno-maybe-uninitialized to CFLAGS_UBSAN which will disable all such warnings for instrumented files. * Remove CONFIG_UBSAN_SANITIZE_ALL from all[yes|mod]config builds. So the all[yes|mod]config build goes without -fsanitize=* and still with -Wmaybe-uninitialized. 
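A hedged illustration of the false-positive shape involved (hypothetical code, not taken from the report): under -fsanitize=* instrumentation GCC can lose track of the coupling between an error flag and the value it guards, and warns even though the value is written on every path that reads it.

	static int get_value(int *out) { *out = 42; return 0; }	/* stub helper */

	int demo(int flag)
	{
		int val, err = -1;

		if (flag)
			err = get_value(&val);
		if (err)
			return err;
		return val;	/* -Wmaybe-uninitialized may fire here with -fsanitize=* */
	}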
Signed-off-by: Andrey Ryabinin Reported-by: Fengguang Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.ubsan | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index e07c1ba9ba13..39494af9a84a 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -13,6 +13,11 @@ config UBSAN_SANITIZE_ALL bool "Enable instrumentation for the entire kernel" depends on UBSAN depends on ARCH_HAS_UBSAN_SANITIZE_ALL + + # We build with -Wno-maybe-uninitialized, but we still want to + # use -Wmaybe-uninitialized in allmodconfig builds. + # So the dependency below is used to disable this option in allmodconfig. + depends on !COMPILE_TEST default y help This option activates instrumentation for the entire kernel. -- cgit From 6c31da3464b4d28825d1827ee41a3a217b2dcf0e Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 19 Mar 2016 17:54:10 +0100 Subject: parisc,metag: Implement CONFIG_DEBUG_STACK_USAGE option On parisc and metag the stack grows upwards, so for those we need to scan the stack downwards in order to calculate how much stack a process has used. Tested on a 64bit parisc kernel. Signed-off-by: Helge Deller --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 532d4d52d1df..1e9a607534ca 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -558,7 +558,7 @@ config DEBUG_KMEMLEAK_DEFAULT_OFF config DEBUG_STACK_USAGE bool "Stack utilization instrumentation" - depends on DEBUG_KERNEL && !IA64 && !PARISC && !METAG + depends on DEBUG_KERNEL && !IA64 help Enables the display of the minimum amount of free stack which each task has ever had available in the sysrq-T and sysrq-P debug output. -- cgit From e6e8379c876de16c6b78f83b15d5ac32c79cb440 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 25 Mar 2016 14:21:56 -0700 Subject: kasan: modify kmalloc_large_oob_right(), add kmalloc_pagealloc_oob_right() This patchset implements SLAB support for KASAN. Unlike SLUB, SLAB doesn't store allocation/deallocation stacks for heap objects, therefore we reimplement this feature in mm/kasan/stackdepot.c. The intention is to ultimately switch SLUB to use this implementation as well, which will save a lot of memory (right now SLUB bloats each object by 256 bytes to store the allocation/deallocation stacks). Also, neither SLUB nor SLAB delays the reuse of freed memory chunks, which is necessary for better detection of use-after-free errors. We introduce memory quarantine (mm/kasan/quarantine.c), which allows delayed reuse of deallocated memory. This patch (of 7): Rename kmalloc_large_oob_right() to kmalloc_pagealloc_oob_right(), as the test only checks the page allocator functionality. Also reimplement kmalloc_large_oob_right() so that the test allocates a large enough chunk of memory that still does not trigger the page allocator fallback.
Signed-off-by: Alexander Potapenko Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Steven Rostedt Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/test_kasan.c b/lib/test_kasan.c index c32f3b0048dc..90ad74f71535 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -65,11 +65,34 @@ static noinline void __init kmalloc_node_oob_right(void) kfree(ptr); } -static noinline void __init kmalloc_large_oob_right(void) +#ifdef CONFIG_SLUB +static noinline void __init kmalloc_pagealloc_oob_right(void) { char *ptr; size_t size = KMALLOC_MAX_CACHE_SIZE + 10; + /* Allocate a chunk that does not fit into a SLUB cache to trigger + * the page allocator fallback. + */ + pr_info("kmalloc pagealloc allocation: out-of-bounds to right\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + ptr[size] = 0; + kfree(ptr); +} +#endif + +static noinline void __init kmalloc_large_oob_right(void) +{ + char *ptr; + size_t size = KMALLOC_MAX_CACHE_SIZE - 256; + /* Allocate a chunk that is large enough, but still fits into a slab + * and does not trigger the page allocator fallback in SLUB. + */ pr_info("kmalloc large allocation: out-of-bounds to right\n"); ptr = kmalloc(size, GFP_KERNEL); if (!ptr) { @@ -324,6 +347,9 @@ static int __init kmalloc_tests_init(void) kmalloc_oob_right(); kmalloc_oob_left(); kmalloc_node_oob_right(); +#ifdef CONFIG_SLUB + kmalloc_pagealloc_oob_right(); +#endif kmalloc_large_oob_right(); kmalloc_oob_krealloc_more(); kmalloc_oob_krealloc_less(); -- cgit From 7ed2f9e663854db313f177a511145630e398b402 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 25 Mar 2016 14:21:59 -0700 Subject: mm, kasan: SLAB support Add KASAN hooks to SLAB allocator. This patch is based on the "mm: kasan: unified support for SLUB and SLAB allocators" patch originally prepared by Dmitry Chernenkov. Signed-off-by: Alexander Potapenko Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Steven Rostedt Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.kasan | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 0fee5acd5aa0..0e4d2b3b0aee 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -5,7 +5,7 @@ if HAVE_ARCH_KASAN config KASAN bool "KASan: runtime memory debugger" - depends on SLUB_DEBUG + depends on SLUB_DEBUG || (SLAB && !DEBUG_SLAB) select CONSTRUCTORS help Enables kernel address sanitizer - runtime memory debugger, @@ -16,6 +16,8 @@ config KASAN This feature consumes about 1/8 of available memory and brings about ~x3 performance slowdown. For better error detection enable CONFIG_STACKTRACE. + Currently CONFIG_KASAN doesn't work with CONFIG_DEBUG_SLAB + (the resulting kernel does not boot). choice prompt "Instrumentation type" -- cgit From cd11016e5f5212c13c0cec7384a525edc93b4921 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 25 Mar 2016 14:22:08 -0700 Subject: mm, kasan: stackdepot implementation. Enable stackdepot for SLAB Implement the stack depot and provide CONFIG_STACKDEPOT. 
Stack depot will allow KASAN to store allocation/deallocation stack traces for memory chunks. The stack traces are stored in a hash table and referenced by handles which reside in the kasan_alloc_meta and kasan_free_meta structures in the allocated memory chunks. IRQ stack traces are cut below the IRQ entry point to avoid unnecessary duplication. Right now stackdepot support is only enabled in the SLAB allocator. Once KASAN features in SLAB are on par with those in SLUB we can switch SLUB to stackdepot as well, thus removing the dependency on SLUB stack bookkeeping, which wastes a lot of memory. This patch is based on the "mm: kasan: stack depots" patch originally prepared by Dmitry Chernenkov. Joonsoo has said that he plans to reuse the stackdepot code for the mm/page_owner.c debugging facility. [akpm@linux-foundation.org: s/depot_stack_handle/depot_stack_handle_t] [aryabinin@virtuozzo.com: comment style fixes] Signed-off-by: Alexander Potapenko Signed-off-by: Andrey Ryabinin Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Steven Rostedt Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig | 4 + lib/Kconfig.kasan | 1 + lib/Makefile | 3 + lib/stackdepot.c | 284 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 292 insertions(+) create mode 100644 lib/stackdepot.c (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index 133ebc0c1773..3cca1222578e 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -536,4 +536,8 @@ config ARCH_HAS_PMEM_API config ARCH_HAS_MMIO_FLUSH bool +config STACKDEPOT + bool + select STACKTRACE + endmenu diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 0e4d2b3b0aee..67d8c6838ba9 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -7,6 +7,7 @@ config KASAN bool "KASan: runtime memory debugger" depends on SLUB_DEBUG || (SLAB && !DEBUG_SLAB) select CONSTRUCTORS + select STACKDEPOT if SLAB help Enables kernel address sanitizer - runtime memory debugger, designed to find out-of-bounds accesses and use-after-free bugs. diff --git a/lib/Makefile b/lib/Makefile index a1de5b61ff40..7bd6fd436c97 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -181,6 +181,9 @@ obj-$(CONFIG_SG_SPLIT) += sg_split.o obj-$(CONFIG_STMP_DEVICE) += stmp_device.o obj-$(CONFIG_IRQ_POLL) += irq_poll.o +obj-$(CONFIG_STACKDEPOT) += stackdepot.o +KASAN_SANITIZE_stackdepot.o := n + libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \ fdt_empty_tree.o $(foreach file, $(libfdt_files), \ diff --git a/lib/stackdepot.c b/lib/stackdepot.c new file mode 100644 index 000000000000..654c9d87e83a --- /dev/null +++ b/lib/stackdepot.c @@ -0,0 +1,284 @@ +/* + * Generic stack depot for storing stack traces. + * + * Some debugging tools need to save stack traces of certain events which can + * be later presented to the user. For example, KASAN needs to save alloc and + * free stacks for each object, but storing two stack traces per object + * requires too much memory (e.g. SLUB_DEBUG needs 256 bytes per object for + * that). + * + * Instead, stack depot maintains a hashtable of unique stacktraces. Since alloc + * and free stacks repeat a lot, we save about 100x space. + * Stacks are never removed from the depot, so we store them contiguously one + * after another in a contiguous memory allocation. + * + * Author: Alexander Potapenko + * Copyright (C) 2016 Google, Inc. + * + * Based on code by Dmitry Chernenkov.
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include <linux/gfp.h> +#include <linux/jhash.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/percpu.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/stackdepot.h> +#include <linux/stacktrace.h> +#include <linux/string.h> +#include <linux/types.h> + +#define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8) + +#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */ +#define STACK_ALLOC_SIZE (1LL << (PAGE_SHIFT + STACK_ALLOC_ORDER)) +#define STACK_ALLOC_ALIGN 4 +#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGE_SHIFT - \ + STACK_ALLOC_ALIGN) +#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - STACK_ALLOC_OFFSET_BITS) +#define STACK_ALLOC_SLABS_CAP 1024 +#define STACK_ALLOC_MAX_SLABS \ + (((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \ + (1LL << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_SLABS_CAP) + +/* The compact structure to store the reference to stacks. */ +union handle_parts { + depot_stack_handle_t handle; + struct { + u32 slabindex : STACK_ALLOC_INDEX_BITS; + u32 offset : STACK_ALLOC_OFFSET_BITS; + }; +}; + +struct stack_record { + struct stack_record *next; /* Link in the hashtable */ + u32 hash; /* Hash in the hashtable */ + u32 size; /* Number of frames in the stack */ + union handle_parts handle; + unsigned long entries[1]; /* Variable-sized array of entries. */ +}; + +static void *stack_slabs[STACK_ALLOC_MAX_SLABS]; + +static int depot_index; +static int next_slab_inited; +static size_t depot_offset; +static DEFINE_SPINLOCK(depot_lock); + +static bool init_stack_slab(void **prealloc) +{ + if (!*prealloc) + return false; + /* + * This smp_load_acquire() pairs with smp_store_release() to + * |next_slab_inited| below and in depot_alloc_stack(). + */ + if (smp_load_acquire(&next_slab_inited)) + return true; + if (stack_slabs[depot_index] == NULL) { + stack_slabs[depot_index] = *prealloc; + } else { + stack_slabs[depot_index + 1] = *prealloc; + /* + * This smp_store_release pairs with smp_load_acquire() from + * |next_slab_inited| above and in depot_save_stack(). + */ + smp_store_release(&next_slab_inited, 1); + } + *prealloc = NULL; + return true; +} + +/* Allocation of a new stack in raw storage */ +static struct stack_record *depot_alloc_stack(unsigned long *entries, int size, + u32 hash, void **prealloc, gfp_t alloc_flags) +{ + int required_size = offsetof(struct stack_record, entries) + + sizeof(unsigned long) * size; + struct stack_record *stack; + + required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN); + + if (unlikely(depot_offset + required_size > STACK_ALLOC_SIZE)) { + if (unlikely(depot_index + 1 >= STACK_ALLOC_MAX_SLABS)) { + WARN_ONCE(1, "Stack depot reached limit capacity"); + return NULL; + } + depot_index++; + depot_offset = 0; + /* + * smp_store_release() here pairs with smp_load_acquire() from + * |next_slab_inited| in depot_save_stack() and + * init_stack_slab().
+ */ + if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) + smp_store_release(&next_slab_inited, 0); + } + init_stack_slab(prealloc); + if (stack_slabs[depot_index] == NULL) + return NULL; + + stack = stack_slabs[depot_index] + depot_offset; + + stack->hash = hash; + stack->size = size; + stack->handle.slabindex = depot_index; + stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN; + memcpy(stack->entries, entries, size * sizeof(unsigned long)); + depot_offset += required_size; + + return stack; +} + +#define STACK_HASH_ORDER 20 +#define STACK_HASH_SIZE (1L << STACK_HASH_ORDER) +#define STACK_HASH_MASK (STACK_HASH_SIZE - 1) +#define STACK_HASH_SEED 0x9747b28c + +static struct stack_record *stack_table[STACK_HASH_SIZE] = { + [0 ... STACK_HASH_SIZE - 1] = NULL +}; + +/* Calculate hash for a stack */ +static inline u32 hash_stack(unsigned long *entries, unsigned int size) +{ + return jhash2((u32 *)entries, + size * sizeof(unsigned long) / sizeof(u32), + STACK_HASH_SEED); +} + +/* Find a stack that is equal to the one stored in entries in the hash */ +static inline struct stack_record *find_stack(struct stack_record *bucket, + unsigned long *entries, int size, + u32 hash) +{ + struct stack_record *found; + + for (found = bucket; found; found = found->next) { + if (found->hash == hash && + found->size == size && + !memcmp(entries, found->entries, + size * sizeof(unsigned long))) { + return found; + } + } + return NULL; +} + +void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace) +{ + union handle_parts parts = { .handle = handle }; + void *slab = stack_slabs[parts.slabindex]; + size_t offset = parts.offset << STACK_ALLOC_ALIGN; + struct stack_record *stack = slab + offset; + + trace->nr_entries = trace->max_entries = stack->size; + trace->entries = stack->entries; + trace->skip = 0; +} + +/** + * depot_save_stack - save stack in a stack depot. + * @trace - the stacktrace to save. + * @alloc_flags - flags for allocating additional memory if required. + * + * Returns the handle of the stack struct stored in depot. + */ +depot_stack_handle_t depot_save_stack(struct stack_trace *trace, + gfp_t alloc_flags) +{ + u32 hash; + depot_stack_handle_t retval = 0; + struct stack_record *found = NULL, **bucket; + unsigned long flags; + struct page *page = NULL; + void *prealloc = NULL; + + if (unlikely(trace->nr_entries == 0)) + goto fast_exit; + + hash = hash_stack(trace->entries, trace->nr_entries); + /* Bad luck, we won't store this stack. */ + if (hash == 0) + goto exit; + + bucket = &stack_table[hash & STACK_HASH_MASK]; + + /* + * Fast path: look the stack trace up without locking. + * The smp_load_acquire() here pairs with smp_store_release() to + * |bucket| below. + */ + found = find_stack(smp_load_acquire(bucket), trace->entries, + trace->nr_entries, hash); + if (found) + goto exit; + + /* + * Check if the current or the next stack slab needs to be initialized. + * If so, allocate the memory - we won't be able to do that under the + * lock. + * + * The smp_load_acquire() here pairs with smp_store_release() to + * |next_slab_inited| in depot_alloc_stack() and init_stack_slab(). + */ + if (unlikely(!smp_load_acquire(&next_slab_inited))) { + /* + * Zero out zone modifiers, as we don't have specific zone + * requirements. Keep the flags related to allocation in atomic + * contexts and I/O.
+ */ + alloc_flags &= ~GFP_ZONEMASK; + alloc_flags &= (GFP_ATOMIC | GFP_KERNEL); + page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER); + if (page) + prealloc = page_address(page); + } + + spin_lock_irqsave(&depot_lock, flags); + + found = find_stack(*bucket, trace->entries, trace->nr_entries, hash); + if (!found) { + struct stack_record *new = + depot_alloc_stack(trace->entries, trace->nr_entries, + hash, &prealloc, alloc_flags); + if (new) { + new->next = *bucket; + /* + * This smp_store_release() pairs with + * smp_load_acquire() from |bucket| above. + */ + smp_store_release(bucket, new); + found = new; + } + } else if (prealloc) { + /* + * We didn't need to store this stack trace, but let's keep + * the preallocated memory for the future. + */ + WARN_ON(!init_stack_slab(&prealloc)); + } + + spin_unlock_irqrestore(&depot_lock, flags); +exit: + if (prealloc) { + /* Nobody used this memory, ok to free it. */ + free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER); + } + if (found) + retval = found->handle.handle; +fast_exit: + return retval; +} -- cgit From 9dcadd381b1d199074937019d612346c061de415 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 25 Mar 2016 14:22:11 -0700 Subject: kasan: test fix: warn if the UAF could not be detected in kmalloc_uaf2 Signed-off-by: Alexander Potapenko Acked-by: Andrey Ryabinin Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Steven Rostedt Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 90ad74f71535..82169fbf2453 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -294,6 +294,8 @@ static noinline void __init kmalloc_uaf2(void) } ptr1[40] = 'x'; + if (ptr1 == ptr2) + pr_err("Could not detect use-after-free: ptr1 == ptr2\n"); kfree(ptr2); } -- cgit
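As a usage note for the stackdepot patch above: a client such as KASAN captures a stack_trace once, stores only the 4-byte depot_stack_handle_t, and fetches the full trace back on demand. A minimal sketch under the API introduced there; the wrapper names are hypothetical, and save_stack_trace()/print_stack_trace() are the stacktrace helpers of this kernel era:

	#include <linux/gfp.h>
	#include <linux/kernel.h>
	#include <linux/stackdepot.h>
	#include <linux/stacktrace.h>

	/* Capture the current stack and store it, keeping only a handle. */
	static depot_stack_handle_t save_current_stack(gfp_t flags)
	{
		unsigned long entries[16];
		struct stack_trace trace = {
			.entries	= entries,
			.max_entries	= ARRAY_SIZE(entries),
		};

		save_stack_trace(&trace);
		/* Identical stacks are deduplicated into one depot record. */
		return depot_save_stack(&trace, flags);
	}

	/* Resolve a handle back into a full trace and print it. */
	static void print_saved_stack(depot_stack_handle_t handle)
	{
		struct stack_trace trace;

		if (!handle)	/* depot_save_stack() returns 0 on failure */
			return;
		depot_fetch_stack(handle, &trace);
		print_stack_trace(&trace, 0);
	}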