From 0b6ec8c0a3708f0a54b75ee1200772ec5226ec49 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 27 Jan 2016 15:37:58 +0100 Subject: debugobjects: Allow bigger number of early boot objects On my bigger s390 systems I always get "Out of memory. ODEBUG disabled". Since the number of objects is needed at compile time, we can not change the size dynamically before the caches etc are available. Doubling the size seems to do the trick. Since it is init data it will be freed anyway, this should be ok. Signed-off-by: Christian Borntraeger Link: http://lkml.kernel.org/r/1453905478-13409-1-git-send-email-borntraeger@de.ibm.com Signed-off-by: Thomas Gleixner --- lib/debugobjects.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 547f7f923dbc..519b5a10fd70 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -21,7 +21,7 @@ #define ODEBUG_HASH_BITS 14 #define ODEBUG_HASH_SIZE (1 << ODEBUG_HASH_BITS) -#define ODEBUG_POOL_SIZE 512 +#define ODEBUG_POOL_SIZE 1024 #define ODEBUG_POOL_MIN_LEVEL 256 #define ODEBUG_CHUNK_SHIFT PAGE_SHIFT -- cgit From cd4d09ec6f6c12a2cc3db5b7d8876a325a53545b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 26 Jan 2016 22:12:04 +0100 Subject: x86/cpufeature: Carve out X86_FEATURE_* Move them to a separate header and have the following dependency: x86/cpufeatures.h <- x86/processor.h <- x86/cpufeature.h This makes it easier to use the header in asm code and not include the whole cpufeature.h and add guards for asm. Suggested-by: H. Peter Anvin Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1453842730-28463-5-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- lib/atomic64_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index d62de8bf022d..123481814320 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -17,7 +17,7 @@ #include #ifdef CONFIG_X86 -#include /* for boot_cpu_has below */ +#include /* for boot_cpu_has below */ #endif #define TEST(bit, op, c_op, val) \ -- cgit From 72676bb53f33fd0ef3a1484fc1ecfd306dc6ff40 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 2 Feb 2016 16:57:18 -0800 Subject: lib/test-string_helpers.c: fix and improve string_get_size() tests Recently added commit 564b026fbd0d ("string_helpers: fix precision loss for some inputs") fixed precision issues for string_get_size() and broke tests. Fix and improve them: test both STRING_UNITS_2 and STRING_UNITS_10 at a time, better failure reporting, test small an huge values. Fixes: 564b026fbd0d28e9 ("string_helpers: fix precision loss for some inputs") Signed-off-by: Vitaly Kuznetsov Cc: Andy Shevchenko Cc: Rasmus Villemoes Cc: James Bottomley Cc: James Bottomley Cc: "James E.J. 
Bottomley" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test-string_helpers.c | 67 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c index 98866a770770..25b5cbfb7615 100644 --- a/lib/test-string_helpers.c +++ b/lib/test-string_helpers.c @@ -327,36 +327,67 @@ out: } #define string_get_size_maxbuf 16 -#define test_string_get_size_one(size, blk_size, units, exp_result) \ +#define test_string_get_size_one(size, blk_size, exp_result10, exp_result2) \ do { \ - BUILD_BUG_ON(sizeof(exp_result) >= string_get_size_maxbuf); \ - __test_string_get_size((size), (blk_size), (units), \ - (exp_result)); \ + BUILD_BUG_ON(sizeof(exp_result10) >= string_get_size_maxbuf); \ + BUILD_BUG_ON(sizeof(exp_result2) >= string_get_size_maxbuf); \ + __test_string_get_size((size), (blk_size), (exp_result10), \ + (exp_result2)); \ } while (0) -static __init void __test_string_get_size(const u64 size, const u64 blk_size, - const enum string_size_units units, - const char *exp_result) +static __init void test_string_get_size_check(const char *units, + const char *exp, + char *res, + const u64 size, + const u64 blk_size) { - char buf[string_get_size_maxbuf]; - - string_get_size(size, blk_size, units, buf, sizeof(buf)); - if (!memcmp(buf, exp_result, strlen(exp_result) + 1)) + if (!memcmp(res, exp, strlen(exp) + 1)) return; - buf[sizeof(buf) - 1] = '\0'; - pr_warn("Test 'test_string_get_size_one' failed!\n"); - pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %d\n", + res[string_get_size_maxbuf - 1] = '\0'; + + pr_warn("Test 'test_string_get_size' failed!\n"); + pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %s)\n", size, blk_size, units); - pr_warn("expected: '%s', got '%s'\n", exp_result, buf); + pr_warn("expected: '%s', got '%s'\n", exp, res); +} + +static __init void __test_string_get_size(const u64 size, const u64 blk_size, + const char *exp_result10, + const char *exp_result2) +{ + char buf10[string_get_size_maxbuf]; + char buf2[string_get_size_maxbuf]; + + string_get_size(size, blk_size, STRING_UNITS_10, buf10, sizeof(buf10)); + string_get_size(size, blk_size, STRING_UNITS_2, buf2, sizeof(buf2)); + + test_string_get_size_check("STRING_UNITS_10", exp_result10, buf10, + size, blk_size); + + test_string_get_size_check("STRING_UNITS_2", exp_result2, buf2, + size, blk_size); } static __init void test_string_get_size(void) { - test_string_get_size_one(16384, 512, STRING_UNITS_2, "8.00 MiB"); - test_string_get_size_one(8192, 4096, STRING_UNITS_10, "32.7 MB"); - test_string_get_size_one(1, 512, STRING_UNITS_10, "512 B"); + /* small values */ + test_string_get_size_one(0, 512, "0 B", "0 B"); + test_string_get_size_one(1, 512, "512 B", "512 B"); + test_string_get_size_one(1100, 1, "1.10 kB", "1.07 KiB"); + + /* normal values */ + test_string_get_size_one(16384, 512, "8.39 MB", "8.00 MiB"); + test_string_get_size_one(500118192, 512, "256 GB", "238 GiB"); + test_string_get_size_one(8192, 4096, "33.6 MB", "32.0 MiB"); + + /* weird block sizes */ + test_string_get_size_one(3000, 1900, "5.70 MB", "5.44 MiB"); + + /* huge values */ + test_string_get_size_one(U64_MAX, 4096, "75.6 ZB", "64.0 ZiB"); + test_string_get_size_one(4096, U64_MAX, "75.6 ZB", "64.0 ZiB"); } static int __init test_string_helpers_init(void) -- cgit From 46437f9a554fbe3e110580ca08ab703b59f2f95a Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 2 Feb 2016 16:57:52 -0800 
Subject: radix-tree: fix race in gang lookup If the indirect_ptr bit is set on a slot, that indicates we need to redo the lookup. Introduce a new function radix_tree_iter_retry() which forces the loop to retry the lookup by setting 'slot' to NULL and turning the iterator back to point at the problematic entry. This is a pretty rare problem to hit at the moment; the lookup has to race with a grow of the radix tree from a height of 0. The consequences of hitting this race are that gang lookup could return a pointer to a radix_tree_node instead of a pointer to whatever the user had inserted in the tree. Fixes: cebbd29e1c2f ("radix-tree: rewrite gang lookup using iterator") Signed-off-by: Matthew Wilcox Cc: Hugh Dickins Cc: Ohad Ben-Cohen Cc: Konstantin Khlebnikov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index fcf5d98574ce..6b79e9026e24 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1019,9 +1019,13 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, return 0; radix_tree_for_each_slot(slot, root, &iter, first_index) { - results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot)); + results[ret] = rcu_dereference_raw(*slot); if (!results[ret]) continue; + if (radix_tree_is_indirect_ptr(results[ret])) { + slot = radix_tree_iter_retry(&iter); + continue; + } if (++ret == max_items) break; } @@ -1098,9 +1102,13 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, return 0; radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) { - results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot)); + results[ret] = rcu_dereference_raw(*slot); if (!results[ret]) continue; + if (radix_tree_is_indirect_ptr(results[ret])) { + slot = radix_tree_iter_retry(&iter); + continue; + } if (++ret == max_items) break; } -- cgit From d7ce36924344ace0dbdc855b1206cacc46b36d45 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 5 Feb 2016 15:36:16 -0800 Subject: dump_stack: avoid potential deadlocks Some servers experienced fatal deadlocks because of a combination of bugs, leading to multiple cpus calling dump_stack(). The checksumming bug was fixed in commit 34ae6a1aa054 ("ipv6: update skb->csum when CE mark is propagated"). The second problem is a faulty locking in dump_stack() CPU1 runs in process context and calls dump_stack(), grabs dump_lock. CPU2 receives a TCP packet under softirq, grabs socket spinlock, and call dump_stack() from netdev_rx_csum_fault(). dump_stack() spins on atomic_cmpxchg(&dump_lock, -1, 2), since dump_lock is owned by CPU1 While dumping its stack, CPU1 is interrupted by a softirq, and happens to process a packet for the TCP socket locked by CPU2. CPU1 spins forever in spin_lock() : deadlock Stack trace on CPU1 looked like : NMI backtrace for cpu 1 RIP: _raw_spin_lock+0x25/0x30 ... 
Call Trace: tcp_v6_rcv+0x243/0x620 ip6_input_finish+0x11f/0x330 ip6_input+0x38/0x40 ip6_rcv_finish+0x3c/0x90 ipv6_rcv+0x2a9/0x500 process_backlog+0x461/0xaa0 net_rx_action+0x147/0x430 __do_softirq+0x167/0x2d0 call_softirq+0x1c/0x30 do_softirq+0x3f/0x80 irq_exit+0x6e/0xc0 smp_call_function_single_interrupt+0x35/0x40 call_function_single_interrupt+0x6a/0x70 printk+0x4d/0x4f printk_address+0x31/0x33 print_trace_address+0x33/0x3c print_context_stack+0x7f/0x119 dump_trace+0x26b/0x28e show_trace_log_lvl+0x4f/0x5c show_stack_log_lvl+0x104/0x113 show_stack+0x42/0x44 dump_stack+0x46/0x58 netdev_rx_csum_fault+0x38/0x3c __skb_checksum_complete_head+0x6e/0x80 __skb_checksum_complete+0x11/0x20 tcp_rcv_established+0x2bd5/0x2fd0 tcp_v6_do_rcv+0x13c/0x620 sk_backlog_rcv+0x15/0x30 release_sock+0xd2/0x150 tcp_recvmsg+0x1c1/0xfc0 inet_recvmsg+0x7d/0x90 sock_recvmsg+0xaf/0xe0 ___sys_recvmsg+0x111/0x3b0 SyS_recvmsg+0x5c/0xb0 system_call_fastpath+0x16/0x1b Fixes: b58d977432c8 ("dump_stack: serialize the output from dump_stack()") Signed-off-by: Eric Dumazet Cc: Alex Thorlton Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dump_stack.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/dump_stack.c b/lib/dump_stack.c index 6745c6230db3..c30d07e99dba 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -25,6 +25,7 @@ static atomic_t dump_lock = ATOMIC_INIT(-1); asmlinkage __visible void dump_stack(void) { + unsigned long flags; int was_locked; int old; int cpu; @@ -33,9 +34,8 @@ asmlinkage __visible void dump_stack(void) * Permit this cpu to perform nested stack dumps while serialising * against other CPUs */ - preempt_disable(); - retry: + local_irq_save(flags); cpu = smp_processor_id(); old = atomic_cmpxchg(&dump_lock, -1, cpu); if (old == -1) { @@ -43,6 +43,7 @@ retry: } else if (old == cpu) { was_locked = 1; } else { + local_irq_restore(flags); cpu_relax(); goto retry; } @@ -52,7 +53,7 @@ retry: if (!was_locked) atomic_set(&dump_lock, -1); - preempt_enable(); + local_irq_restore(flags); } #else asmlinkage __visible void dump_stack(void) -- cgit From 00cd29b799e3449f0c68b1cc77cd4a5f95b42d17 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 13 Jan 2016 08:10:31 -0800 Subject: klist: fix starting point removed bug in klist iterators The starting node for a klist iteration is often passed in from somewhere way above the klist infrastructure, meaning there's no guarantee the node is still on the list. We've seen this in SCSI where we use bus_find_device() to iterate through a list of devices. In the face of heavy hotplug activity, the last device returned by bus_find_device() can be removed before the next call. This leads to Dec 3 13:22:02 localhost kernel: WARNING: CPU: 2 PID: 28073 at include/linux/kref.h:47 klist_iter_init_node+0x3d/0x50() Dec 3 13:22:02 localhost kernel: Modules linked in: scsi_debug x86_pkg_temp_thermal kvm_intel kvm irqbypass crc32c_intel joydev iTCO_wdt dcdbas ipmi_devintf acpi_power_meter iTCO_vendor_support ipmi_si imsghandler pcspkr wmi acpi_cpufreq tpm_tis tpm shpchp lpc_ich mfd_core nfsd nfs_acl lockd grace sunrpc tg3 ptp pps_core Dec 3 13:22:02 localhost kernel: CPU: 2 PID: 28073 Comm: cat Not tainted 4.4.0-rc1+ #2 Dec 3 13:22:02 localhost kernel: Hardware name: Dell Inc. 
PowerEdge R320/08VT7V, BIOS 2.0.22 11/19/2013 Dec 3 13:22:02 localhost kernel: ffffffff81a20e77 ffff880613acfd18 ffffffff81321eef 0000000000000000 Dec 3 13:22:02 localhost kernel: ffff880613acfd50 ffffffff8107ca52 ffff88061176b198 0000000000000000 Dec 3 13:22:02 localhost kernel: ffffffff814542b0 ffff880610cfb100 ffff88061176b198 ffff880613acfd60 Dec 3 13:22:02 localhost kernel: Call Trace: Dec 3 13:22:02 localhost kernel: [] dump_stack+0x44/0x55 Dec 3 13:22:02 localhost kernel: [] warn_slowpath_common+0x82/0xc0 Dec 3 13:22:02 localhost kernel: [] ? proc_scsi_show+0x20/0x20 Dec 3 13:22:02 localhost kernel: [] warn_slowpath_null+0x1a/0x20 Dec 3 13:22:02 localhost kernel: [] klist_iter_init_node+0x3d/0x50 Dec 3 13:22:02 localhost kernel: [] bus_find_device+0x51/0xb0 Dec 3 13:22:02 localhost kernel: [] scsi_seq_next+0x2d/0x40 [...] And an eventual crash. It can actually occur in any hotplug system which has a device finder and a starting device. We can fix this globally by making sure the starting node for klist_iter_init_node() is actually a member of the list before using it (and by starting from the beginning if it isn't). Reported-by: Ewan D. Milne Tested-by: Ewan D. Milne Cc: stable@vger.kernel.org Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- lib/klist.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/klist.c b/lib/klist.c index d74cf7a29afd..0507fa5d84c5 100644 --- a/lib/klist.c +++ b/lib/klist.c @@ -282,9 +282,9 @@ void klist_iter_init_node(struct klist *k, struct klist_iter *i, struct klist_node *n) { i->i_klist = k; - i->i_cur = n; - if (n) - kref_get(&n->n_ref); + i->i_cur = NULL; + if (n && kref_get_unless_zero(&n->n_ref)) + i->i_cur = n; } EXPORT_SYMBOL_GPL(klist_iter_init_node); -- cgit From 20af74ef140f0fbc477f90817c58241107782e10 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sun, 27 Dec 2015 18:46:05 +0800 Subject: devres: use to_pci_dev() Use to_pci_dev() instead of open-coding it. Signed-off-by: Geliang Tang Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- lib/devres.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/devres.c b/lib/devres.c index 8c85672639d3..cb1464c411a2 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -236,7 +236,7 @@ struct pcim_iomap_devres { static void pcim_iomap_release(struct device *gendev, void *res) { - struct pci_dev *dev = container_of(gendev, struct pci_dev, dev); + struct pci_dev *dev = to_pci_dev(gendev); struct pcim_iomap_devres *this = res; int i; -- cgit From 4ba6a2b28f111e4c9621487612056d10f3f4a6ca Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 8 Feb 2016 16:09:08 +0900 Subject: scatterlist: fix a typo in comment block of sg_miter_stop() Fix the doubled "started" and tidy up the following sentences. Signed-off-by: Masahiro Yamada Signed-off-by: Linus Torvalds --- lib/scatterlist.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/scatterlist.c b/lib/scatterlist.c index bafa9933fa76..004fc70fc56a 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -598,9 +598,9 @@ EXPORT_SYMBOL(sg_miter_next); * * Description: * Stops mapping iterator @miter. @miter should have been started - * started using sg_miter_start(). A stopped iteration can be - * resumed by calling sg_miter_next() on it. This is useful when - * resources (kmap) need to be released during iteration. + * using sg_miter_start(). A stopped iteration can be resumed by + * calling sg_miter_next() on it. 
This is useful when resources (kmap) + * need to be released during iteration. * * Context: * Preemption disabled if the SG_MITER_ATOMIC is set. Don't care -- cgit From 975db45e9cc561bf8a7eddfa0705d3a078ec184f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Feb 2016 15:36:52 +0100 Subject: locking/static_keys: Avoid nested functions clang does not support nested functions inside of an array definition: lib/test_static_keys.c:105:16: error: function definition is not allowed here .test_key = test_key_func(&old_true_key, static_key_true), lib/test_static_keys.c:50:20: note: expanded from macro 'test_key_func' ({bool func(void) { return branch(key); } func; }) That code appears to be a little too clever, so this simplifies it a bit by defining functions outside of the array. Signed-off-by: Arnd Bergmann Acked-by: Jason Baron Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1454942223-2781480-1-git-send-email-arnd@arndb.de Signed-off-by: Ingo Molnar --- lib/test_static_keys.c | 62 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/test_static_keys.c b/lib/test_static_keys.c index c61b299e367f..915d75df2086 100644 --- a/lib/test_static_keys.c +++ b/lib/test_static_keys.c @@ -46,8 +46,11 @@ struct test_key { bool (*test_key)(void); }; -#define test_key_func(key, branch) \ - ({bool func(void) { return branch(key); } func; }) +#define test_key_func(key, branch) \ +static bool key ## _ ## branch(void) \ +{ \ + return branch(&key); \ +} static void invert_key(struct static_key *key) { @@ -92,6 +95,25 @@ static int verify_keys(struct test_key *keys, int size, bool invert) return 0; } +test_key_func(old_true_key, static_key_true) +test_key_func(old_false_key, static_key_false) +test_key_func(true_key, static_branch_likely) +test_key_func(true_key, static_branch_unlikely) +test_key_func(false_key, static_branch_likely) +test_key_func(false_key, static_branch_unlikely) +test_key_func(base_old_true_key, static_key_true) +test_key_func(base_inv_old_true_key, static_key_true) +test_key_func(base_old_false_key, static_key_false) +test_key_func(base_inv_old_false_key, static_key_false) +test_key_func(base_true_key, static_branch_likely) +test_key_func(base_true_key, static_branch_unlikely) +test_key_func(base_inv_true_key, static_branch_likely) +test_key_func(base_inv_true_key, static_branch_unlikely) +test_key_func(base_false_key, static_branch_likely) +test_key_func(base_false_key, static_branch_unlikely) +test_key_func(base_inv_false_key, static_branch_likely) +test_key_func(base_inv_false_key, static_branch_unlikely) + static int __init test_static_key_init(void) { int ret; @@ -102,95 +124,95 @@ static int __init test_static_key_init(void) { .init_state = true, .key = &old_true_key, - .test_key = test_key_func(&old_true_key, static_key_true), + .test_key = &old_true_key_static_key_true, }, { .init_state = false, .key = &old_false_key, - .test_key = test_key_func(&old_false_key, static_key_false), + .test_key = &old_false_key_static_key_false, }, /* internal keys - new keys */ { .init_state = true, .key = &true_key.key, - .test_key = test_key_func(&true_key, static_branch_likely), + .test_key = &true_key_static_branch_likely, }, { .init_state = true, .key = &true_key.key, - .test_key = test_key_func(&true_key, static_branch_unlikely), + .test_key = &true_key_static_branch_unlikely, }, { .init_state = false, .key = &false_key.key, - 
.test_key = test_key_func(&false_key, static_branch_likely), + .test_key = &false_key_static_branch_likely, }, { .init_state = false, .key = &false_key.key, - .test_key = test_key_func(&false_key, static_branch_unlikely), + .test_key = &false_key_static_branch_unlikely, }, /* external keys - old keys */ { .init_state = true, .key = &base_old_true_key, - .test_key = test_key_func(&base_old_true_key, static_key_true), + .test_key = &base_old_true_key_static_key_true, }, { .init_state = false, .key = &base_inv_old_true_key, - .test_key = test_key_func(&base_inv_old_true_key, static_key_true), + .test_key = &base_inv_old_true_key_static_key_true, }, { .init_state = false, .key = &base_old_false_key, - .test_key = test_key_func(&base_old_false_key, static_key_false), + .test_key = &base_old_false_key_static_key_false, }, { .init_state = true, .key = &base_inv_old_false_key, - .test_key = test_key_func(&base_inv_old_false_key, static_key_false), + .test_key = &base_inv_old_false_key_static_key_false, }, /* external keys - new keys */ { .init_state = true, .key = &base_true_key.key, - .test_key = test_key_func(&base_true_key, static_branch_likely), + .test_key = &base_true_key_static_branch_likely, }, { .init_state = true, .key = &base_true_key.key, - .test_key = test_key_func(&base_true_key, static_branch_unlikely), + .test_key = &base_true_key_static_branch_unlikely, }, { .init_state = false, .key = &base_inv_true_key.key, - .test_key = test_key_func(&base_inv_true_key, static_branch_likely), + .test_key = &base_inv_true_key_static_branch_likely, }, { .init_state = false, .key = &base_inv_true_key.key, - .test_key = test_key_func(&base_inv_true_key, static_branch_unlikely), + .test_key = &base_inv_true_key_static_branch_unlikely, }, { .init_state = false, .key = &base_false_key.key, - .test_key = test_key_func(&base_false_key, static_branch_likely), + .test_key = &base_false_key_static_branch_likely, }, { .init_state = false, .key = &base_false_key.key, - .test_key = test_key_func(&base_false_key, static_branch_unlikely), + .test_key = &base_false_key_static_branch_unlikely, }, { .init_state = true, .key = &base_inv_false_key.key, - .test_key = test_key_func(&base_inv_false_key, static_branch_likely), + .test_key = &base_inv_false_key_static_branch_likely, }, { .init_state = true, .key = &base_inv_false_key.key, - .test_key = test_key_func(&base_inv_false_key, static_branch_unlikely), + .test_key = &base_inv_false_key_static_branch_unlikely, }, }; -- cgit From f303fccb82928790ec58eea82722bd5c54d300b3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 9 Feb 2016 17:59:38 -0500 Subject: workqueue: implement "workqueue.debug_force_rr_cpu" debug feature Workqueue used to guarantee local execution for work items queued without explicit target CPU. The guarantee is gone now which can break some usages in subtle ways. To flush out those cases, this patch implements a debug feature which forces round-robin CPU selection for all such work items. The debug feature defaults to off and can be enabled with a kernel parameter. The default can be flipped with a debug config option. If you hit this commit during bisection, please refer to 041bd12e272c ("Revert "workqueue: make sure delayed work run in local cpu"") for more information and ping me. 
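For illustration, forced round-robin placement of unbound work items can be sketched as below. This is only a hedged approximation of the idea, not the actual kernel/workqueue.c change; the names force_rr, rr_cpu_last and select_unbound_cpu are invented for the example.

	/* Sketch only: rotate unbound work across the online CPUs when the
	 * debug knob is set, instead of preferring the local CPU. */
	static bool force_rr;	/* set from workqueue.debug_force_rr_cpu */
	static DEFINE_PER_CPU(int, rr_cpu_last);

	static int select_unbound_cpu(int local_cpu)
	{
		int cpu;

		if (likely(!force_rr))
			return local_cpu;	/* local CPU still preferred */

		cpu = cpumask_next(__this_cpu_read(rr_cpu_last), cpu_online_mask);
		if (cpu >= nr_cpu_ids)
			cpu = cpumask_first(cpu_online_mask);
		__this_cpu_write(rr_cpu_last, cpu);
		return cpu;
	}

Work queued for an explicit CPU (e.g. via queue_work_on()) is unaffected; only WORK_CPU_UNBOUND submissions would go through a path like this.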
Signed-off-by: Tejun Heo --- lib/Kconfig.debug | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ecb9e75614bf..8bfd1aca7a3d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1400,6 +1400,21 @@ config RCU_EQS_DEBUG endmenu # "RCU Debugging" +config DEBUG_WQ_FORCE_RR_CPU + bool "Force round-robin CPU selection for unbound work items" + depends on DEBUG_KERNEL + default n + help + Workqueue used to implicitly guarantee that work items queued + without explicit CPU specified are put on the local CPU. This + guarantee is no longer true and while local CPU is still + preferred work items may be put on foreign CPUs. Kernel + parameter "workqueue.debug_force_rr_cpu" is added to force + round-robin CPU selection to flush out usages which depend on the + now broken guarantee. This config option enables the debug + feature by default. When enabled, memory and cache locality will + be impacted. + config DEBUG_BLOCK_EXT_DEVT bool "Force extended block device numbers and spread them" depends on DEBUG_KERNEL -- cgit From 2fe829aca9d7bed5fd6b49c6a1452e5e486b6cc3 Mon Sep 17 00:00:00 2001 From: Gabriel Somlo Date: Thu, 28 Jan 2016 09:23:12 -0500 Subject: kobject: export kset_find_obj() for module use Signed-off-by: Gabriel Somlo Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index 7cbccd2b4c72..445dcaeb0f56 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -861,6 +861,7 @@ struct kobject *kset_find_obj(struct kset *kset, const char *name) spin_unlock(&kset->list_lock); return ret; } +EXPORT_SYMBOL_GPL(kset_find_obj); static void kset_release(struct kobject *kobj) { -- cgit From 73500267c930baadadb0d02284909731baf151f7 Mon Sep 17 00:00:00 2001 From: Peter Jones Date: Mon, 8 Feb 2016 14:48:11 -0500 Subject: lib/ucs2_string: Add ucs2 -> utf8 helper functions This adds ucs2_utf8size(), which tells us how big our ucs2 string is in bytes, and ucs2_as_utf8, which translates from ucs2 to utf8.. Signed-off-by: Peter Jones Tested-by: Lee, Chun-Yi Acked-by: Matthew Garrett Signed-off-by: Matt Fleming --- lib/ucs2_string.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'lib') diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c index 6f500ef2301d..17dd74e21ef9 100644 --- a/lib/ucs2_string.c +++ b/lib/ucs2_string.c @@ -49,3 +49,65 @@ ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len) } } EXPORT_SYMBOL(ucs2_strncmp); + +unsigned long +ucs2_utf8size(const ucs2_char_t *src) +{ + unsigned long i; + unsigned long j = 0; + + for (i = 0; i < ucs2_strlen(src); i++) { + u16 c = src[i]; + + if (c > 0x800) + j += 3; + else if (c > 0x80) + j += 2; + else + j += 1; + } + + return j; +} +EXPORT_SYMBOL(ucs2_utf8size); + +/* + * copy at most maxlength bytes of whole utf8 characters to dest from the + * ucs2 string src. + * + * The return value is the number of characters copied, not including the + * final NUL character. 
+ */ +unsigned long +ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength) +{ + unsigned int i; + unsigned long j = 0; + unsigned long limit = ucs2_strnlen(src, maxlength); + + for (i = 0; maxlength && i < limit; i++) { + u16 c = src[i]; + + if (c > 0x800) { + if (maxlength < 3) + break; + maxlength -= 3; + dest[j++] = 0xe0 | (c & 0xf000) >> 12; + dest[j++] = 0x80 | (c & 0x0fc0) >> 8; + dest[j++] = 0x80 | (c & 0x003f); + } else if (c > 0x80) { + if (maxlength < 2) + break; + maxlength -= 2; + dest[j++] = 0xc0 | (c & 0xfe0) >> 5; + dest[j++] = 0x80 | (c & 0x01f); + } else { + maxlength -= 1; + dest[j++] = c & 0x7f; + } + } + if (maxlength) + dest[j] = '\0'; + return j; +} +EXPORT_SYMBOL(ucs2_as_utf8); -- cgit From 7707535ab95e2231b6d7f2bfb4f27558e83c4dc2 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 11 Feb 2016 16:12:55 -0800 Subject: ubsan: cosmetic fix to Kconfig text When enabling UBSAN_SANITIZE_ALL, the kernel image size gets increased significantly (~3x). So, it sounds better to have some note in Kconfig. And, fixed a typo. Signed-off-by: Yang Shi Acked-by: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.ubsan | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index 49518fb48cab..e07c1ba9ba13 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -18,6 +18,8 @@ config UBSAN_SANITIZE_ALL This option activates instrumentation for the entire kernel. If you don't enable this option, you have to explicitly specify UBSAN_SANITIZE := y for the files/directories you want to check for UB. + Enabling this option will get kernel image size increased + significantly. config UBSAN_ALIGNMENT bool "Enable checking of pointers alignment" @@ -25,5 +27,5 @@ config UBSAN_ALIGNMENT default y if !HAVE_EFFICIENT_UNALIGNED_ACCESS help This option enables detection of unaligned memory accesses. - Enabling this option on architectures that support unalligned + Enabling this option on architectures that support unaligned accesses may produce a lot of false positives. -- cgit From 7eb391299419a03cbe0fa5ab0e6b0932e42c7a36 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 11 Feb 2016 16:13:00 -0800 Subject: vsprintf: kptr_restrict is okay in IRQ when 2 The kptr_restrict flag, when set to 1, only prints the kernel address when the user has CAP_SYSLOG. When it is set to 2, the kernel address is always printed as zero. When set to 1, this needs to check whether or not we're in IRQ. However, when set to 2, this check is unnecessary, and produces confusing results in dmesg. Thus, only make sure we're not in IRQ when mode 1 is used, but not mode 2. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Jason A. Donenfeld Cc: Rasmus Villemoes Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 48ff9c36644d..f44e178e6ede 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1590,22 +1590,23 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return buf; } case 'K': - /* - * %pK cannot be used in IRQ context because its test - * for CAP_SYSLOG would be meaningless. 
- */ - if (kptr_restrict && (in_irq() || in_serving_softirq() || - in_nmi())) { - if (spec.field_width == -1) - spec.field_width = default_width; - return string(buf, end, "pK-error", spec); - } - switch (kptr_restrict) { case 0: /* Always print %pK values */ break; case 1: { + const struct cred *cred; + + /* + * kptr_restrict==1 cannot be used in IRQ context + * because its test for CAP_SYSLOG would be meaningless. + */ + if (in_irq() || in_serving_softirq() || in_nmi()) { + if (spec.field_width == -1) + spec.field_width = default_width; + return string(buf, end, "pK-error", spec); + } + /* * Only print the real pointer value if the current * process has CAP_SYSLOG and is running with the @@ -1615,8 +1616,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, * leak pointer values if a binary opens a file using * %pK and then elevates privileges before reading it. */ - const struct cred *cred = current_cred(); - + cred = current_cred(); if (!has_capability_noaudit(current, CAP_SYSLOG) || !uid_eq(cred->euid, cred->uid) || !gid_eq(cred->egid, cred->gid)) -- cgit From fc4fa6e112c0f999fab022a4eb7f6614bb47c7ab Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Sun, 13 Dec 2015 15:26:11 +0900 Subject: treewide: Fix typo in printk This patch fix spelling typos found in printk and Kconfig. Signed-off-by: Masanari Iida Acked-by: Randy Dunlap Signed-off-by: Jiri Kosina --- lib/842/842_decompress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/842/842_decompress.c b/lib/842/842_decompress.c index 8881dad2a6a0..a2a941f8112d 100644 --- a/lib/842/842_decompress.c +++ b/lib/842/842_decompress.c @@ -250,7 +250,7 @@ static int do_op(struct sw842_param *p, u8 o) case OP_ACTION_NOOP: break; default: - pr_err("Interal error, invalid op %x\n", op); + pr_err("Internal error, invalid op %x\n", op); return -EINVAL; } -- cgit From bdb428c82aab5d8aba6fbb29dde2cf678eadc432 Mon Sep 17 00:00:00 2001 From: Bogdan Sikora Date: Sun, 27 Dec 2015 14:58:23 +0100 Subject: lib+mm: fix few spelling mistakes All are in comments. Signed-off-by: Bogdan Sikora Cc: Cc: Rafael Aquini Cc: Kent Overstreet Cc: Jan Kara [jkosina@suse.cz: more fixup] Acked-by: Rafael Aquini Signed-off-by: Jiri Slaby Signed-off-by: Jiri Kosina --- lib/flex_proportions.c | 2 +- lib/percpu-refcount.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index 8f25652f40d4..a71cf1bdd4c9 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -17,7 +17,7 @@ * * \Sum_{j} p_{j} = 1, * - * This formula can be straightforwardly computed by maintaing denominator + * This formula can be straightforwardly computed by maintaining denominator * (let's call it 'd') and for each event type its numerator (let's call it * 'n_j'). 
When an event of type 'j' happens, we simply need to do: * n_j++; d++; diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 6111bcb28376..27fe74948882 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -12,7 +12,7 @@ * particular cpu can (and will) wrap - this is fine, when we go to shutdown the * percpu counters will all sum to the correct value * - * (More precisely: because moduler arithmatic is commutative the sum of all the + * (More precisely: because modular arithmetic is commutative the sum of all the * percpu_count vars will be equal to what it would have been if all the gets * and puts were done to a single integer, even if some of the percpu integers * overflow or underflow). -- cgit From a68075908a37850918ad96b056acc9ac4ce1bd90 Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Fri, 12 Feb 2016 23:13:33 +0000 Subject: lib/ucs2_string: Correct ucs2 -> utf8 conversion The comparisons should be >= since 0x800 and 0x80 require an additional bit to store. For the 3 byte case, the existing shift would drop off 2 more bits than intended. For the 2 byte case, there should be 5 bits bits in byte 1, and 6 bits in byte 2. Signed-off-by: Jason Andryuk Reviewed-by: Laszlo Ersek Cc: Peter Jones Cc: Matthew Garrett Cc: "Lee, Chun-Yi" Signed-off-by: Matt Fleming --- lib/ucs2_string.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c index 17dd74e21ef9..f0b323abb4c6 100644 --- a/lib/ucs2_string.c +++ b/lib/ucs2_string.c @@ -59,9 +59,9 @@ ucs2_utf8size(const ucs2_char_t *src) for (i = 0; i < ucs2_strlen(src); i++) { u16 c = src[i]; - if (c > 0x800) + if (c >= 0x800) j += 3; - else if (c > 0x80) + else if (c >= 0x80) j += 2; else j += 1; @@ -88,19 +88,19 @@ ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength) for (i = 0; maxlength && i < limit; i++) { u16 c = src[i]; - if (c > 0x800) { + if (c >= 0x800) { if (maxlength < 3) break; maxlength -= 3; dest[j++] = 0xe0 | (c & 0xf000) >> 12; - dest[j++] = 0x80 | (c & 0x0fc0) >> 8; + dest[j++] = 0x80 | (c & 0x0fc0) >> 6; dest[j++] = 0x80 | (c & 0x003f); - } else if (c > 0x80) { + } else if (c >= 0x80) { if (maxlength < 2) break; maxlength -= 2; - dest[j++] = 0xc0 | (c & 0xfe0) >> 5; - dest[j++] = 0x80 | (c & 0x01f); + dest[j++] = 0xc0 | (c & 0x7c0) >> 6; + dest[j++] = 0x80 | (c & 0x03f); } else { maxlength -= 1; dest[j++] = c & 0x7f; -- cgit From e52bc7c28ac9f54db6f86b19ed65c599def18c98 Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Fri, 19 Feb 2016 09:23:59 -0500 Subject: lib/bitmap.c: conversion routines to/from u32 array Aimed at transferring bitmaps to/from user-space in a 32/64-bit agnostic way. Tested: unit tests (next patch) on qemu i386, x86_64, ppc, ppc64 BE and LE, ARM. Signed-off-by: David Decotigny Reviewed-by: Ben Hutchings Signed-off-by: David S. 
Miller --- lib/bitmap.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) (limited to 'lib') diff --git a/lib/bitmap.c b/lib/bitmap.c index 814814397cce..c66da508cbf7 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include @@ -1059,6 +1061,93 @@ int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) } EXPORT_SYMBOL(bitmap_allocate_region); +/** + * bitmap_from_u32array - copy the contents of a u32 array of bits to bitmap + * @bitmap: array of unsigned longs, the destination bitmap, non NULL + * @nbits: number of bits in @bitmap + * @buf: array of u32 (in host byte order), the source bitmap, non NULL + * @nwords: number of u32 words in @buf + * + * copy min(nbits, 32*nwords) bits from @buf to @bitmap, remaining + * bits between nword and nbits in @bitmap (if any) are cleared. In + * last word of @bitmap, the bits beyond nbits (if any) are kept + * unchanged. + * + * Return the number of bits effectively copied. + */ +unsigned int +bitmap_from_u32array(unsigned long *bitmap, unsigned int nbits, + const u32 *buf, unsigned int nwords) +{ + unsigned int dst_idx, src_idx; + + for (src_idx = dst_idx = 0; dst_idx < BITS_TO_LONGS(nbits); ++dst_idx) { + unsigned long part = 0; + + if (src_idx < nwords) + part = buf[src_idx++]; + +#if BITS_PER_LONG == 64 + if (src_idx < nwords) + part |= ((unsigned long) buf[src_idx++]) << 32; +#endif + + if (dst_idx < nbits/BITS_PER_LONG) + bitmap[dst_idx] = part; + else { + unsigned long mask = BITMAP_LAST_WORD_MASK(nbits); + + bitmap[dst_idx] = (bitmap[dst_idx] & ~mask) + | (part & mask); + } + } + + return min_t(unsigned int, nbits, 32*nwords); +} +EXPORT_SYMBOL(bitmap_from_u32array); + +/** + * bitmap_to_u32array - copy the contents of bitmap to a u32 array of bits + * @buf: array of u32 (in host byte order), the dest bitmap, non NULL + * @nwords: number of u32 words in @buf + * @bitmap: array of unsigned longs, the source bitmap, non NULL + * @nbits: number of bits in @bitmap + * + * copy min(nbits, 32*nwords) bits from @bitmap to @buf. Remaining + * bits after nbits in @buf (if any) are cleared. + * + * Return the number of bits effectively copied. + */ +unsigned int +bitmap_to_u32array(u32 *buf, unsigned int nwords, + const unsigned long *bitmap, unsigned int nbits) +{ + unsigned int dst_idx = 0, src_idx = 0; + + while (dst_idx < nwords) { + unsigned long part = 0; + + if (src_idx < BITS_TO_LONGS(nbits)) { + part = bitmap[src_idx]; + if (src_idx >= nbits/BITS_PER_LONG) + part &= BITMAP_LAST_WORD_MASK(nbits); + src_idx++; + } + + buf[dst_idx++] = part & 0xffffffffUL; + +#if BITS_PER_LONG == 64 + if (dst_idx < nwords) { + part >>= 32; + buf[dst_idx++] = part & 0xffffffffUL; + } +#endif + } + + return min_t(unsigned int, nbits, 32*nwords); +} +EXPORT_SYMBOL(bitmap_to_u32array); + /** * bitmap_copy_le - copy a bitmap, putting the bits into little-endian order. * @dst: destination buffer -- cgit From 5fd003f56c2c584b62a0486ad25bbd4be02b8b6c Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Fri, 19 Feb 2016 09:24:00 -0500 Subject: test_bitmap: unit tests for lib/bitmap.c This is mainly testing bitmap construction and conversion to/from u32[] for now. Tested: qemu i386, x86_64, ppc, ppc64 BE and LE, ARM. Signed-off-by: David Decotigny Signed-off-by: David S. 
Miller --- lib/Kconfig.debug | 8 ++ lib/Makefile | 1 + lib/test_bitmap.c | 358 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 367 insertions(+) create mode 100644 lib/test_bitmap.c (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ecb9e75614bf..f890ee5e1385 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1738,6 +1738,14 @@ config TEST_KSTRTOX config TEST_PRINTF tristate "Test printf() family of functions at runtime" +config TEST_BITMAP + tristate "Test bitmap_*() family of functions at runtime" + default n + help + Enable this option to test the bitmap functions at boot. + + If unsure, say N. + config TEST_RHASHTABLE tristate "Perform selftest on resizable hash table" default n diff --git a/lib/Makefile b/lib/Makefile index a7c26a41a738..dda4039588b1 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -43,6 +43,7 @@ obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o obj-$(CONFIG_TEST_PRINTF) += test_printf.o +obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c new file mode 100644 index 000000000000..e2cbd43d193c --- /dev/null +++ b/lib/test_bitmap.c @@ -0,0 +1,358 @@ +/* + * Test cases for printf facility. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +static unsigned total_tests __initdata; +static unsigned failed_tests __initdata; + +static char pbl_buffer[PAGE_SIZE] __initdata; + + +static bool __init +__check_eq_uint(const char *srcfile, unsigned int line, + const unsigned int exp_uint, unsigned int x) +{ + if (exp_uint != x) { + pr_warn("[%s:%u] expected %u, got %u\n", + srcfile, line, exp_uint, x); + return false; + } + return true; +} + + +static bool __init +__check_eq_bitmap(const char *srcfile, unsigned int line, + const unsigned long *exp_bmap, unsigned int exp_nbits, + const unsigned long *bmap, unsigned int nbits) +{ + if (exp_nbits != nbits) { + pr_warn("[%s:%u] bitmap length mismatch: expected %u, got %u\n", + srcfile, line, exp_nbits, nbits); + return false; + } + + if (!bitmap_equal(exp_bmap, bmap, nbits)) { + pr_warn("[%s:%u] bitmaps contents differ: expected \"%*pbl\", got \"%*pbl\"\n", + srcfile, line, + exp_nbits, exp_bmap, nbits, bmap); + return false; + } + return true; +} + +static bool __init +__check_eq_pbl(const char *srcfile, unsigned int line, + const char *expected_pbl, + const unsigned long *bitmap, unsigned int nbits) +{ + snprintf(pbl_buffer, sizeof(pbl_buffer), "%*pbl", nbits, bitmap); + if (strcmp(expected_pbl, pbl_buffer)) { + pr_warn("[%s:%u] expected \"%s\", got \"%s\"\n", + srcfile, line, + expected_pbl, pbl_buffer); + return false; + } + return true; +} + +static bool __init +__check_eq_u32_array(const char *srcfile, unsigned int line, + const u32 *exp_arr, unsigned int exp_len, + const u32 *arr, unsigned int len) +{ + if (exp_len != len) { + pr_warn("[%s:%u] array length differ: expected %u, got %u\n", + srcfile, line, + exp_len, len); + return false; + } + + if (memcmp(exp_arr, arr, len*sizeof(*arr))) { + pr_warn("[%s:%u] array contents differ\n", srcfile, line); + print_hex_dump(KERN_WARNING, " exp: ", DUMP_PREFIX_OFFSET, + 32, 4, exp_arr, exp_len*sizeof(*exp_arr), false); + print_hex_dump(KERN_WARNING, " got: ", DUMP_PREFIX_OFFSET, + 32, 4, arr, len*sizeof(*arr), false); + return false; + } + + return 
true; +} + +#define __expect_eq(suffix, ...) \ + ({ \ + int result = 0; \ + total_tests++; \ + if (!__check_eq_ ## suffix(__FILE__, __LINE__, \ + ##__VA_ARGS__)) { \ + failed_tests++; \ + result = 1; \ + } \ + result; \ + }) + +#define expect_eq_uint(...) __expect_eq(uint, ##__VA_ARGS__) +#define expect_eq_bitmap(...) __expect_eq(bitmap, ##__VA_ARGS__) +#define expect_eq_pbl(...) __expect_eq(pbl, ##__VA_ARGS__) +#define expect_eq_u32_array(...) __expect_eq(u32_array, ##__VA_ARGS__) + +static void __init test_zero_fill_copy(void) +{ + DECLARE_BITMAP(bmap1, 1024); + DECLARE_BITMAP(bmap2, 1024); + + bitmap_zero(bmap1, 1024); + bitmap_zero(bmap2, 1024); + + /* single-word bitmaps */ + expect_eq_pbl("", bmap1, 23); + + bitmap_fill(bmap1, 19); + expect_eq_pbl("0-18", bmap1, 1024); + + bitmap_copy(bmap2, bmap1, 23); + expect_eq_pbl("0-18", bmap2, 1024); + + bitmap_fill(bmap2, 23); + expect_eq_pbl("0-22", bmap2, 1024); + + bitmap_copy(bmap2, bmap1, 23); + expect_eq_pbl("0-18", bmap2, 1024); + + bitmap_zero(bmap1, 23); + expect_eq_pbl("", bmap1, 1024); + + /* multi-word bitmaps */ + bitmap_zero(bmap1, 1024); + expect_eq_pbl("", bmap1, 1024); + + bitmap_fill(bmap1, 109); + expect_eq_pbl("0-108", bmap1, 1024); + + bitmap_copy(bmap2, bmap1, 1024); + expect_eq_pbl("0-108", bmap2, 1024); + + bitmap_fill(bmap2, 1024); + expect_eq_pbl("0-1023", bmap2, 1024); + + bitmap_copy(bmap2, bmap1, 1024); + expect_eq_pbl("0-108", bmap2, 1024); + + /* the following tests assume a 32- or 64-bit arch (even 128b + * if we care) + */ + + bitmap_fill(bmap2, 1024); + bitmap_copy(bmap2, bmap1, 109); /* ... but 0-padded til word length */ + expect_eq_pbl("0-108,128-1023", bmap2, 1024); + + bitmap_fill(bmap2, 1024); + bitmap_copy(bmap2, bmap1, 97); /* ... but aligned on word length */ + expect_eq_pbl("0-108,128-1023", bmap2, 1024); + + bitmap_zero(bmap2, 97); /* ... 
but 0-padded til word length */ + expect_eq_pbl("128-1023", bmap2, 1024); +} + +static void __init test_bitmap_u32_array_conversions(void) +{ + DECLARE_BITMAP(bmap1, 1024); + DECLARE_BITMAP(bmap2, 1024); + u32 exp_arr[32], arr[32]; + unsigned nbits; + + for (nbits = 0 ; nbits < 257 ; ++nbits) { + const unsigned int used_u32s = DIV_ROUND_UP(nbits, 32); + unsigned int i, rv; + + bitmap_zero(bmap1, nbits); + bitmap_set(bmap1, nbits, 1024 - nbits); /* garbage */ + + memset(arr, 0xff, sizeof(arr)); + rv = bitmap_to_u32array(arr, used_u32s, bmap1, nbits); + expect_eq_uint(nbits, rv); + + memset(exp_arr, 0xff, sizeof(exp_arr)); + memset(exp_arr, 0, used_u32s*sizeof(*exp_arr)); + expect_eq_u32_array(exp_arr, 32, arr, 32); + + bitmap_fill(bmap2, 1024); + rv = bitmap_from_u32array(bmap2, nbits, arr, used_u32s); + expect_eq_uint(nbits, rv); + expect_eq_bitmap(bmap1, 1024, bmap2, 1024); + + for (i = 0 ; i < nbits ; ++i) { + /* + * test conversion bitmap -> u32[] + */ + + bitmap_zero(bmap1, 1024); + __set_bit(i, bmap1); + bitmap_set(bmap1, nbits, 1024 - nbits); /* garbage */ + + memset(arr, 0xff, sizeof(arr)); + rv = bitmap_to_u32array(arr, used_u32s, bmap1, nbits); + expect_eq_uint(nbits, rv); + + /* 1st used u32 words contain expected bit set, the + * remaining words are left unchanged (0xff) + */ + memset(exp_arr, 0xff, sizeof(exp_arr)); + memset(exp_arr, 0, used_u32s*sizeof(*exp_arr)); + exp_arr[i/32] = (1U<<(i%32)); + expect_eq_u32_array(exp_arr, 32, arr, 32); + + + /* same, with longer array to fill + */ + memset(arr, 0xff, sizeof(arr)); + rv = bitmap_to_u32array(arr, 32, bmap1, nbits); + expect_eq_uint(nbits, rv); + + /* 1st used u32 words contain expected bit set, the + * remaining words are all 0s + */ + memset(exp_arr, 0, sizeof(exp_arr)); + exp_arr[i/32] = (1U<<(i%32)); + expect_eq_u32_array(exp_arr, 32, arr, 32); + + /* + * test conversion u32[] -> bitmap + */ + + /* the 1st nbits of bmap2 are identical to + * bmap1, the remaining bits of bmap2 are left + * unchanged (all 1s) + */ + bitmap_fill(bmap2, 1024); + rv = bitmap_from_u32array(bmap2, nbits, + exp_arr, used_u32s); + expect_eq_uint(nbits, rv); + + expect_eq_bitmap(bmap1, 1024, bmap2, 1024); + + /* same, with more bits to fill + */ + memset(arr, 0xff, sizeof(arr)); /* garbage */ + memset(arr, 0, used_u32s*sizeof(u32)); + arr[i/32] = (1U<<(i%32)); + + bitmap_fill(bmap2, 1024); + rv = bitmap_from_u32array(bmap2, 1024, arr, used_u32s); + expect_eq_uint(used_u32s*32, rv); + + /* the 1st nbits of bmap2 are identical to + * bmap1, the remaining bits of bmap2 are cleared + */ + bitmap_zero(bmap1, 1024); + __set_bit(i, bmap1); + expect_eq_bitmap(bmap1, 1024, bmap2, 1024); + + + /* + * test short conversion bitmap -> u32[] (1 + * word too short) + */ + if (used_u32s > 1) { + bitmap_zero(bmap1, 1024); + __set_bit(i, bmap1); + bitmap_set(bmap1, nbits, + 1024 - nbits); /* garbage */ + memset(arr, 0xff, sizeof(arr)); + + rv = bitmap_to_u32array(arr, used_u32s - 1, + bmap1, nbits); + expect_eq_uint((used_u32s - 1)*32, rv); + + /* 1st used u32 words contain expected + * bit set, the remaining words are + * left unchanged (0xff) + */ + memset(exp_arr, 0xff, sizeof(exp_arr)); + memset(exp_arr, 0, + (used_u32s-1)*sizeof(*exp_arr)); + if ((i/32) < (used_u32s - 1)) + exp_arr[i/32] = (1U<<(i%32)); + expect_eq_u32_array(exp_arr, 32, arr, 32); + } + + /* + * test short conversion u32[] -> bitmap (3 + * bits too short) + */ + if (nbits > 3) { + memset(arr, 0xff, sizeof(arr)); /* garbage */ + memset(arr, 0, used_u32s*sizeof(*arr)); + arr[i/32] = (1U<<(i%32)); 
+ + bitmap_zero(bmap1, 1024); + rv = bitmap_from_u32array(bmap1, nbits - 3, + arr, used_u32s); + expect_eq_uint(nbits - 3, rv); + + /* we are expecting the bit < nbits - + * 3 (none otherwise), and the rest of + * bmap1 unchanged (0-filled) + */ + bitmap_zero(bmap2, 1024); + if (i < nbits - 3) + __set_bit(i, bmap2); + expect_eq_bitmap(bmap2, 1024, bmap1, 1024); + + /* do the same with bmap1 initially + * 1-filled + */ + + bitmap_fill(bmap1, 1024); + rv = bitmap_from_u32array(bmap1, nbits - 3, + arr, used_u32s); + expect_eq_uint(nbits - 3, rv); + + /* we are expecting the bit < nbits - + * 3 (none otherwise), and the rest of + * bmap1 unchanged (1-filled) + */ + bitmap_zero(bmap2, 1024); + if (i < nbits - 3) + __set_bit(i, bmap2); + bitmap_set(bmap2, nbits-3, 1024 - nbits + 3); + expect_eq_bitmap(bmap2, 1024, bmap1, 1024); + } + } + } +} + +static int __init test_bitmap_init(void) +{ + test_zero_fill_copy(); + test_bitmap_u32_array_conversions(); + + if (failed_tests == 0) + pr_info("all %u tests passed\n", total_tests); + else + pr_warn("failed %u out of %u tests\n", + failed_tests, total_tests); + + return failed_tests ? -EINVAL : 0; +} + +static void __exit test_bitmap_cleanup(void) +{ +} + +module_init(test_bitmap_init); +module_exit(test_bitmap_cleanup); + +MODULE_AUTHOR("david decotigny "); +MODULE_LICENSE("GPL"); -- cgit From a272858a3c1ecd4a935ba23c66668f81214bd110 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 1 Jan 2016 12:39:09 +0100 Subject: extable: add support for relative extables to search and sort routines This adds support to the generic search_extable() and sort_extable() implementations for dealing with exception table entries whose fields contain relative offsets rather than absolute addresses. Acked-by: Helge Deller Acked-by: Heiko Carstens Acked-by: H. Peter Anvin Acked-by: Tony Luck Acked-by: Will Deacon Acked-by: Richard Henderson Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- lib/extable.c | 50 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/extable.c b/lib/extable.c index 4cac81ec225e..0be02ad561e9 100644 --- a/lib/extable.c +++ b/lib/extable.c @@ -14,7 +14,37 @@ #include #include +#ifndef ARCH_HAS_RELATIVE_EXTABLE +#define ex_to_insn(x) ((x)->insn) +#else +static inline unsigned long ex_to_insn(const struct exception_table_entry *x) +{ + return (unsigned long)&x->insn + x->insn; +} +#endif + #ifndef ARCH_HAS_SORT_EXTABLE +#ifndef ARCH_HAS_RELATIVE_EXTABLE +#define swap_ex NULL +#else +static void swap_ex(void *a, void *b, int size) +{ + struct exception_table_entry *x = a, *y = b, tmp; + int delta = b - a; + + tmp = *x; + x->insn = y->insn + delta; + y->insn = tmp.insn - delta; + +#ifdef swap_ex_entry_fixup + swap_ex_entry_fixup(x, y, tmp, delta); +#else + x->fixup = y->fixup + delta; + y->fixup = tmp.fixup - delta; +#endif +} +#endif /* ARCH_HAS_RELATIVE_EXTABLE */ + /* * The exception table needs to be sorted so that the binary * search that we use to find entries in it works properly. 
@@ -26,9 +56,9 @@ static int cmp_ex(const void *a, const void *b) const struct exception_table_entry *x = a, *y = b; /* avoid overflow */ - if (x->insn > y->insn) + if (ex_to_insn(x) > ex_to_insn(y)) return 1; - if (x->insn < y->insn) + if (ex_to_insn(x) < ex_to_insn(y)) return -1; return 0; } @@ -37,7 +67,7 @@ void sort_extable(struct exception_table_entry *start, struct exception_table_entry *finish) { sort(start, finish - start, sizeof(struct exception_table_entry), - cmp_ex, NULL); + cmp_ex, swap_ex); } #ifdef CONFIG_MODULES @@ -48,13 +78,15 @@ void sort_extable(struct exception_table_entry *start, void trim_init_extable(struct module *m) { /*trim the beginning*/ - while (m->num_exentries && within_module_init(m->extable[0].insn, m)) { + while (m->num_exentries && + within_module_init(ex_to_insn(&m->extable[0]), m)) { m->extable++; m->num_exentries--; } /*trim the end*/ while (m->num_exentries && - within_module_init(m->extable[m->num_exentries-1].insn, m)) + within_module_init(ex_to_insn(&m->extable[m->num_exentries - 1]), + m)) m->num_exentries--; } #endif /* CONFIG_MODULES */ @@ -81,13 +113,13 @@ search_extable(const struct exception_table_entry *first, * careful, the distance between value and insn * can be larger than MAX_LONG: */ - if (mid->insn < value) + if (ex_to_insn(mid) < value) first = mid + 1; - else if (mid->insn > value) + else if (ex_to_insn(mid) > value) last = mid - 1; else return mid; - } - return NULL; + } + return NULL; } #endif -- cgit From 3ee0cb5fb5eea2110db1b5cb7f67029b7be8a376 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Wed, 17 Feb 2016 14:46:59 +0100 Subject: lib/mpi: Endianness fix The limbs are integers in the host endianness, so we can't simply iterate over the individual bytes. The current code happens to work on little-endian, because the order of the limbs in the MPI array is the same as the order of the bytes in each limb, but it breaks on big-endian. 
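To make the breakage concrete, assume 64-bit limbs and the value 0x1ff held in a single limb d[0]. Little-endian memory holds that limb as ff 01 00 00 00 00 00 00, so the old backwards byte scan correctly counts six leading zero bytes; big-endian memory holds it as 00 00 00 00 00 00 01 ff, so the same scan hits 0xff immediately and wrongly reports zero. Counting on the limb value itself is endian-neutral, e.g. (hedged demo, assuming 64-bit mpi_limb_t):

	mpi_limb_t d[1] = { 0x1ff };
	/*
	 * count_leading_zeros(d[0]) == 55 for a 64-bit limb, and 55 / 8 == 6
	 * leading zero bytes -- the same answer on either endianness, since
	 * only the value is inspected, never its in-memory byte order.
	 */
	int lzeros = count_leading_zeros(d[0]) / 8;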
Fixes: 0f74fbf77d45 ("MPI: Fix mpi_read_buffer") Signed-off-by: Michal Marek Signed-off-by: Herbert Xu --- lib/mpi/mpicoder.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index ec533a6c77b5..eb15e7dc7b65 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -128,6 +128,23 @@ leave: } EXPORT_SYMBOL_GPL(mpi_read_from_buffer); +static int count_lzeros(MPI a) +{ + mpi_limb_t alimb; + int i, lzeros = 0; + + for (i = a->nlimbs - 1; i >= 0; i--) { + alimb = a->d[i]; + if (alimb == 0) { + lzeros += sizeof(mpi_limb_t); + } else { + lzeros += count_leading_zeros(alimb) / 8; + break; + } + } + return lzeros; +} + /** * mpi_read_buffer() - read MPI to a bufer provided by user (msb first) * @@ -148,7 +165,7 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, uint8_t *p; mpi_limb_t alimb; unsigned int n = mpi_get_size(a); - int i, lzeros = 0; + int i, lzeros; if (!buf || !nbytes) return -EINVAL; @@ -156,14 +173,7 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, if (sign) *sign = a->sign; - p = (void *)&a->d[a->nlimbs] - 1; - - for (i = a->nlimbs * sizeof(alimb) - 1; i >= 0; i--, p--) { - if (!*p) - lzeros++; - else - break; - } + lzeros = count_lzeros(a); if (buf_len < n - lzeros) { *nbytes = n - lzeros; @@ -351,7 +361,7 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes, u8 *p, *p2; mpi_limb_t alimb, alimb2; unsigned int n = mpi_get_size(a); - int i, x, y = 0, lzeros = 0, buf_len; + int i, x, y = 0, lzeros, buf_len; if (!nbytes) return -EINVAL; @@ -359,14 +369,7 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes, if (sign) *sign = a->sign; - p = (void *)&a->d[a->nlimbs] - 1; - - for (i = a->nlimbs * sizeof(alimb) - 1; i >= 0; i--, p--) { - if (!*p) - lzeros++; - else - break; - } + lzeros = count_lzeros(a); if (*nbytes < n - lzeros) { *nbytes = n - lzeros; -- cgit From c5d552487b9eb116b61032239ffb2f8d192f19b8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 Feb 2016 13:46:26 +0100 Subject: lib/mpi: avoid assembler warning A wrapper around the umull assembly instruction might reuse the input register as an output, which is undefined on some ARM machines, as pointed out by this assembler warning: CC lib/mpi/generic_mpih-mul1.o /tmp/ccxJuxIy.s: Assembler messages: /tmp/ccxJuxIy.s:53: rdhi, rdlo and rm must all be different CC lib/mpi/generic_mpih-mul2.o /tmp/ccI0scAD.s: Assembler messages: /tmp/ccI0scAD.s:53: rdhi, rdlo and rm must all be different CC lib/mpi/generic_mpih-mul3.o /tmp/ccMvVQcp.s: Assembler messages: /tmp/ccMvVQcp.s:53: rdhi, rdlo and rm must all be different This changes the constraints to force different registers to be used as output. 
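The "&" earlyclobber modifier is what forces the separation: it tells GCC that the output is written before all inputs have been consumed, so the output must not share a register with any input. A minimal ARM-only sketch of the same pattern (not the kernel macro itself):

	static inline void umul32x32(unsigned int a, unsigned int b,
				     unsigned int *hi, unsigned int *lo)
	{
		unsigned int h, l;

		/* umull rdlo, rdhi, rm, rs: rdlo/rdhi must not alias rm */
		asm("umull %0, %1, %2, %3"
		    : "=&r" (l), "=&r" (h)	/* "&" = earlyclobber */
		    : "r" (a), "r" (b));
		*hi = h;
		*lo = l;
	}

With plain "=r" constraints the compiler may legally reuse an input register for an output, which is exactly what provoked the assembler warnings above.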
Signed-off-by: Arnd Bergmann Signed-off-by: Herbert Xu --- lib/mpi/longlong.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h index b90e255c2a68..93336502af08 100644 --- a/lib/mpi/longlong.h +++ b/lib/mpi/longlong.h @@ -216,7 +216,7 @@ extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype); __asm__ ("%@ Inlined umul_ppmm\n" \ "umull %r1, %r0, %r2, %r3" \ : "=&r" ((USItype)(xh)), \ - "=r" ((USItype)(xl)) \ + "=&r" ((USItype)(xl)) \ : "r" ((USItype)(a)), \ "r" ((USItype)(b)) \ : "r0", "r1") -- cgit From 9c6bd0c2f103f4748cb4abcaf141f7d11aabfe9f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 Feb 2016 13:46:27 +0100 Subject: lib/mpi: use "static inline" instead of "extern inline" When we use CONFIG_PROFILE_ALL_BRANCHES, every 'if()' introduces a static variable, but that is not allowed in 'extern inline' functions: mpi-inline.h:116:204: warning: '______f' is static but declared in inline function 'mpihelp_sub' which is not static mpi-inline.h:113:184: warning: '______f' is static but declared in inline function 'mpihelp_sub' which is not static mpi-inline.h:70:184: warning: '______f' is static but declared in inline function 'mpihelp_add' which is not static mpi-inline.h:56:204: warning: '______f' is static but declared in inline function 'mpihelp_add_1' which is not static This changes the MPI code to use 'static inline' instead, to get rid of hundreds of warnings. Signed-off-by: Arnd Bergmann Signed-off-by: Herbert Xu --- lib/mpi/mpi-inline.h | 2 +- lib/mpi/mpi-internal.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/mpi/mpi-inline.h b/lib/mpi/mpi-inline.h index e2b39852b30a..c245ea31f785 100644 --- a/lib/mpi/mpi-inline.h +++ b/lib/mpi/mpi-inline.h @@ -30,7 +30,7 @@ #define G10_MPI_INLINE_H #ifndef G10_MPI_INLINE_DECL -#define G10_MPI_INLINE_DECL extern inline +#define G10_MPI_INLINE_DECL static inline #endif G10_MPI_INLINE_DECL mpi_limb_t diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h index c65dd1bff45a..7eceeddb3fb8 100644 --- a/lib/mpi/mpi-internal.h +++ b/lib/mpi/mpi-internal.h @@ -168,19 +168,19 @@ void mpi_rshift_limbs(MPI a, unsigned int count); int mpi_lshift_limbs(MPI a, unsigned int count); /*-- mpihelp-add.c --*/ -mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, +static inline mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb); mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_ptr_t s2_ptr, mpi_size_t size); -mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, +static inline mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_ptr_t s2_ptr, mpi_size_t s2_size); /*-- mpihelp-sub.c --*/ -mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, +static inline mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb); mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_ptr_t s2_ptr, mpi_size_t size); -mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, +static inline mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_ptr_t s2_ptr, mpi_size_t s2_size); /*-- mpihelp-cmp.c --*/ -- cgit From b9ab5ebb14ec389bd80f66613f1fe3f8f65f2521 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sun, 28 Feb 2016 22:22:42 -0600 Subject: objtool: Add CONFIG_STACK_VALIDATION option 
Add a CONFIG_STACK_VALIDATION option which will run "objtool check" for each .o file to ensure the validity of its stack metadata. Signed-off-by: Josh Poimboeuf Cc: Andrew Morton Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Bernd Petrovitsch Cc: Borislav Petkov Cc: Chris J Arges Cc: Jiri Slaby Cc: Linus Torvalds Cc: Michal Marek Cc: Namhyung Kim Cc: Pedro Alves Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: live-patching@vger.kernel.org Link: http://lkml.kernel.org/r/92baab69a6bf9bc7043af0bfca9fb964a1d45546.1456719558.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- lib/Kconfig.debug | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8bfd1aca7a3d..855265621863 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -342,6 +342,18 @@ config FRAME_POINTER larger and slower, but it gives very useful debugging information in case of kernel bugs. (precise oopses/stacktraces/warnings) +config STACK_VALIDATION + bool "Compile-time stack metadata validation" + depends on HAVE_STACK_VALIDATION + default n + help + Add compile-time checks to validate stack metadata, including frame + pointers (if CONFIG_FRAME_POINTER is enabled). This helps ensure + that runtime stack traces are more reliable. + + For more information, see + tools/objtool/Documentation/stack-validation.txt. + config DEBUG_FORCE_WEAK_PER_CPU bool "Force weak per-cpu definitions" depends on DEBUG_KERNEL -- cgit From 3712bba1a260ad851f3aa8ddea9cb7326f6aa0b3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Feb 2016 22:19:18 +0000 Subject: cpumask: Export cpumask_any_but() Almost every cpumask function is exported, just not the one I need to make the Intel uncore driver modular. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Cc: Andi Kleen Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: David S. Miller Cc: Harish Chegondi Cc: Jacob Pan Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rusty Russell Cc: Stephane Eranian Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/20160222221011.878299859@linutronix.de Signed-off-by: Ingo Molnar --- lib/cpumask.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/cpumask.c b/lib/cpumask.c index 5a70f6196f57..81dedaab36cc 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -41,6 +41,7 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) break; return i; } +EXPORT_SYMBOL(cpumask_any_but); /* These are not inline because of header tangles. */ #ifdef CONFIG_CPUMASK_OFFSTACK -- cgit From b07edbe1cf3dae9ba81f24888e2f2a9dbe778918 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 16 Feb 2016 17:24:08 +0100 Subject: netfilter: meta: add PRANDOM support Can be used to randomly match packets e.g. for statistic traffic sampling. See commit 3ad0040573b0c00f8848 ("bpf: split state from prandom_u32() and consolidate {c, e}BPF prngs") for more info why this doesn't use prandom_u32 directly. Unlike bpf nft_meta can be built as a module, so add an EXPORT_SYMBOL for prandom_seed_full_state too. 
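The export enables the following pattern -- a hedged sketch of how a module such as nft_meta can keep its own per-cpu state instead of touching the shared prandom_u32() state (all names here are illustrative):

	#include <linux/percpu.h>
	#include <linux/random.h>

	static DEFINE_PER_CPU(struct rnd_state, demo_prandom_state);

	static int __init demo_init(void)
	{
		/* seed every CPU's state once, at module load */
		prandom_seed_full_state(&demo_prandom_state);
		return 0;
	}

	/* e.g. from a packet hook, where preemption is already disabled */
	static u32 demo_sample(void)
	{
		struct rnd_state *state = this_cpu_ptr(&demo_prandom_state);

		return prandom_u32_state(state);
	}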
Cc: Daniel Borkmann Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- lib/random32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/random32.c b/lib/random32.c index 12111910ccd0..510d1ce7d4d2 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -255,6 +255,7 @@ void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state) prandom_warmup(state); } } +EXPORT_SYMBOL(prandom_seed_full_state); /* * Generate better values after random number generator -- cgit From 757c989b9994f51b42d6be1bd33c7c12d16a3ac7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 26 Feb 2016 18:43:32 +0000 Subject: cpu/hotplug: Make target state writeable Make it possible to write a target state to the per cpu state file, so we can switch between states. Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Rik van Riel Cc: Rafael Wysocki Cc: "Srivatsa S. Bhat" Cc: Peter Zijlstra Cc: Arjan van de Ven Cc: Sebastian Siewior Cc: Rusty Russell Cc: Steven Rostedt Cc: Oleg Nesterov Cc: Tejun Heo Cc: Andrew Morton Cc: Paul McKenney Cc: Linus Torvalds Cc: Paul Turner Link: http://lkml.kernel.org/r/20160226182341.022814799@linutronix.de Signed-off-by: Thomas Gleixner --- lib/Kconfig.debug | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8bfd1aca7a3d..f28f7fad452f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1442,6 +1442,19 @@ config DEBUG_BLOCK_EXT_DEVT Say N if you are unsure. +config CPU_HOTPLUG_STATE_CONTROL + bool "Enable CPU hotplug state control" + depends on DEBUG_KERNEL + depends on HOTPLUG_CPU + default n + help + Allows to write steps between "offline" and "online" to the CPUs + sysfs target file so states can be stepped granular. This is a debug + option for now as the hotplug machinery cannot be stopped and + restarted at arbitrary points yet. + + Say N if your are unsure. + config NOTIFIER_ERROR_INJECTION tristate "Notifier error injection" depends on DEBUG_KERNEL -- cgit From a8463d4b0e47d1f37af684d97884ffcf35de043b Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 2 Feb 2016 21:46:32 -0800 Subject: dma: Provide simple noop dma ops We are going to require dma_ops for several common drivers, even for systems that do have an identity mapping. Lets provide some minimal no-op dma_ops that can be used for that purpose. Signed-off-by: Christian Borntraeger Reviewed-by: Joerg Roedel Signed-off-by: Andy Lutomirski Signed-off-by: Michael S. 
Tsirkin --- lib/Makefile | 1 + lib/dma-noop.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 lib/dma-noop.c (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index a7c26a41a738..a572b86a1b1d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -18,6 +18,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o +lib-$(CONFIG_HAS_DMA) += dma-noop.o lib-y += kobject.o klist.o obj-y += lockref.o diff --git a/lib/dma-noop.c b/lib/dma-noop.c new file mode 100644 index 000000000000..72145646857e --- /dev/null +++ b/lib/dma-noop.c @@ -0,0 +1,75 @@ +/* + * lib/dma-noop.c + * + * Simple DMA noop-ops that map 1:1 with memory + */ +#include +#include +#include +#include + +static void *dma_noop_alloc(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, + struct dma_attrs *attrs) +{ + void *ret; + + ret = (void *)__get_free_pages(gfp, get_order(size)); + if (ret) + *dma_handle = virt_to_phys(ret); + return ret; +} + +static void dma_noop_free(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_addr, + struct dma_attrs *attrs) +{ + free_pages((unsigned long)cpu_addr, get_order(size)); +} + +static dma_addr_t dma_noop_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + return page_to_phys(page) + offset; +} + +static int dma_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + int i; + struct scatterlist *sg; + + for_each_sg(sgl, sg, nents, i) { + void *va; + + BUG_ON(!sg_page(sg)); + va = sg_virt(sg); + sg_dma_address(sg) = (dma_addr_t)virt_to_phys(va); + sg_dma_len(sg) = sg->length; + } + + return nents; +} + +static int dma_noop_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return 0; +} + +static int dma_noop_supported(struct device *dev, u64 mask) +{ + return 1; +} + +struct dma_map_ops dma_noop_ops = { + .alloc = dma_noop_alloc, + .free = dma_noop_free, + .map_page = dma_noop_map_page, + .map_sg = dma_noop_map_sg, + .mapping_error = dma_noop_mapping_error, + .dma_supported = dma_noop_supported, +}; + +EXPORT_SYMBOL(dma_noop_ops); -- cgit From d77a117e6871ff78a06def46583d23752593de60 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 9 Mar 2016 14:08:10 -0800 Subject: list: kill list_force_poison() Given we have uninitialized list_heads being passed to list_add() it will always be the case that those uninitialized values randomly trigger the poison value. Especially since a list_add() operation will seed the stack with the poison value for later stack allocations to trip over. For example, see these two false positive reports: list_add attempted on force-poisoned entry WARNING: at lib/list_debug.c:34 [..] NIP [c00000000043c390] __list_add+0xb0/0x150 LR [c00000000043c38c] __list_add+0xac/0x150 Call Trace: __list_add+0xac/0x150 (unreliable) __down+0x4c/0xf8 down+0x68/0x70 xfs_buf_lock+0x4c/0x150 [xfs] list_add attempted on force-poisoned entry(0000000000000500), new->next == d0000000059ecdb0, new->prev == 0000000000000500 WARNING: at lib/list_debug.c:33 [..] 
NIP [c00000000042db78] __list_add+0xa8/0x140 LR [c00000000042db74] __list_add+0xa4/0x140 Call Trace: __list_add+0xa4/0x140 (unreliable) rwsem_down_read_failed+0x6c/0x1a0 down_read+0x58/0x60 xfs_log_commit_cil+0x7c/0x600 [xfs] Fixes: commit 5c2c2587b132 ("mm, dax, pmem: introduce {get|put}_dev_pagemap() for dax-gup") Signed-off-by: Dan Williams Reported-by: Eryu Guan Tested-by: Eryu Guan Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/list_debug.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'lib') diff --git a/lib/list_debug.c b/lib/list_debug.c index 3345a089ef7b..3859bf63561c 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -12,13 +12,6 @@ #include #include -static struct list_head force_poison; -void list_force_poison(struct list_head *entry) -{ - entry->next = &force_poison; - entry->prev = &force_poison; -} - /* * Insert a new entry between two known consecutive entries. * @@ -30,8 +23,6 @@ void __list_add(struct list_head *new, struct list_head *prev, struct list_head *next) { - WARN(new->next == &force_poison || new->prev == &force_poison, - "list_add attempted on force-poisoned entry\n"); WARN(next->prev != prev, "list_add corruption. next->prev should be " "prev (%p), but was %p. (next=%p).\n", -- cgit From 01cfbad79a5e2b835abf6a8154a341d75a6fc8cd Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 11 Mar 2016 14:05:34 -0800 Subject: ipv4: Update parameters for csum_tcpudp_magic to their original types This patch updates all instances of csum_tcpudp_magic and csum_tcpudp_nofold to reflect the types that are usually used as the source inputs. For example the protocol field is populated based on nexthdr which is actually an unsigned 8 bit value. The length is usually populated based on skb->len which is an unsigned integer. This addresses an issue in which the IPv6 function csum_ipv6_magic was generating a checksum using the full 32b of skb->len while csum_tcpudp_magic was only using the lower 16 bits. As a result we could run into issues when attempting to adjust the checksum as there was no protocol agnostic way to update it. With this change the value is still truncated as many architectures use "(len + proto) << 8", however this truncation only occurs for values greater than 16776960 in length and as such is unlikely to occur as we stop the inner headers at ~64K in size. I did have to make a few minor changes in the arm, mn10300, nios2, and score versions of the function in order to support these changes as they were either using things such as an OR to combine the protocol and length, or were using ntohs to convert the length which would have truncated the value. I also updated a few spots in terms of whitespace and type differences for the addresses. Most of this was just to make sure all of the definitions were in sync going forward. Signed-off-by: Alexander Duyck Signed-off-by: David S. 
Miller --- lib/checksum.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/checksum.c b/lib/checksum.c index 8b39e86dbab5..d3ec93f9e5f3 100644 --- a/lib/checksum.c +++ b/lib/checksum.c @@ -191,9 +191,7 @@ static inline u32 from64to32(u64 x) } __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, - __wsum sum) + __u32 len, __u8 proto, __wsum sum) { unsigned long long s = (__force u32)sum; -- cgit From edf14cdbf9a0e5ab52698ca66d07a76ade0d5c46 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Mar 2016 14:55:56 -0700 Subject: mm, printk: introduce new format string for flags In mm we use several kinds of flags bitfields that are sometimes printed for debugging purposes, or exported to userspace via sysfs. To make them easier to interpret independently on kernel version and config, we want to dump also the symbolic flag names. So far this has been done with repeated calls to pr_cont(), which is unreliable on SMP, and not usable for e.g. sysfs export. To get a more reliable and universal solution, this patch extends printk() format string for pointers to handle the page flags (%pGp), gfp_flags (%pGg) and vma flags (%pGv). Existing users of dump_flag_names() are converted and simplified. It would be possible to pass flags by value instead of pointer, but the %p format string for pointers already has extensions for various kernel structures, so it's a good fit, and the extra indirection in a non-critical path is negligible. [linux@rasmusvillemoes.dk: lots of good implementation suggestions] Signed-off-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: Rasmus Villemoes Cc: Joonsoo Kim Cc: Minchan Kim Cc: Sasha Levin Cc: "Kirill A. 
Shutemov" Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_printf.c | 53 +++++++++++++++++++++++++++++++++++++++ lib/vsprintf.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+) (limited to 'lib') diff --git a/lib/test_printf.c b/lib/test_printf.c index 4f6ae60433bc..563f10e6876a 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -17,6 +17,9 @@ #include #include +#include +#include + #define BUF_SIZE 256 #define PAD_SIZE 16 #define FILL_CHAR '$' @@ -410,6 +413,55 @@ netdev_features(void) { } +static void __init +flags(void) +{ + unsigned long flags; + gfp_t gfp; + char *cmp_buffer; + + flags = 0; + test("", "%pGp", &flags); + + /* Page flags should filter the zone id */ + flags = 1UL << NR_PAGEFLAGS; + test("", "%pGp", &flags); + + flags |= 1UL << PG_uptodate | 1UL << PG_dirty | 1UL << PG_lru + | 1UL << PG_active | 1UL << PG_swapbacked; + test("uptodate|dirty|lru|active|swapbacked", "%pGp", &flags); + + + flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC + | VM_DENYWRITE; + test("read|exec|mayread|maywrite|mayexec|denywrite", "%pGv", &flags); + + gfp = GFP_TRANSHUGE; + test("GFP_TRANSHUGE", "%pGg", &gfp); + + gfp = GFP_ATOMIC|__GFP_DMA; + test("GFP_ATOMIC|GFP_DMA", "%pGg", &gfp); + + gfp = __GFP_ATOMIC; + test("__GFP_ATOMIC", "%pGg", &gfp); + + cmp_buffer = kmalloc(BUF_SIZE, GFP_KERNEL); + if (!cmp_buffer) + return; + + /* Any flags not translated by the table should remain numeric */ + gfp = ~__GFP_BITS_MASK; + snprintf(cmp_buffer, BUF_SIZE, "%#lx", (unsigned long) gfp); + test(cmp_buffer, "%pGg", &gfp); + + snprintf(cmp_buffer, BUF_SIZE, "__GFP_ATOMIC|%#lx", + (unsigned long) gfp); + gfp |= __GFP_ATOMIC; + test(cmp_buffer, "%pGg", &gfp); + + kfree(cmp_buffer); +} + static void __init test_pointer(void) { @@ -428,6 +480,7 @@ test_pointer(void) struct_clk(); bitmap(); netdev_features(); + flags(); } static int __init diff --git a/lib/vsprintf.c b/lib/vsprintf.c index f44e178e6ede..525c8e19bda2 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -35,6 +35,8 @@ #include #endif +#include "../mm/internal.h" /* For the trace_print_flags arrays */ + #include /* for PAGE_SIZE */ #include /* for dereference_function_descriptor() */ #include /* cpu_to_le16 */ @@ -1407,6 +1409,72 @@ char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec, } } +static +char *format_flags(char *buf, char *end, unsigned long flags, + const struct trace_print_flags *names) +{ + unsigned long mask; + const struct printf_spec strspec = { + .field_width = -1, + .precision = -1, + }; + const struct printf_spec numspec = { + .flags = SPECIAL|SMALL, + .field_width = -1, + .precision = -1, + .base = 16, + }; + + for ( ; flags && names->name; names++) { + mask = names->mask; + if ((flags & mask) != mask) + continue; + + buf = string(buf, end, names->name, strspec); + + flags &= ~mask; + if (flags) { + if (buf < end) + *buf = '|'; + buf++; + } + } + + if (flags) + buf = number(buf, end, flags, numspec); + + return buf; +} + +static noinline_for_stack +char *flags_string(char *buf, char *end, void *flags_ptr, const char *fmt) +{ + unsigned long flags; + const struct trace_print_flags *names; + + switch (fmt[1]) { + case 'p': + flags = *(unsigned long *)flags_ptr; + /* Remove zone id */ + flags &= (1UL << NR_PAGEFLAGS) - 1; + names = pageflag_names; + break; + case 'v': + flags = *(unsigned long *)flags_ptr; + names = vmaflag_names; + break; + case 'g': + flags = *(gfp_t *)flags_ptr; + names = 
gfpflag_names; + break; + default: + WARN_ONCE(1, "Unsupported flags modifier: %c\n", fmt[1]); + return buf; + } + + return format_flags(buf, end, flags, names); +} + int kptr_restrict __read_mostly; /* @@ -1495,6 +1563,11 @@ int kptr_restrict __read_mostly; * - 'Cn' For a clock, it prints the name (Common Clock Framework) or address * (legacy clock framework) of the clock * - 'Cr' For a clock, it prints the current rate of the clock + * - 'G' For flags to be printed as a collection of symbolic strings that would + * construct the specific value. Supported flags given by option: + * p page flags (see struct page) given as pointer to unsigned long + * g gfp flags (GFP_* and __GFP_*) given as pointer to gfp_t + * v vma flags (VM_*) given as pointer to unsigned long * * ** Please update also Documentation/printk-formats.txt when making changes ** * @@ -1648,6 +1721,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return bdev_name(buf, end, ptr, spec, fmt); #endif + case 'G': + return flags_string(buf, end, ptr, fmt); } spec.flags |= SMALL; if (spec.field_width == -1) { -- cgit From 58e698af4c6347c726090d5480b2e51d1d07edf9 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 17 Mar 2016 14:18:36 -0700 Subject: radix-tree: account radix_tree_node to memory cgroup Allocation of radix_tree_node objects can be easily triggered from userspace, so we should account them to memory cgroup. Besides, we need them accounted for making shadow node shrinker per memcg (see mm/workingset.c). A tricky thing about accounting radix_tree_node objects is that they are mostly allocated through radix_tree_preload(), so we can't just set SLAB_ACCOUNT for radix_tree_node_cachep - that would likely result in a lot of unrelated cgroups using objects from each other's caches. One way to overcome this would be making radix tree preloads per memcg, but that would probably look cumbersome and overcomplicated. Instead, we make radix_tree_node_alloc() first try to allocate from the cache with __GFP_ACCOUNT, no matter if the caller has preloaded or not, and only if it fails fall back on using per cpu preloads. This should make most allocations accounted. Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 6b79e9026e24..224b369f5a5e 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -191,6 +191,15 @@ radix_tree_node_alloc(struct radix_tree_root *root) if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) { struct radix_tree_preload *rtp; + /* + * Even if the caller has preloaded, try to allocate from the + * cache first for the new node to get accounted. + */ + ret = kmem_cache_alloc(radix_tree_node_cachep, + gfp_mask | __GFP_ACCOUNT | __GFP_NOWARN); + if (ret) + goto out; + /* * Provided the caller has preloaded here, we will always * succeed in getting a node here (and never reach @@ -208,10 +217,11 @@ radix_tree_node_alloc(struct radix_tree_root *root) * for debugging. 
*/ kmemleak_update_trace(ret); + goto out; } - if (ret == NULL) - ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); - + ret = kmem_cache_alloc(radix_tree_node_cachep, + gfp_mask | __GFP_ACCOUNT); +out: BUG_ON(radix_tree_is_indirect_ptr(ret)); return ret; } -- cgit From d7b85cab74edef84a9330c476478ba8cd732b6a9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 17 Mar 2016 14:21:38 -0700 Subject: lib/bug.c: make panic_on_warn available for all architectures Christian Borntraeger reported that panic_on_warn doesn't have any effect on s390. The panic_on_warn feature was introduced with 9e3961a09798 ("kernel: add panic_on_warn"). However it did care only for the case when WANT_WARN_ON_SLOWPATH is defined. This is turn is only the case for architectures which do not have an own __WARN_TAINT defined. Other architectures which do have __WARN_TAINT defined call report_bug() for warnings within lib/bug.c which does not call panic() in case panic_on_warn is set. Let's simply enable the panic_on_warn feature by adding the same code like it was added to warn_slowpath_common() in panic.c. This enables panic_on_warn also for arm64, parisc, powerpc, s390 and sh. Signed-off-by: Heiko Carstens Reported-by: Christian Borntraeger Tested-by: Christian Borntraeger Acked-by: Prarit Bhargava Cc: Catalin Marinas Cc: Will Deacon Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Tested-by: Michael Ellerman (powerpc) Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bug.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'lib') diff --git a/lib/bug.c b/lib/bug.c index cff145f032a5..6cde380f09de 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -175,6 +175,17 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) pr_warn("WARNING: at %p [verbose debug info unavailable]\n", (void *)bugaddr); + if (panic_on_warn) { + /* + * This thread may hit another WARN() in the panic path. + * Resetting this prevents additional WARN() from + * panicking the system on this thread. Other threads + * are blocked by the panic_mutex in panic(). + */ + panic_on_warn = 0; + panic("panic_on_warn set ...\n"); + } + print_modules(); show_regs(regs); print_oops_end_marker(); -- cgit From 339e6353046dd4f675304d696a88aefdd727298e Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 17 Mar 2016 14:21:48 -0700 Subject: radix_tree: tag all internal tree nodes as indirect pointers Set the 'indirect_ptr' bit on all the pointers to internal nodes, not just on the root node. This enables the following patches to support multi-order entries in the radix tree. This patch is split out for ease of bisection. Signed-off-by: Matthew Wilcox Cc: Johannes Weiner Cc: Matthew Wilcox Cc: "Kirill A. 
Shutemov" Cc: Ross Zwisler Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 224b369f5a5e..ff91792346f6 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -368,9 +368,10 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) node->count = 1; node->parent = NULL; slot = root->rnode; - if (newheight > 1) { + if (radix_tree_is_indirect_ptr(slot) && newheight > 1) { slot = indirect_to_ptr(slot); slot->parent = node; + slot = ptr_to_indirect(slot); } node->slots[0] = slot; node = ptr_to_indirect(node); @@ -425,17 +426,20 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, slot->path = height; slot->parent = node; if (node) { - rcu_assign_pointer(node->slots[offset], slot); + rcu_assign_pointer(node->slots[offset], + ptr_to_indirect(slot)); node->count++; slot->path |= offset << RADIX_TREE_HEIGHT_SHIFT; } else - rcu_assign_pointer(root->rnode, ptr_to_indirect(slot)); + rcu_assign_pointer(root->rnode, + ptr_to_indirect(slot)); } /* Go a level down */ offset = (index >> shift) & RADIX_TREE_MAP_MASK; node = slot; slot = node->slots[offset]; + slot = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; height--; } @@ -533,6 +537,7 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, node = rcu_dereference_raw(*slot); if (node == NULL) return NULL; + node = indirect_to_ptr(node); shift -= RADIX_TREE_MAP_SHIFT; height--; @@ -619,6 +624,7 @@ void *radix_tree_tag_set(struct radix_tree_root *root, tag_set(slot, tag, offset); slot = slot->slots[offset]; BUG_ON(slot == NULL); + slot = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; height--; } @@ -658,11 +664,12 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, goto out; shift = height * RADIX_TREE_MAP_SHIFT; - slot = indirect_to_ptr(root->rnode); + slot = root->rnode; while (shift) { if (slot == NULL) goto out; + slot = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; @@ -738,6 +745,7 @@ int radix_tree_tag_get(struct radix_tree_root *root, if (node == NULL) return 0; + node = indirect_to_ptr(node); offset = (index >> shift) & RADIX_TREE_MAP_MASK; if (!tag_get(node, tag, offset)) @@ -838,6 +846,7 @@ restart: node = rcu_dereference_raw(node->slots[offset]); if (node == NULL) goto restart; + node = indirect_to_ptr(node); shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; } @@ -939,6 +948,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, shift -= RADIX_TREE_MAP_SHIFT; node = slot; slot = slot->slots[offset]; + slot = indirect_to_ptr(slot); continue; } @@ -1195,6 +1205,7 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item, slot = rcu_dereference_raw(slot->slots[i]); if (slot == NULL) goto out; + slot = indirect_to_ptr(slot); } /* Bottom level: check items */ @@ -1278,7 +1289,8 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) */ if (to_free->count != 1) break; - if (!to_free->slots[0]) + slot = to_free->slots[0]; + if (!slot) break; /* @@ -1288,8 +1300,8 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) * (to_free->slots[0]), it will be safe to dereference the new * one (root->rnode) as far as dependent read barriers go. 
*/ - slot = to_free->slots[0]; if (root->height > 1) { + slot = indirect_to_ptr(slot); slot->parent = NULL; slot = ptr_to_indirect(slot); } -- cgit From 0070e28d97e72aeac2a85f538d8a452400dfe1c7 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 17 Mar 2016 14:21:51 -0700 Subject: radix_tree: loop based on shift count, not height When we introduce entries that can cover multiple indices, we will need to stop in __radix_tree_create based on the shift, not the height. Split out for ease of bisect. Signed-off-by: Matthew Wilcox Cc: Johannes Weiner Cc: Matthew Wilcox Cc: "Kirill A. Shutemov" Cc: Ross Zwisler Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index ff91792346f6..9bb440f317c9 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -415,10 +415,10 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, slot = indirect_to_ptr(root->rnode); height = root->height; - shift = (height-1) * RADIX_TREE_MAP_SHIFT; + shift = height * RADIX_TREE_MAP_SHIFT; offset = 0; /* uninitialised var warning */ - while (height > 0) { + while (shift > 0) { if (slot == NULL) { /* Have to add a child node. */ if (!(slot = radix_tree_node_alloc(root))) @@ -436,11 +436,11 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, } /* Go a level down */ + shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; node = slot; slot = node->slots[offset]; slot = indirect_to_ptr(slot); - shift -= RADIX_TREE_MAP_SHIFT; height--; } -- cgit From e61452365372570253b2b1de84bab0cdb2e62c64 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 17 Mar 2016 14:21:54 -0700 Subject: radix_tree: add support for multi-order entries With huge pages, it is convenient to have the radix tree be able to return an entry that covers multiple indices. Previous attempts to deal with the problem have involved inserting N duplicate entries, which is a waste of memory and leads to problems trying to handle aliased tags, or probing the tree multiple times to find alternative entries which might cover the requested index. This approach inserts one canonical entry into the tree for a given range of indices, and may also insert other entries in order to ensure that lookups find the canonical entry. This solution only tolerates inserting powers of two that are greater than the fanout of the tree. If we wish to expand the radix tree's abilities to support large-ish pages that is less than the fanout at the penultimate level of the tree, then we would need to add one more step in lookup to ensure that any sibling nodes in the final level of the tree are dereferenced and we return the canonical entry that they reference. Signed-off-by: Matthew Wilcox Cc: Johannes Weiner Cc: Matthew Wilcox Cc: "Kirill A. Shutemov" Cc: Ross Zwisler Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 109 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 83 insertions(+), 26 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 9bb440f317c9..d907dca302d5 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -333,7 +333,8 @@ static inline unsigned long radix_tree_maxindex(unsigned int height) /* * Extend a radix tree so it can store key @index. 
*/ -static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) +static int radix_tree_extend(struct radix_tree_root *root, + unsigned long index, unsigned order) { struct radix_tree_node *node; struct radix_tree_node *slot; @@ -345,7 +346,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) while (index > radix_tree_maxindex(height)) height++; - if (root->rnode == NULL) { + if ((root->rnode == NULL) && (order == 0)) { root->height = height; goto out; } @@ -386,6 +387,7 @@ out: * __radix_tree_create - create a slot in a radix tree * @root: radix tree root * @index: index key + * @order: index occupies 2^order aligned slots * @nodep: returns node * @slotp: returns slot * @@ -399,26 +401,29 @@ out: * Returns -ENOMEM, or 0 for success. */ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, - struct radix_tree_node **nodep, void ***slotp) + unsigned order, struct radix_tree_node **nodep, + void ***slotp) { struct radix_tree_node *node = NULL, *slot; unsigned int height, shift, offset; int error; + BUG_ON((0 < order) && (order < RADIX_TREE_MAP_SHIFT)); + /* Make sure the tree is high enough. */ if (index > radix_tree_maxindex(root->height)) { - error = radix_tree_extend(root, index); + error = radix_tree_extend(root, index, order); if (error) return error; } - slot = indirect_to_ptr(root->rnode); + slot = root->rnode; height = root->height; shift = height * RADIX_TREE_MAP_SHIFT; offset = 0; /* uninitialised var warning */ - while (shift > 0) { + while (shift > order) { if (slot == NULL) { /* Have to add a child node. */ if (!(slot = radix_tree_node_alloc(root))) @@ -433,15 +438,31 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, } else rcu_assign_pointer(root->rnode, ptr_to_indirect(slot)); - } + } else if (!radix_tree_is_indirect_ptr(slot)) + break; /* Go a level down */ + height--; shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - node = slot; + node = indirect_to_ptr(slot); slot = node->slots[offset]; - slot = indirect_to_ptr(slot); - height--; + } + + /* Insert pointers to the canonical entry */ + if ((shift - order) > 0) { + int i, n = 1 << (shift - order); + offset = offset & ~(n - 1); + slot = ptr_to_indirect(&node->slots[offset]); + for (i = 0; i < n; i++) { + if (node->slots[offset + i]) + return -EEXIST; + } + + for (i = 1; i < n; i++) { + rcu_assign_pointer(node->slots[offset + i], slot); + node->count++; + } } if (nodep) @@ -452,15 +473,16 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, } /** - * radix_tree_insert - insert into a radix tree + * __radix_tree_insert - insert into a radix tree * @root: radix tree root * @index: index key + * @order: key covers the 2^order indices around index * @item: item to insert * * Insert an item into the radix tree at position @index. 
*/ -int radix_tree_insert(struct radix_tree_root *root, - unsigned long index, void *item) +int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, + unsigned order, void *item) { struct radix_tree_node *node; void **slot; @@ -468,7 +490,7 @@ int radix_tree_insert(struct radix_tree_root *root, BUG_ON(radix_tree_is_indirect_ptr(item)); - error = __radix_tree_create(root, index, &node, &slot); + error = __radix_tree_create(root, index, order, &node, &slot); if (error) return error; if (*slot != NULL) @@ -486,7 +508,7 @@ int radix_tree_insert(struct radix_tree_root *root, return 0; } -EXPORT_SYMBOL(radix_tree_insert); +EXPORT_SYMBOL(__radix_tree_insert); /** * __radix_tree_lookup - lookup an item in a radix tree @@ -537,6 +559,8 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, node = rcu_dereference_raw(*slot); if (node == NULL) return NULL; + if (!radix_tree_is_indirect_ptr(node)) + break; node = indirect_to_ptr(node); shift -= RADIX_TREE_MAP_SHIFT; @@ -624,6 +648,8 @@ void *radix_tree_tag_set(struct radix_tree_root *root, tag_set(slot, tag, offset); slot = slot->slots[offset]; BUG_ON(slot == NULL); + if (!radix_tree_is_indirect_ptr(slot)) + break; slot = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; height--; @@ -669,6 +695,8 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, while (shift) { if (slot == NULL) goto out; + if (!radix_tree_is_indirect_ptr(slot)) + break; slot = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; @@ -753,6 +781,8 @@ int radix_tree_tag_get(struct radix_tree_root *root, if (height == 1) return 1; node = rcu_dereference_raw(node->slots[offset]); + if (!radix_tree_is_indirect_ptr(node)) + return 1; shift -= RADIX_TREE_MAP_SHIFT; height--; } @@ -813,6 +843,7 @@ restart: node = rnode; while (1) { + struct radix_tree_node *slot; if ((flags & RADIX_TREE_ITER_TAGGED) ? 
!test_bit(offset, node->tags[tag]) : !node->slots[offset]) { @@ -843,10 +874,12 @@ restart: if (!shift) break; - node = rcu_dereference_raw(node->slots[offset]); - if (node == NULL) + slot = rcu_dereference_raw(node->slots[offset]); + if (slot == NULL) goto restart; - node = indirect_to_ptr(node); + if (!radix_tree_is_indirect_ptr(slot)) + break; + node = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; } @@ -944,16 +977,20 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, if (!tag_get(slot, iftag, offset)) goto next; if (shift) { - /* Go down one level */ - shift -= RADIX_TREE_MAP_SHIFT; node = slot; slot = slot->slots[offset]; - slot = indirect_to_ptr(slot); - continue; + if (radix_tree_is_indirect_ptr(slot)) { + slot = indirect_to_ptr(slot); + shift -= RADIX_TREE_MAP_SHIFT; + continue; + } else { + slot = node; + node = node->parent; + } } /* tag the leaf */ - tagged++; + tagged += 1 << shift; tag_set(slot, settag, offset); /* walk back up the path tagging interior nodes */ @@ -1201,11 +1238,20 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item, goto out; } - shift -= RADIX_TREE_MAP_SHIFT; slot = rcu_dereference_raw(slot->slots[i]); if (slot == NULL) goto out; + if (!radix_tree_is_indirect_ptr(slot)) { + if (slot == item) { + *found_index = index + i; + index = 0; + } else { + index += shift; + } + goto out; + } slot = indirect_to_ptr(slot); + shift -= RADIX_TREE_MAP_SHIFT; } /* Bottom level: check items */ @@ -1285,7 +1331,8 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) /* * The candidate node has more than one child, or its child - * is not at the leftmost slot, we cannot shrink. + * is not at the leftmost slot, or it is a multiorder entry, + * we cannot shrink. */ if (to_free->count != 1) break; @@ -1301,6 +1348,9 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) * one (root->rnode) as far as dependent read barriers go. */ if (root->height > 1) { + if (!radix_tree_is_indirect_ptr(slot)) + break; + slot = indirect_to_ptr(slot); slot->parent = NULL; slot = ptr_to_indirect(slot); @@ -1399,7 +1449,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root, unsigned long index, void *item) { struct radix_tree_node *node; - unsigned int offset; + unsigned int offset, i; void **slot; void *entry; int tag; @@ -1428,6 +1478,13 @@ void *radix_tree_delete_item(struct radix_tree_root *root, radix_tree_tag_clear(root, index, tag); } + /* Delete any sibling slots pointing to this slot */ + for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { + if (node->slots[offset + i] != ptr_to_indirect(slot)) + break; + node->slots[offset + i] = NULL; + node->count--; + } node->slots[offset] = NULL; node->count--; -- cgit From 7cf19af4debc804e408b059d58df7c9c226ca6fb Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 17 Mar 2016 14:21:57 -0700 Subject: radix_tree: add radix_tree_dump This is debug code which is #if 0 out. Signed-off-by: Matthew Wilcox Cc: Johannes Weiner Cc: Matthew Wilcox Cc: "Kirill A. 
Shutemov" Cc: Ross Zwisler Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index d907dca302d5..1624c4117961 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -173,6 +173,41 @@ radix_tree_find_next_bit(const unsigned long *addr, return size; } +#if 0 +static void dump_node(void *slot, int height, int offset) +{ + struct radix_tree_node *node; + int i; + + if (!slot) + return; + + if (height == 0) { + pr_debug("radix entry %p offset %d\n", slot, offset); + return; + } + + node = indirect_to_ptr(slot); + pr_debug("radix node: %p offset %d tags %lx %lx %lx path %x count %d parent %p\n", + slot, offset, node->tags[0][0], node->tags[1][0], + node->tags[2][0], node->path, node->count, node->parent); + + for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) + dump_node(node->slots[i], height - 1, i); +} + +/* For debug */ +static void radix_tree_dump(struct radix_tree_root *root) +{ + pr_debug("radix root: %p height %d rnode %p tags %x\n", + root, root->height, root->rnode, + root->gfp_mask >> __GFP_BITS_SHIFT); + if (!radix_tree_is_indirect_ptr(root->rnode)) + return; + dump_node(root->rnode, root->height, 0); +} +#endif + /* * This assumes that the caller has performed appropriate preallocation, and * that the caller has pinned this thread of control to the current CPU. -- cgit From 56b060814e2d87d6646a85a2f4609c73587399ca Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 17 Mar 2016 14:22:14 -0700 Subject: lib/string: introduce match_string() helper Occasionally we have to search for an occurrence of a string in an array of strings. Make a simple helper for that purpose. Signed-off-by: Andy Shevchenko Cc: "David S. Miller" Cc: Bartlomiej Zolnierkiewicz Cc: David Airlie Cc: David Woodhouse Cc: Dmitry Eremin-Solenikov Cc: Greg Kroah-Hartman Cc: Heikki Krogerus Cc: Linus Walleij Cc: Mika Westerberg Cc: Rafael J. Wysocki Cc: Sebastian Reichel Cc: Tejun Heo Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/string.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'lib') diff --git a/lib/string.c b/lib/string.c index 0323c0d5629a..e9c9db161d2c 100644 --- a/lib/string.c +++ b/lib/string.c @@ -630,6 +630,32 @@ bool sysfs_streq(const char *s1, const char *s2) } EXPORT_SYMBOL(sysfs_streq); +/** + * match_string - matches given string in an array + * @array: array of strings + * @n: number of strings in the array or -1 for NULL terminated arrays + * @string: string to match with + * + * Return: + * index of a @string in the @array if matches, or %-EINVAL otherwise. + */ +int match_string(const char * const *array, size_t n, const char *string) +{ + int index; + const char *item; + + for (index = 0; index < n; index++) { + item = array[index]; + if (!item) + break; + if (!strcmp(item, string)) + return index; + } + + return -EINVAL; +} +EXPORT_SYMBOL(match_string); + /** * strtobool - convert common user inputs into boolean values * @s: input string -- cgit From ef951599074ba4fad2d0efa0a977129b41e6d203 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Mar 2016 14:22:50 -0700 Subject: lib: move strtobool() to kstrtobool() Create the kstrtobool_from_user() helper and move strtobool() logic into the new kstrtobool() (matching all the other kstrto* functions). Provides an inline wrapper for existing strtobool() callers. 
Signed-off-by: Kees Cook Cc: Joe Perches Cc: Andy Shevchenko Cc: Rasmus Villemoes Cc: Daniel Borkmann Cc: Amitkumar Karwar Cc: Nishant Sarmukadam Cc: Kalle Valo Cc: Steve French Cc: Michael Ellerman Cc: Heiko Carstens Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/kstrtox.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ lib/string.c | 29 ----------------------------- 2 files changed, 50 insertions(+), 29 deletions(-) (limited to 'lib') diff --git a/lib/kstrtox.c b/lib/kstrtox.c index 94be244e8441..e8ba4a013e82 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -321,6 +321,56 @@ int kstrtos8(const char *s, unsigned int base, s8 *res) } EXPORT_SYMBOL(kstrtos8); +/** + * kstrtobool - convert common user inputs into boolean values + * @s: input string + * @res: result + * + * This routine returns 0 iff the first character is one of 'Yy1Nn0'. + * Otherwise it will return -EINVAL. Value pointed to by res is + * updated upon finding a match. + */ +int kstrtobool(const char *s, bool *res) +{ + if (!s) + return -EINVAL; + + switch (s[0]) { + case 'y': + case 'Y': + case '1': + *res = true; + return 0; + case 'n': + case 'N': + case '0': + *res = false; + return 0; + default: + break; + } + + return -EINVAL; +} +EXPORT_SYMBOL(kstrtobool); + +/* + * Since "base" would be a nonsense argument, this open-codes the + * _from_user helper instead of using the helper macro below. + */ +int kstrtobool_from_user(const char __user *s, size_t count, bool *res) +{ + /* Longest string needed to differentiate, newline, terminator */ + char buf[4]; + + count = min(count, sizeof(buf) - 1); + if (copy_from_user(buf, s, count)) + return -EFAULT; + buf[count] = '\0'; + return kstrtobool(buf, res); +} +EXPORT_SYMBOL(kstrtobool_from_user); + #define kstrto_from_user(f, g, type) \ int f(const char __user *s, size_t count, unsigned int base, type *res) \ { \ diff --git a/lib/string.c b/lib/string.c index e9c9db161d2c..ed83562a53ae 100644 --- a/lib/string.c +++ b/lib/string.c @@ -656,35 +656,6 @@ int match_string(const char * const *array, size_t n, const char *string) } EXPORT_SYMBOL(match_string); -/** - * strtobool - convert common user inputs into boolean values - * @s: input string - * @res: result - * - * This routine returns 0 iff the first character is one of 'Yy1Nn0'. - * Otherwise it will return -EINVAL. Value pointed to by res is - * updated upon finding a match. - */ -int strtobool(const char *s, bool *res) -{ - switch (s[0]) { - case 'y': - case 'Y': - case '1': - *res = true; - break; - case 'n': - case 'N': - case '0': - *res = false; - break; - default: - return -EINVAL; - } - return 0; -} -EXPORT_SYMBOL(strtobool); - #ifndef __HAVE_ARCH_MEMSET /** * memset - Fill a region of memory with the given value -- cgit From a81a5a17d44b26521fb1199f8ccf27f4af337a67 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Mar 2016 14:22:57 -0700 Subject: lib: add "on"/"off" support to kstrtobool Add support for "on" and "off" when converting to boolean. 
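Based on the parsing in the hunk below, the accepted inputs after this change behave as follows (illustrative):

	bool v;

	kstrtobool("on", &v);   /* returns 0, v == true  */
	kstrtobool("Off", &v);  /* returns 0, v == false */
	kstrtobool("yes", &v);  /* returns 0, v == true: only s[0] is checked */
	kstrtobool("ox", &v);   /* returns -EINVAL: 'o' needs [nN] or [fF] next */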
Signed-off-by: Kees Cook Cc: Amitkumar Karwar Cc: Andy Shevchenko Cc: Daniel Borkmann Cc: Heiko Carstens Cc: Joe Perches Cc: Kalle Valo Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Nishant Sarmukadam Cc: Rasmus Villemoes Cc: Steve French Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/kstrtox.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/kstrtox.c b/lib/kstrtox.c index e8ba4a013e82..d8a5cf66c316 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -326,9 +326,9 @@ EXPORT_SYMBOL(kstrtos8); * @s: input string * @res: result * - * This routine returns 0 iff the first character is one of 'Yy1Nn0'. - * Otherwise it will return -EINVAL. Value pointed to by res is - * updated upon finding a match. + * This routine returns 0 iff the first character is one of 'Yy1Nn0', or + * [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL. Value + * pointed to by res is updated upon finding a match. */ int kstrtobool(const char *s, bool *res) { @@ -346,6 +346,20 @@ int kstrtobool(const char *s, bool *res) case '0': *res = false; return 0; + case 'o': + case 'O': + switch (s[1]) { + case 'n': + case 'N': + *res = true; + return 0; + case 'f': + case 'F': + *res = false; + return 0; + default: + break; + } default: break; } -- cgit From 2553b67a1fbe7bf202e4e8070ab0b00d3d3a06a2 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 17 Mar 2016 14:23:04 -0700 Subject: lib/bug.c: use common WARN helper The traceoff_on_warning option doesn't have any effect on s390, powerpc, arm64, parisc, and sh because there are two different types of WARN implementations: 1) The above mentioned architectures treat WARN() as a special case of a BUG() exception. They handle warnings in report_bug() in lib/bug.c. 2) All other architectures just call warn_slowpath_*() directly. Their warnings are handled in warn_slowpath_common() in kernel/panic.c. Support traceoff_on_warning on all architectures and prevent any future divergence by using a single common function to emit the warning. Also remove the '()' from '%pS()', because the parentheses look funky: [ 45.607629] WARNING: at /root/warn_mod/warn_mod.c:17 .init_dummy+0x20/0x40 [warn_mod]() Reported-by: Chunyu Hu Signed-off-by: Josh Poimboeuf Acked-by: Heiko Carstens Tested-by: Prarit Bhargava Acked-by: Prarit Bhargava Acked-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bug.c | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/bug.c b/lib/bug.c index 6cde380f09de..bc3656e944d2 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -167,30 +167,8 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) if (warning) { /* this is a WARN_ON rather than BUG/BUG_ON */ - pr_warn("------------[ cut here ]------------\n"); - - if (file) - pr_warn("WARNING: at %s:%u\n", file, line); - else - pr_warn("WARNING: at %p [verbose debug info unavailable]\n", - (void *)bugaddr); - - if (panic_on_warn) { - /* - * This thread may hit another WARN() in the panic path. - * Resetting this prevents additional WARN() from - * panicking the system on this thread. Other threads - * are blocked by the panic_mutex in panic(). - */ - panic_on_warn = 0; - panic("panic_on_warn set ...\n"); - } - - print_modules(); - show_regs(regs); - print_oops_end_marker(); - /* Just a warning, don't kill lockdep. 
*/ - add_taint(BUG_GET_TAINT(bug), LOCKDEP_STILL_OK); + __warn(file, line, (void *)bugaddr, BUG_GET_TAINT(bug), regs, + NULL); return BUG_TRAP_TYPE_WARN; } -- cgit From f9310b2f9a19b7f16c7b1c1558f8b649b9b933c1 Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Thu, 17 Mar 2016 14:23:07 -0700 Subject: sscanf: implement basic character sets Implement basic character sets for the '%[' conversion specifier. The '%[' conversion specifier matches a nonempty sequence of characters from the specified set of accepted (or with '^', rejected) characters between the brackets. The substring matched is to be made up of characters in (or not in) the set. This is useful for matching substrings that are delimited by something other than spaces. This implementation differs from its glibc counterpart in the following ways: (1) No support for character ranges (e.g., 'a-z' or '0-9') (2) The hyphen '-' is not a special character (3) The closing bracket ']' cannot be matched (4) No support (yet) for discarding matching input ('%*[') The bitmap code is largely based upon sample code which was provided by Rasmus. The motivation for adding character set support to sscanf originally stemmed from the kernel livepatching project. An ongoing patchset utilizes new livepatch Elf symbol and section names to store important metadata livepatch needs to properly apply its patches. Such metadata is stored in these section and symbol names as substrings delimited by periods '.' and commas ','. For example, a livepatch symbol name might look like this: .klp.sym.vmlinux.printk,0 However, sscanf currently can only extract "substrings" delimited by whitespace using the "%s" specifier. Thus for the above symbol name, one cannot not use sscanf() to extract substrings "vmlinux" or "printk", for example. A number of discussions on the livepatch mailing list dealing with string parsing code for extracting these '.' and ',' delimited substrings eventually led to the conclusion that such code would be completely unnecessary if the kernel sscanf() supported character sets. Thus only a single sscanf() call would be necessary to extract these substrings. In addition, such an addition to sscanf() could benefit other areas of the kernel that might have a similar need in the future. [akpm@linux-foundation.org: 80-col tweaks] Signed-off-by: Jessica Yu Signed-off-by: Rasmus Villemoes Cc: Andy Shevchenko Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 525c8e19bda2..ccb664b54280 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2640,8 +2640,12 @@ int vsscanf(const char *buf, const char *fmt, va_list args) if (*fmt == '*') { if (!*str) break; - while (!isspace(*fmt) && *fmt != '%' && *fmt) + while (!isspace(*fmt) && *fmt != '%' && *fmt) { + /* '%*[' not yet supported, invalid format */ + if (*fmt == '[') + return num; fmt++; + } while (!isspace(*str) && *str) str++; continue; @@ -2714,6 +2718,59 @@ int vsscanf(const char *buf, const char *fmt, va_list args) num++; } continue; + /* + * Warning: This implementation of the '[' conversion specifier + * deviates from its glibc counterpart in the following ways: + * (1) It does NOT support ranges i.e. 
'-' is NOT a special + * character + * (2) It cannot match the closing bracket ']' itself + * (3) A field width is required + * (4) '%*[' (discard matching input) is currently not supported + * + * Example usage: + * ret = sscanf("00:0a:95","%2[^:]:%2[^:]:%2[^:]", + * buf1, buf2, buf3); + * if (ret < 3) + * // etc.. + */ + case '[': + { + char *s = (char *)va_arg(args, char *); + DECLARE_BITMAP(set, 256) = {0}; + unsigned int len = 0; + bool negate = (*fmt == '^'); + + /* field width is required */ + if (field_width == -1) + return num; + + if (negate) + ++fmt; + + for ( ; *fmt && *fmt != ']'; ++fmt, ++len) + set_bit((u8)*fmt, set); + + /* no ']' or no character set found */ + if (!*fmt || !len) + return num; + ++fmt; + + if (negate) { + bitmap_complement(set, set, 256); + /* exclude null '\0' byte */ + clear_bit(0, set); + } + + /* match must be non-empty */ + if (!test_bit((u8)*str, set)) + return num; + + while (test_bit((u8)*str, set) && field_width--) + *s++ = *str++; + *s = '\0'; + ++num; + } + continue; case 'o': base = 8; break; -- cgit From 5c9a8750a6409c63a0f01d51a9024861022f6593 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 22 Mar 2016 14:27:30 -0700 Subject: kernel: add kcov code coverage kcov provides code coverage collection for coverage-guided fuzzing (randomized testing). Coverage-guided fuzzing is a testing technique that uses coverage feedback to determine new interesting inputs to a system. A notable user-space example is AFL (http://lcamtuf.coredump.cx/afl/). However, this technique is not widely used for kernel testing due to missing compiler and kernel support. kcov does not aim to collect as much coverage as possible. It aims to collect more or less stable coverage that is function of syscall inputs. To achieve this goal it does not collect coverage in soft/hard interrupts and instrumentation of some inherently non-deterministic or non-interesting parts of kernel is disbled (e.g. scheduler, locking). Currently there is a single coverage collection mode (tracing), but the API anticipates additional collection modes. Initially I also implemented a second mode which exposes coverage in a fixed-size hash table of counters (what Quentin used in his original patch). I've dropped the second mode for simplicity. This patch adds the necessary support on kernel side. The complimentary compiler support was added in gcc revision 231296. We've used this support to build syzkaller system call fuzzer, which has found 90 kernel bugs in just 2 months: https://github.com/google/syzkaller/wiki/Found-Bugs We've also found 30+ bugs in our internal systems with syzkaller. Another (yet unexplored) direction where kcov coverage would greatly help is more traditional "blob mutation". For example, mounting a random blob as a filesystem, or receiving a random blob over wire. Why not gcov. Typical fuzzing loop looks as follows: (1) reset coverage, (2) execute a bit of code, (3) collect coverage, repeat. A typical coverage can be just a dozen of basic blocks (e.g. an invalid input). In such context gcov becomes prohibitively expensive as reset/collect coverage steps depend on total number of basic blocks/edges in program (in case of kernel it is about 2M). Cost of kcov depends only on number of executed basic blocks/edges. On top of that, kernel requires per-thread coverage because there are always background threads and unrelated processes that also produce coverage. With inlined gcov instrumentation per-thread coverage is not possible. 
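To make that loop concrete, here is a hedged userspace sketch of the trace mode this patch adds, following the KCOV_INIT_TRACE/KCOV_ENABLE/KCOV_DISABLE ioctl interface it introduces (see Documentation/kcov.txt in the patch for the authoritative example):

	#include <linux/kcov.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <unistd.h>

	#define COVER_SIZE (64 << 10)	/* trace buffer size, in entries */

	int main(void)
	{
		unsigned long *cover, n, i;
		int fd = open("/sys/kernel/debug/kcov", O_RDWR);

		if (fd == -1)
			return 1;
		if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE))	/* size the buffer */
			return 1;
		cover = mmap(NULL, COVER_SIZE * sizeof(unsigned long),
			     PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
		if (cover == MAP_FAILED)
			return 1;
		if (ioctl(fd, KCOV_ENABLE, 0))	/* start collecting for this thread */
			return 1;
		cover[0] = 0;			/* (1) reset coverage */
		read(-1, NULL, 0);		/* (2) execute the code under test */
		n = cover[0];			/* (3) collect: number of PCs recorded */
		for (i = 0; i < n; i++)
			printf("0x%lx\n", cover[i + 1]);
		ioctl(fd, KCOV_DISABLE, 0);
		return 0;
	}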
kcov exposes kernel PCs and control flow to user-space which is insecure. But debugfs should not be mapped as user accessible. Based on a patch by Quentin Casasnovas. [akpm@linux-foundation.org: make task_struct.kcov_mode have type `enum kcov_mode'] [akpm@linux-foundation.org: unbreak allmodconfig] [akpm@linux-foundation.org: follow x86 Makefile layout standards] Signed-off-by: Dmitry Vyukov Reviewed-by: Kees Cook Cc: syzkaller Cc: Vegard Nossum Cc: Catalin Marinas Cc: Tavis Ormandy Cc: Will Deacon Cc: Quentin Casasnovas Cc: Kostya Serebryany Cc: Eric Dumazet Cc: Alexander Potapenko Cc: Kees Cook Cc: Bjorn Helgaas Cc: Sasha Levin Cc: David Drysdale Cc: Ard Biesheuvel Cc: Andrey Ryabinin Cc: Kirill A. Shutemov Cc: Jiri Slaby Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 21 +++++++++++++++++++++ lib/Makefile | 12 ++++++++++++ 2 files changed, 33 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 5a60f45cd9bb..532d4d52d1df 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -696,6 +696,27 @@ source "lib/Kconfig.kasan" endmenu # "Memory Debugging" +config ARCH_HAS_KCOV + bool + help + KCOV does not have any arch-specific code, but currently it is enabled + only for x86_64. KCOV requires testing on other archs, and most likely + disabling of instrumentation for some early boot code. + +config KCOV + bool "Code coverage for fuzzing" + depends on ARCH_HAS_KCOV + select DEBUG_FS + help + KCOV exposes kernel code coverage information in a form suitable + for coverage-guided fuzzing (randomized testing). + + If RANDOMIZE_BASE is enabled, PC values will not be stable across + different machines and across reboots. If you need stable PC values, + disable RANDOMIZE_BASE. + + For more details, see Documentation/kcov.txt. + config DEBUG_SHIRQ bool "Debug shared IRQ handlers" depends on DEBUG_KERNEL diff --git a/lib/Makefile b/lib/Makefile index 4962d14c450f..a1de5b61ff40 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -7,6 +7,18 @@ ORIG_CFLAGS := $(KBUILD_CFLAGS) KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS)) endif +# These files are disabled because they produce lots of non-interesting and/or +# flaky coverage that is not a function of syscall inputs. For example, +# rbtree can be global and individual rotations don't correlate with inputs. +KCOV_INSTRUMENT_string.o := n +KCOV_INSTRUMENT_rbtree.o := n +KCOV_INSTRUMENT_list_debug.o := n +KCOV_INSTRUMENT_debugobjects.o := n +KCOV_INSTRUMENT_dynamic_debug.o := n +# Kernel does not boot if we instrument this file as it uses custom calling +# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS). +KCOV_INSTRUMENT_hweight.o := n + lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o timerqueue.o\ idr.o int_sqrt.o extable.o \ -- cgit From dde5cf39d4d2cce71f2997c37210dd624d0e4bf6 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 22 Mar 2016 14:27:45 -0700 Subject: ubsan: fix tree-wide -Wmaybe-uninitialized false positives -fsanitize=* options makes GCC less smart than usual and increase number of 'maybe-uninitialized' false-positives. So this patch does two things: * Add -Wno-maybe-uninitialized to CFLAGS_UBSAN which will disable all such warnings for instrumented files. * Remove CONFIG_UBSAN_SANITIZE_ALL from all[yes|mod]config builds. So the all[yes|mod]config build goes without -fsanitize=* and still with -Wmaybe-uninitialized. 
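A hedged illustration of the false-positive shape involved (hypothetical code, not taken from the report): under -fsanitize=* instrumentation GCC can lose track of the coupling between an error flag and the value it guards, and warns even though the value is written on every path that reads it.

	static int get_value(int *out) { *out = 42; return 0; }	/* stub helper */

	int demo(int flag)
	{
		int val, err = -1;

		if (flag)
			err = get_value(&val);
		if (err)
			return err;
		return val;	/* -Wmaybe-uninitialized may fire here with -fsanitize=* */
	}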
Signed-off-by: Andrey Ryabinin Reported-by: Fengguang Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.ubsan | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index e07c1ba9ba13..39494af9a84a 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -13,6 +13,11 @@ config UBSAN_SANITIZE_ALL bool "Enable instrumentation for the entire kernel" depends on UBSAN depends on ARCH_HAS_UBSAN_SANITIZE_ALL + + # We build with -Wno-maybe-uninitialized, but we still want to + # use -Wmaybe-uninitialized in allmodconfig builds. + # So the dependency below is used to disable this option in allmodconfig. + depends on !COMPILE_TEST default y help This option activates instrumentation for the entire kernel. -- cgit From 6c31da3464b4d28825d1827ee41a3a217b2dcf0e Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 19 Mar 2016 17:54:10 +0100 Subject: parisc,metag: Implement CONFIG_DEBUG_STACK_USAGE option On parisc and metag the stack grows upwards, so for those we need to scan the stack downwards in order to calculate how much stack a process has used. Tested on a 64bit parisc kernel. Signed-off-by: Helge Deller --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 532d4d52d1df..1e9a607534ca 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -558,7 +558,7 @@ config DEBUG_KMEMLEAK_DEFAULT_OFF config DEBUG_STACK_USAGE bool "Stack utilization instrumentation" - depends on DEBUG_KERNEL && !IA64 && !PARISC && !METAG + depends on DEBUG_KERNEL && !IA64 help Enables the display of the minimum amount of free stack which each task has ever had available in the sysrq-T and sysrq-P debug output. -- cgit From e6e8379c876de16c6b78f83b15d5ac32c79cb440 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 25 Mar 2016 14:21:56 -0700 Subject: kasan: modify kmalloc_large_oob_right(), add kmalloc_pagealloc_oob_right() This patchset implements SLAB support for KASAN. Unlike SLUB, SLAB doesn't store allocation/deallocation stacks for heap objects, therefore we reimplement this feature in mm/kasan/stackdepot.c. The intention is to ultimately switch SLUB to use this implementation as well, which will save a lot of memory (right now SLUB bloats each object by 256 bytes to store the allocation/deallocation stacks). Also, neither SLUB nor SLAB delays the reuse of freed memory chunks, which is necessary for better detection of use-after-free errors. We introduce memory quarantine (mm/kasan/quarantine.c), which allows delayed reuse of deallocated memory. This patch (of 7): Rename kmalloc_large_oob_right() to kmalloc_pagealloc_oob_right(), as the test only checks the page allocator functionality. Also reimplement kmalloc_large_oob_right() so that the test allocates a large enough chunk of memory that still does not trigger the page allocator fallback.
Signed-off-by: Alexander Potapenko Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Steven Rostedt Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/test_kasan.c b/lib/test_kasan.c index c32f3b0048dc..90ad74f71535 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -65,11 +65,34 @@ static noinline void __init kmalloc_node_oob_right(void) kfree(ptr); } -static noinline void __init kmalloc_large_oob_right(void) +#ifdef CONFIG_SLUB +static noinline void __init kmalloc_pagealloc_oob_right(void) { char *ptr; size_t size = KMALLOC_MAX_CACHE_SIZE + 10; + /* Allocate a chunk that does not fit into a SLUB cache to trigger + * the page allocator fallback. + */ + pr_info("kmalloc pagealloc allocation: out-of-bounds to right\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + ptr[size] = 0; + kfree(ptr); +} +#endif + +static noinline void __init kmalloc_large_oob_right(void) +{ + char *ptr; + size_t size = KMALLOC_MAX_CACHE_SIZE - 256; + /* Allocate a chunk that is large enough, but still fits into a slab + * and does not trigger the page allocator fallback in SLUB. + */ pr_info("kmalloc large allocation: out-of-bounds to right\n"); ptr = kmalloc(size, GFP_KERNEL); if (!ptr) { @@ -324,6 +347,9 @@ static int __init kmalloc_tests_init(void) kmalloc_oob_right(); kmalloc_oob_left(); kmalloc_node_oob_right(); +#ifdef CONFIG_SLUB + kmalloc_pagealloc_oob_right(); +#endif kmalloc_large_oob_right(); kmalloc_oob_krealloc_more(); kmalloc_oob_krealloc_less(); -- cgit From 7ed2f9e663854db313f177a511145630e398b402 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 25 Mar 2016 14:21:59 -0700 Subject: mm, kasan: SLAB support Add KASAN hooks to SLAB allocator. This patch is based on the "mm: kasan: unified support for SLUB and SLAB allocators" patch originally prepared by Dmitry Chernenkov. Signed-off-by: Alexander Potapenko Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Steven Rostedt Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.kasan | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 0fee5acd5aa0..0e4d2b3b0aee 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -5,7 +5,7 @@ if HAVE_ARCH_KASAN config KASAN bool "KASan: runtime memory debugger" - depends on SLUB_DEBUG + depends on SLUB_DEBUG || (SLAB && !DEBUG_SLAB) select CONSTRUCTORS help Enables kernel address sanitizer - runtime memory debugger, @@ -16,6 +16,8 @@ config KASAN This feature consumes about 1/8 of available memory and brings about ~x3 performance slowdown. For better error detection enable CONFIG_STACKTRACE. + Currently CONFIG_KASAN doesn't work with CONFIG_DEBUG_SLAB + (the resulting kernel does not boot). choice prompt "Instrumentation type" -- cgit From cd11016e5f5212c13c0cec7384a525edc93b4921 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 25 Mar 2016 14:22:08 -0700 Subject: mm, kasan: stackdepot implementation. Enable stackdepot for SLAB Implement the stack depot and provide CONFIG_STACKDEPOT. 
Stack depot will allow KASAN to store allocation/deallocation stack traces for memory chunks. The stack traces are stored in a hash table and referenced by handles which reside in the kasan_alloc_meta and kasan_free_meta structures in the allocated memory chunks. IRQ stack traces are cut below the IRQ entry point to avoid unnecessary duplication. Right now stackdepot support is only enabled in the SLAB allocator. Once KASAN features in SLAB are on par with those in SLUB we can switch SLUB to stackdepot as well, thus removing the dependency on SLUB stack bookkeeping, which wastes a lot of memory. This patch is based on the "mm: kasan: stack depots" patch originally prepared by Dmitry Chernenkov. Joonsoo has said that he plans to reuse the stackdepot code for the mm/page_owner.c debugging facility. [akpm@linux-foundation.org: s/depot_stack_handle/depot_stack_handle_t] [aryabinin@virtuozzo.com: comment style fixes] Signed-off-by: Alexander Potapenko Signed-off-by: Andrey Ryabinin Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Steven Rostedt Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig | 4 + lib/Kconfig.kasan | 1 + lib/Makefile | 3 + lib/stackdepot.c | 284 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 292 insertions(+) create mode 100644 lib/stackdepot.c (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index 133ebc0c1773..3cca1222578e 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -536,4 +536,8 @@ config ARCH_HAS_PMEM_API config ARCH_HAS_MMIO_FLUSH bool +config STACKDEPOT + bool + select STACKTRACE + endmenu diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 0e4d2b3b0aee..67d8c6838ba9 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -7,6 +7,7 @@ config KASAN bool "KASan: runtime memory debugger" depends on SLUB_DEBUG || (SLAB && !DEBUG_SLAB) select CONSTRUCTORS + select STACKDEPOT if SLAB help Enables kernel address sanitizer - runtime memory debugger, designed to find out-of-bounds accesses and use-after-free bugs. diff --git a/lib/Makefile b/lib/Makefile index a1de5b61ff40..7bd6fd436c97 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -181,6 +181,9 @@ obj-$(CONFIG_SG_SPLIT) += sg_split.o obj-$(CONFIG_STMP_DEVICE) += stmp_device.o obj-$(CONFIG_IRQ_POLL) += irq_poll.o +obj-$(CONFIG_STACKDEPOT) += stackdepot.o +KASAN_SANITIZE_stackdepot.o := n + libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \ fdt_empty_tree.o $(foreach file, $(libfdt_files), \ diff --git a/lib/stackdepot.c b/lib/stackdepot.c new file mode 100644 index 000000000000..654c9d87e83a --- /dev/null +++ b/lib/stackdepot.c @@ -0,0 +1,284 @@ +/* + * Generic stack depot for storing stack traces. + * + * Some debugging tools need to save stack traces of certain events which can + * be later presented to the user. For example, KASAN needs to save alloc and + * free stacks for each object, but storing two stack traces per object + * requires too much memory (e.g. SLUB_DEBUG needs 256 bytes per object for + * that). + * + * Instead, stack depot maintains a hashtable of unique stacktraces. Since alloc + * and free stacks repeat a lot, we save about 100x space. + * Stacks are never removed from the depot, so we store them contiguously one + * after another in a contiguous memory allocation. + * + * Author: Alexander Potapenko + * Copyright (C) 2016 Google, Inc. + * + * Based on code by Dmitry Chernenkov.
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include <linux/gfp.h> +#include <linux/jhash.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/percpu.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/stackdepot.h> +#include <linux/stacktrace.h> +#include <linux/string.h> +#include <linux/types.h> + +#define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8) + +#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */ +#define STACK_ALLOC_SIZE (1LL << (PAGE_SHIFT + STACK_ALLOC_ORDER)) +#define STACK_ALLOC_ALIGN 4 +#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGE_SHIFT - \ + STACK_ALLOC_ALIGN) +#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - STACK_ALLOC_OFFSET_BITS) +#define STACK_ALLOC_SLABS_CAP 1024 +#define STACK_ALLOC_MAX_SLABS \ + (((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \ + (1LL << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_SLABS_CAP) + +/* The compact structure to store the reference to stacks. */ +union handle_parts { + depot_stack_handle_t handle; + struct { + u32 slabindex : STACK_ALLOC_INDEX_BITS; + u32 offset : STACK_ALLOC_OFFSET_BITS; + }; +}; + +struct stack_record { + struct stack_record *next; /* Link in the hashtable */ + u32 hash; /* Hash in the hashtable */ + u32 size; /* Number of frames in the stack */ + union handle_parts handle; + unsigned long entries[1]; /* Variable-sized array of entries. */ +}; + +static void *stack_slabs[STACK_ALLOC_MAX_SLABS]; + +static int depot_index; +static int next_slab_inited; +static size_t depot_offset; +static DEFINE_SPINLOCK(depot_lock); + +static bool init_stack_slab(void **prealloc) +{ + if (!*prealloc) + return false; + /* + * This smp_load_acquire() pairs with smp_store_release() to + * |next_slab_inited| below and in depot_alloc_stack(). + */ + if (smp_load_acquire(&next_slab_inited)) + return true; + if (stack_slabs[depot_index] == NULL) { + stack_slabs[depot_index] = *prealloc; + } else { + stack_slabs[depot_index + 1] = *prealloc; + /* + * This smp_store_release pairs with smp_load_acquire() from + * |next_slab_inited| above and in depot_save_stack(). + */ + smp_store_release(&next_slab_inited, 1); + } + *prealloc = NULL; + return true; +} + +/* Allocation of a new stack in raw storage */ +static struct stack_record *depot_alloc_stack(unsigned long *entries, int size, + u32 hash, void **prealloc, gfp_t alloc_flags) +{ + int required_size = offsetof(struct stack_record, entries) + + sizeof(unsigned long) * size; + struct stack_record *stack; + + required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN); + + if (unlikely(depot_offset + required_size > STACK_ALLOC_SIZE)) { + if (unlikely(depot_index + 1 >= STACK_ALLOC_MAX_SLABS)) { + WARN_ONCE(1, "Stack depot reached limit capacity"); + return NULL; + } + depot_index++; + depot_offset = 0; + /* + * smp_store_release() here pairs with smp_load_acquire() from + * |next_slab_inited| in depot_save_stack() and + * init_stack_slab().
+ */ + if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) + smp_store_release(&next_slab_inited, 0); + } + init_stack_slab(prealloc); + if (stack_slabs[depot_index] == NULL) + return NULL; + + stack = stack_slabs[depot_index] + depot_offset; + + stack->hash = hash; + stack->size = size; + stack->handle.slabindex = depot_index; + stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN; + memcpy(stack->entries, entries, size * sizeof(unsigned long)); + depot_offset += required_size; + + return stack; +} + +#define STACK_HASH_ORDER 20 +#define STACK_HASH_SIZE (1L << STACK_HASH_ORDER) +#define STACK_HASH_MASK (STACK_HASH_SIZE - 1) +#define STACK_HASH_SEED 0x9747b28c + +static struct stack_record *stack_table[STACK_HASH_SIZE] = { + [0 ... STACK_HASH_SIZE - 1] = NULL +}; + +/* Calculate hash for a stack */ +static inline u32 hash_stack(unsigned long *entries, unsigned int size) +{ + return jhash2((u32 *)entries, + size * sizeof(unsigned long) / sizeof(u32), + STACK_HASH_SEED); +} + +/* Find a stack that is equal to the one stored in entries in the hash */ +static inline struct stack_record *find_stack(struct stack_record *bucket, + unsigned long *entries, int size, + u32 hash) +{ + struct stack_record *found; + + for (found = bucket; found; found = found->next) { + if (found->hash == hash && + found->size == size && + !memcmp(entries, found->entries, + size * sizeof(unsigned long))) { + return found; + } + } + return NULL; +} + +void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace) +{ + union handle_parts parts = { .handle = handle }; + void *slab = stack_slabs[parts.slabindex]; + size_t offset = parts.offset << STACK_ALLOC_ALIGN; + struct stack_record *stack = slab + offset; + + trace->nr_entries = trace->max_entries = stack->size; + trace->entries = stack->entries; + trace->skip = 0; +} + +/** + * depot_save_stack - save stack in a stack depot. + * @trace - the stacktrace to save. + * @alloc_flags - flags for allocating additional memory if required. + * + * Returns the handle of the stack struct stored in depot. + */ +depot_stack_handle_t depot_save_stack(struct stack_trace *trace, + gfp_t alloc_flags) +{ + u32 hash; + depot_stack_handle_t retval = 0; + struct stack_record *found = NULL, **bucket; + unsigned long flags; + struct page *page = NULL; + void *prealloc = NULL; + + if (unlikely(trace->nr_entries == 0)) + goto fast_exit; + + hash = hash_stack(trace->entries, trace->nr_entries); + /* Bad luck, we won't store this stack. */ + if (hash == 0) + goto exit; + + bucket = &stack_table[hash & STACK_HASH_MASK]; + + /* + * Fast path: look the stack trace up without locking. + * The smp_load_acquire() here pairs with smp_store_release() to + * |bucket| below. + */ + found = find_stack(smp_load_acquire(bucket), trace->entries, + trace->nr_entries, hash); + if (found) + goto exit; + + /* + * Check if the current or the next stack slab needs to be initialized. + * If so, allocate the memory - we won't be able to do that under the + * lock. + * + * The smp_load_acquire() here pairs with smp_store_release() to + * |next_slab_inited| in depot_alloc_stack() and init_stack_slab(). + */ + if (unlikely(!smp_load_acquire(&next_slab_inited))) { + /* + * Zero out zone modifiers, as we don't have specific zone + * requirements. Keep the flags related to allocation in atomic + * contexts and I/O.
+ */ + alloc_flags &= ~GFP_ZONEMASK; + alloc_flags &= (GFP_ATOMIC | GFP_KERNEL); + page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER); + if (page) + prealloc = page_address(page); + } + + spin_lock_irqsave(&depot_lock, flags); + + found = find_stack(*bucket, trace->entries, trace->nr_entries, hash); + if (!found) { + struct stack_record *new = + depot_alloc_stack(trace->entries, trace->nr_entries, + hash, &prealloc, alloc_flags); + if (new) { + new->next = *bucket; + /* + * This smp_store_release() pairs with + * smp_load_acquire() from |bucket| above. + */ + smp_store_release(bucket, new); + found = new; + } + } else if (prealloc) { + /* + * We didn't need to store this stack trace, but let's keep + * the preallocated memory for the future. + */ + WARN_ON(!init_stack_slab(&prealloc)); + } + + spin_unlock_irqrestore(&depot_lock, flags); +exit: + if (prealloc) { + /* Nobody used this memory, ok to free it. */ + free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER); + } + if (found) + retval = found->handle.handle; +fast_exit: + return retval; +} -- cgit From 9dcadd381b1d199074937019d612346c061de415 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 25 Mar 2016 14:22:11 -0700 Subject: kasan: test fix: warn if the UAF could not be detected in kmalloc_uaf2 Signed-off-by: Alexander Potapenko Acked-by: Andrey Ryabinin Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Steven Rostedt Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 90ad74f71535..82169fbf2453 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -294,6 +294,8 @@ static noinline void __init kmalloc_uaf2(void) } ptr1[40] = 'x'; + if (ptr1 == ptr2) + pr_err("Could not detect use-after-free: ptr1 == ptr2\n"); kfree(ptr2); } -- cgit
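As a usage note for the stackdepot patch above: a client such as KASAN captures a stack_trace once, stores only the 4-byte depot_stack_handle_t, and fetches the full trace back on demand. A minimal sketch under the API introduced there; the wrapper names are hypothetical, and save_stack_trace()/print_stack_trace() are the stacktrace helpers of this kernel era:

	#include <linux/gfp.h>
	#include <linux/kernel.h>
	#include <linux/stackdepot.h>
	#include <linux/stacktrace.h>

	/* Capture the current stack and store it, keeping only a handle. */
	static depot_stack_handle_t save_current_stack(gfp_t flags)
	{
		unsigned long entries[16];
		struct stack_trace trace = {
			.entries	= entries,
			.max_entries	= ARRAY_SIZE(entries),
		};

		save_stack_trace(&trace);
		/* Identical stacks are deduplicated into one depot record. */
		return depot_save_stack(&trace, flags);
	}

	/* Resolve a handle back into a full trace and print it. */
	static void print_saved_stack(depot_stack_handle_t handle)
	{
		struct stack_trace trace;

		if (!handle)	/* depot_save_stack() returns 0 on failure */
			return;
		depot_fetch_stack(handle, &trace);
		print_stack_trace(&trace, 0);
	}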